11import re
22
33import pytest
4- import sqlalchemy as sa
54
65import datachain as dc
76from datachain .dataset import DatasetStatus
87from datachain .error import DatasetNotFoundError
9- from datachain .query .dataset import DatasetQuery
108from datachain .query .session import Session
11- from datachain .sql .types import String
129
1310
@pytest.fixture
def project(catalog):
    """Create the ``dev.animals`` project used by the session tests."""
    metastore = catalog.metastore
    return metastore.create_project("dev", "animals")
1815
16+ def _fqn (project , name ):
17+ return f"{ project .namespace .name } .{ project .name } .{ name } "
18+
19+
1920def test_ephemeral_dataset_naming (catalog , project ):
2021 session_name = "qwer45"
2122
2223 with pytest .raises (ValueError ):
2324 Session ("wrong-ds_name" , catalog = catalog )
2425
2526 with Session (session_name , catalog = catalog ) as session :
26- ds_name = "my_test_ds12"
27- session .catalog .create_dataset (
28- ds_name , project , columns = (sa .Column ("name" , String ),)
29- )
30- ds_tmp = DatasetQuery (
31- name = ds_name ,
32- namespace_name = project .namespace .name ,
33- project_name = project .name ,
34- session = session ,
35- catalog = session .catalog ,
36- include_incomplete = True , # Test works with CREATED dataset
37- ).save ()
27+ fqn = _fqn (project , "my_test_ds12" )
28+ dc .read_values (name = ["a" ], session = session ).save (fqn )
29+ tmp_name = session .generate_temp_dataset_name ()
30+ ds_tmp = dc .read_dataset (fqn , session = session ).save (tmp_name )
3831 session_uuid = f"[0-9a-fA-F]{{{ Session .SESSION_UUID_LEN } }}"
3932 table_uuid = f"[0-9a-fA-F]{{{ Session .TEMP_TABLE_UUID_LEN } }}"
4033
@@ -48,15 +41,11 @@ def test_global_session_naming(catalog, project):
4841 session_uuid = f"[0-9a-fA-F]{{{ Session .SESSION_UUID_LEN } }}"
4942 table_uuid = f"[0-9a-fA-F]{{{ Session .TEMP_TABLE_UUID_LEN } }}"
5043
51- ds_name = "qwsd"
52- catalog .create_dataset (ds_name , project , columns = (sa .Column ("name" , String ),))
53- ds_tmp = DatasetQuery (
54- name = ds_name ,
55- namespace_name = project .namespace .name ,
56- project_name = project .name ,
57- catalog = catalog ,
58- include_incomplete = True , # Test works with CREATED dataset
59- ).save ()
44+ fqn = _fqn (project , "qwsd" )
45+ global_session = Session .get (catalog = catalog )
46+ dc .read_values (name = ["a" ], session = global_session ).save (fqn )
47+ tmp_name = global_session .generate_temp_dataset_name ()
48+ ds_tmp = dc .read_dataset (fqn , session = global_session ).save (tmp_name )
6049 global_prefix = f"{ Session .DATASET_PREFIX } { Session .GLOBAL_SESSION_NAME } "
6150 pattern = rf"^{ global_prefix } _{ session_uuid } _{ table_uuid } $"
6251 assert re .match (pattern , ds_tmp .name ) is not None
@@ -83,21 +72,12 @@ def test_is_temp_dataset(name, is_temp):
8372def test_ephemeral_dataset_lifecycle (catalog , project ):
8473 session_name = "asd3d4"
8574 with Session (session_name , catalog = catalog ) as session :
86- ds_name = "my_test_ds12"
87- session .catalog .create_dataset (
88- ds_name , project , columns = (sa .Column ("name" , String ),)
89- )
90- ds_tmp = DatasetQuery (
91- name = ds_name ,
92- namespace_name = project .namespace .name ,
93- project_name = project .name ,
94- session = session ,
95- catalog = session .catalog ,
96- include_incomplete = True , # Test works with CREATED dataset
97- ).save ()
98-
99- assert isinstance (ds_tmp , DatasetQuery )
100- assert ds_tmp .name != ds_name
75+ fqn = _fqn (project , "my_test_ds12" )
76+ dc .read_values (name = ["a" ], session = session ).save (fqn )
77+ tmp_name = session .generate_temp_dataset_name ()
78+ ds_tmp = dc .read_dataset (fqn , session = session ).save (tmp_name )
79+
80+ assert ds_tmp .name != "my_test_ds12"
10181 assert ds_tmp .name is not None
10282 assert ds_tmp .name .startswith (Session .DATASET_PREFIX )
10383 assert session_name in ds_tmp .name
@@ -113,27 +93,17 @@ def test_session_datasets_not_in_ls_datasets(catalog, project):
11393 session_name = "testls"
11494 with Session (session_name , catalog = catalog ) as session :
11595 # Create a regular dataset
116- ds_name = "regular_dataset"
117- (
118- dc .read_values (num = [1 , 2 , 3 ], session = session )
119- .settings (namespace = project .namespace .name , project = project .name )
120- .save (ds_name )
121- )
96+ fqn = _fqn (project , "regular_dataset" )
97+ dc .read_values (num = [1 , 2 , 3 ], session = session ).save (fqn )
12298
123- # Create a temp dataset
124- ds_tmp = DatasetQuery (
125- name = ds_name ,
126- namespace_name = project .namespace .name ,
127- project_name = project .name ,
128- session = session ,
129- catalog = session .catalog ,
130- include_incomplete = True ,
131- ).save ()
99+ # Create a temp dataset by re-saving the regular one
100+ tmp_name = session .generate_temp_dataset_name ()
101+ ds_tmp = dc .read_dataset (fqn , session = session ).save (tmp_name )
132102
133103 datasets = list (catalog .ls_datasets ())
134104 dataset_names = [d .name for d in datasets ]
135105
136- assert ds_name in dataset_names
106+ assert "regular_dataset" in dataset_names
137107
138108 assert ds_tmp .name not in dataset_names
139109 assert all (not Session .is_temp_dataset (name ) for name in dataset_names )
@@ -142,49 +112,30 @@ def test_session_datasets_not_in_ls_datasets(catalog, project):
142112def test_cleanup_temp_datasets_all_states (catalog , project ):
143113 session_name = "testcleanup"
144114 with Session (session_name , catalog = catalog ) as session :
145- ds_name = "test_dataset"
146- session .catalog .create_dataset (
147- ds_name , project , columns = (sa .Column ("name" , String ),)
148- )
115+ fqn = _fqn (project , "test_dataset" )
116+ dc .read_values (name = ["a" ], session = session ).save (fqn )
149117
150118 # Create temp datasets in different states
151119
152- # 1. CREATED state
153- ds_created = DatasetQuery (
154- name = ds_name ,
155- namespace_name = project .namespace .name ,
156- project_name = project .name ,
157- session = session ,
158- catalog = session .catalog ,
159- include_incomplete = True ,
160- ).save ()
161-
162- # 2. COMPLETE state
163- ds_complete = DatasetQuery (
164- name = ds_name ,
165- namespace_name = project .namespace .name ,
166- project_name = project .name ,
167- session = session ,
168- catalog = session .catalog ,
169- include_incomplete = True ,
170- ).save ()
171- ds_complete_record = catalog .get_dataset (
172- ds_complete .name , include_incomplete = True
120+ # 1. CREATED state (default after save — mark it back to CREATED)
121+ ds_created = dc .read_dataset (fqn , session = session ).save (
122+ session .generate_temp_dataset_name ()
173123 )
124+ ds_created_record = catalog .get_dataset (ds_created .name )
174125 catalog .metastore .update_dataset_status (
175- ds_complete_record , DatasetStatus .COMPLETE , version = "1.0.0"
126+ ds_created_record , DatasetStatus .CREATED , version = "1.0.0"
127+ )
128+
129+ # 2. COMPLETE state (save already marks COMPLETE)
130+ ds_complete = dc .read_dataset (fqn , session = session ).save (
131+ session .generate_temp_dataset_name ()
176132 )
177133
178134 # 3. FAILED state
179- ds_failed = DatasetQuery (
180- name = ds_name ,
181- namespace_name = project .namespace .name ,
182- project_name = project .name ,
183- session = session ,
184- catalog = session .catalog ,
185- include_incomplete = True ,
186- ).save ()
187- ds_failed_record = catalog .get_dataset (ds_failed .name , include_incomplete = True )
135+ ds_failed = dc .read_dataset (fqn , session = session ).save (
136+ session .generate_temp_dataset_name ()
137+ )
138+ ds_failed_record = catalog .get_dataset (ds_failed .name )
188139 catalog .metastore .update_dataset_status (
189140 ds_failed_record , DatasetStatus .FAILED , version = "1.0.0"
190141 )
0 commit comments