Skip to content

Commit a3fea9d

Browse files
committed
fix tests
1 parent c1631bd commit a3fea9d

File tree

1 file changed

+41
-90
lines changed

1 file changed

+41
-90
lines changed

tests/unit/test_session.py

Lines changed: 41 additions & 90 deletions
Original file line numberDiff line numberDiff line change
@@ -1,40 +1,33 @@
11
import re
22

33
import pytest
4-
import sqlalchemy as sa
54

65
import datachain as dc
76
from datachain.dataset import DatasetStatus
87
from datachain.error import DatasetNotFoundError
9-
from datachain.query.dataset import DatasetQuery
108
from datachain.query.session import Session
11-
from datachain.sql.types import String
129

1310

1411
@pytest.fixture
1512
def project(catalog):
1613
return catalog.metastore.create_project("dev", "animals")
1714

1815

16+
def _fqn(project, name):
17+
return f"{project.namespace.name}.{project.name}.{name}"
18+
19+
1920
def test_ephemeral_dataset_naming(catalog, project):
2021
session_name = "qwer45"
2122

2223
with pytest.raises(ValueError):
2324
Session("wrong-ds_name", catalog=catalog)
2425

2526
with Session(session_name, catalog=catalog) as session:
26-
ds_name = "my_test_ds12"
27-
session.catalog.create_dataset(
28-
ds_name, project, columns=(sa.Column("name", String),)
29-
)
30-
ds_tmp = DatasetQuery(
31-
name=ds_name,
32-
namespace_name=project.namespace.name,
33-
project_name=project.name,
34-
session=session,
35-
catalog=session.catalog,
36-
include_incomplete=True, # Test works with CREATED dataset
37-
).save()
27+
fqn = _fqn(project, "my_test_ds12")
28+
dc.read_values(name=["a"], session=session).save(fqn)
29+
tmp_name = session.generate_temp_dataset_name()
30+
ds_tmp = dc.read_dataset(fqn, session=session).save(tmp_name)
3831
session_uuid = f"[0-9a-fA-F]{{{Session.SESSION_UUID_LEN}}}"
3932
table_uuid = f"[0-9a-fA-F]{{{Session.TEMP_TABLE_UUID_LEN}}}"
4033

@@ -48,15 +41,11 @@ def test_global_session_naming(catalog, project):
4841
session_uuid = f"[0-9a-fA-F]{{{Session.SESSION_UUID_LEN}}}"
4942
table_uuid = f"[0-9a-fA-F]{{{Session.TEMP_TABLE_UUID_LEN}}}"
5043

51-
ds_name = "qwsd"
52-
catalog.create_dataset(ds_name, project, columns=(sa.Column("name", String),))
53-
ds_tmp = DatasetQuery(
54-
name=ds_name,
55-
namespace_name=project.namespace.name,
56-
project_name=project.name,
57-
catalog=catalog,
58-
include_incomplete=True, # Test works with CREATED dataset
59-
).save()
44+
fqn = _fqn(project, "qwsd")
45+
global_session = Session.get(catalog=catalog)
46+
dc.read_values(name=["a"], session=global_session).save(fqn)
47+
tmp_name = global_session.generate_temp_dataset_name()
48+
ds_tmp = dc.read_dataset(fqn, session=global_session).save(tmp_name)
6049
global_prefix = f"{Session.DATASET_PREFIX}{Session.GLOBAL_SESSION_NAME}"
6150
pattern = rf"^{global_prefix}_{session_uuid}_{table_uuid}$"
6251
assert re.match(pattern, ds_tmp.name) is not None
@@ -83,21 +72,12 @@ def test_is_temp_dataset(name, is_temp):
8372
def test_ephemeral_dataset_lifecycle(catalog, project):
8473
session_name = "asd3d4"
8574
with Session(session_name, catalog=catalog) as session:
86-
ds_name = "my_test_ds12"
87-
session.catalog.create_dataset(
88-
ds_name, project, columns=(sa.Column("name", String),)
89-
)
90-
ds_tmp = DatasetQuery(
91-
name=ds_name,
92-
namespace_name=project.namespace.name,
93-
project_name=project.name,
94-
session=session,
95-
catalog=session.catalog,
96-
include_incomplete=True, # Test works with CREATED dataset
97-
).save()
98-
99-
assert isinstance(ds_tmp, DatasetQuery)
100-
assert ds_tmp.name != ds_name
75+
fqn = _fqn(project, "my_test_ds12")
76+
dc.read_values(name=["a"], session=session).save(fqn)
77+
tmp_name = session.generate_temp_dataset_name()
78+
ds_tmp = dc.read_dataset(fqn, session=session).save(tmp_name)
79+
80+
assert ds_tmp.name != "my_test_ds12"
10181
assert ds_tmp.name is not None
10282
assert ds_tmp.name.startswith(Session.DATASET_PREFIX)
10383
assert session_name in ds_tmp.name
@@ -113,27 +93,17 @@ def test_session_datasets_not_in_ls_datasets(catalog, project):
11393
session_name = "testls"
11494
with Session(session_name, catalog=catalog) as session:
11595
# Create a regular dataset
116-
ds_name = "regular_dataset"
117-
(
118-
dc.read_values(num=[1, 2, 3], session=session)
119-
.settings(namespace=project.namespace.name, project=project.name)
120-
.save(ds_name)
121-
)
96+
fqn = _fqn(project, "regular_dataset")
97+
dc.read_values(num=[1, 2, 3], session=session).save(fqn)
12298

123-
# Create a temp dataset
124-
ds_tmp = DatasetQuery(
125-
name=ds_name,
126-
namespace_name=project.namespace.name,
127-
project_name=project.name,
128-
session=session,
129-
catalog=session.catalog,
130-
include_incomplete=True,
131-
).save()
99+
# Create a temp dataset by re-saving the regular one
100+
tmp_name = session.generate_temp_dataset_name()
101+
ds_tmp = dc.read_dataset(fqn, session=session).save(tmp_name)
132102

133103
datasets = list(catalog.ls_datasets())
134104
dataset_names = [d.name for d in datasets]
135105

136-
assert ds_name in dataset_names
106+
assert "regular_dataset" in dataset_names
137107

138108
assert ds_tmp.name not in dataset_names
139109
assert all(not Session.is_temp_dataset(name) for name in dataset_names)
@@ -142,49 +112,30 @@ def test_session_datasets_not_in_ls_datasets(catalog, project):
142112
def test_cleanup_temp_datasets_all_states(catalog, project):
143113
session_name = "testcleanup"
144114
with Session(session_name, catalog=catalog) as session:
145-
ds_name = "test_dataset"
146-
session.catalog.create_dataset(
147-
ds_name, project, columns=(sa.Column("name", String),)
148-
)
115+
fqn = _fqn(project, "test_dataset")
116+
dc.read_values(name=["a"], session=session).save(fqn)
149117

150118
# Create temp datasets in different states
151119

152-
# 1. CREATED state
153-
ds_created = DatasetQuery(
154-
name=ds_name,
155-
namespace_name=project.namespace.name,
156-
project_name=project.name,
157-
session=session,
158-
catalog=session.catalog,
159-
include_incomplete=True,
160-
).save()
161-
162-
# 2. COMPLETE state
163-
ds_complete = DatasetQuery(
164-
name=ds_name,
165-
namespace_name=project.namespace.name,
166-
project_name=project.name,
167-
session=session,
168-
catalog=session.catalog,
169-
include_incomplete=True,
170-
).save()
171-
ds_complete_record = catalog.get_dataset(
172-
ds_complete.name, include_incomplete=True
120+
# 1. CREATED state (default after save — mark it back to CREATED)
121+
ds_created = dc.read_dataset(fqn, session=session).save(
122+
session.generate_temp_dataset_name()
173123
)
124+
ds_created_record = catalog.get_dataset(ds_created.name)
174125
catalog.metastore.update_dataset_status(
175-
ds_complete_record, DatasetStatus.COMPLETE, version="1.0.0"
126+
ds_created_record, DatasetStatus.CREATED, version="1.0.0"
127+
)
128+
129+
# 2. COMPLETE state (save already marks COMPLETE)
130+
ds_complete = dc.read_dataset(fqn, session=session).save(
131+
session.generate_temp_dataset_name()
176132
)
177133

178134
# 3. FAILED state
179-
ds_failed = DatasetQuery(
180-
name=ds_name,
181-
namespace_name=project.namespace.name,
182-
project_name=project.name,
183-
session=session,
184-
catalog=session.catalog,
185-
include_incomplete=True,
186-
).save()
187-
ds_failed_record = catalog.get_dataset(ds_failed.name, include_incomplete=True)
135+
ds_failed = dc.read_dataset(fqn, session=session).save(
136+
session.generate_temp_dataset_name()
137+
)
138+
ds_failed_record = catalog.get_dataset(ds_failed.name)
188139
catalog.metastore.update_dataset_status(
189140
ds_failed_record, DatasetStatus.FAILED, version="1.0.0"
190141
)

0 commit comments

Comments (0)