Skip to content

Commit 875be3c

Browse files
committed
Cleanup collate test fixtures and order
1 parent ad9e732 commit 875be3c

File tree

2 files changed

+13
-15
lines changed

2 files changed

+13
-15
lines changed

tests/conftest.py

Lines changed: 0 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -380,7 +380,6 @@ def collated_dataset_directory(run_directory):
380380
write_to_dataset(
381381
pa.Table.from_pandas(df),
382382
base_dir=dataset_directory,
383-
partition_columns=["source"],
384383
)
385384
return dataset_directory
386385

@@ -476,7 +475,6 @@ def collated_with_dupe_dataset_directory(run_directory):
476475
write_to_dataset(
477476
pa.Table.from_pandas(df),
478477
base_dir=dataset_directory,
479-
partition_columns=["source"],
480478
)
481479
return dataset_directory
482480

tests/test_collate_ab_transforms.py

Lines changed: 13 additions & 13 deletions
Original file line number | Diff line number | Diff line change
@@ -128,6 +128,19 @@ def test_get_joined_batches_iter_success(transformed_parquet_dataset):
128128
assert joined_batch.schema.names == COLLATED_DATASET_SCHEMA.names
129129

130130

131+
def test_get_deduped_batches_iter_success(collated_with_dupe_dataset_directory):
132+
deduped_batches_iter = get_deduped_batches_iter(collated_with_dupe_dataset_directory)
133+
deduped_df = next(deduped_batches_iter).to_pandas()
134+
135+
# assert record 'def456' was dropped because most recent is action=delete
136+
assert len(deduped_df) == 2
137+
assert set(deduped_df.timdex_record_id) == {"abc123", "ghi789"}
138+
139+
# assert record 'ghi789' has most recent 2024-10-02 version
140+
deduped_record = deduped_df.set_index("timdex_record_id").loc["ghi789"]
141+
assert json.loads(deduped_record.record_a)["material"] == "stucco"
142+
143+
131144
def test_validate_output_success(collated_dataset_directory):
132145
validate_output(dataset_path=collated_dataset_directory)
133146

@@ -198,16 +211,3 @@ def test_get_transform_version_success(transformed_directories, output_filename)
198211
def test_get_transform_version_raise_error():
199212
with pytest.raises(ValueError, match="Transformed filepath is invalid."):
200213
get_transform_version("invalid")
201-
202-
203-
def test_get_deduped_batches_iter_success(collated_with_dupe_dataset_directory):
204-
deduped_batches_iter = get_deduped_batches_iter(collated_with_dupe_dataset_directory)
205-
deduped_df = next(deduped_batches_iter).to_pandas()
206-
207-
# assert record 'def456' was dropped because most recent is action=delete
208-
assert len(deduped_df) == 2
209-
assert set(deduped_df.timdex_record_id) == {"abc123", "ghi789"}
210-
211-
# assert record 'ghi789' has most recent 2024-10-02 version
212-
deduped_record = deduped_df.set_index("timdex_record_id").loc["ghi789"]
213-
assert json.loads(deduped_record.record_a)["material"] == "stucco"

0 commit comments

Comments
 (0)