Skip to content

Commit 6c6a276

Browse files
committed
ruff compliance
1 parent aa6a563 commit 6c6a276

3 files changed

Lines changed: 74 additions & 78 deletions

File tree

tests/test_indexer.py

Lines changed: 6 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -39,9 +39,9 @@ def test_load_results(sample_jsonl):
3939
results = load_results(str(sample_jsonl))
4040
assert len(results) == 2, "Should load exactly 2 documents"
4141
print(type(results[0]), MultimodalSample)
42-
assert isinstance(
43-
results[0], MultimodalSample
44-
), "Should return MultimodalSample objects"
42+
assert isinstance(results[0], MultimodalSample), (
43+
"Should return MultimodalSample objects"
44+
)
4545
# If your code overrides the .id, don't check for '1':
4646
assert "Document text 1" in results[0].text
4747
assert results[1].metadata.get("author") == "Alice"
@@ -109,9 +109,9 @@ def test_indexer_integration(
109109
)
110110

111111
# Verify the client did what we expect
112-
assert (
113-
client_instance.create_collection.called
114-
), "Should create collection if it does not exist"
112+
assert client_instance.create_collection.called, (
113+
"Should create collection if it does not exist"
114+
)
115115
assert client_instance.insert.called, "Should insert documents into Milvus"
116116

117117

tests/test_postprocessors.py

Lines changed: 35 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -27,9 +27,9 @@ def test_chunker_from_load_postprocessor():
2727
config_args = {"chunking_strategy": "sentence", "text_chunker_config": {}}
2828
base_config = BasePostProcessorConfig(type="chunker", args=config_args)
2929
processor = load_postprocessor(base_config)
30-
assert isinstance(
31-
processor, MultimodalChunker
32-
), "Expected a MultimodalChunker instance."
30+
assert isinstance(processor, MultimodalChunker), (
31+
"Expected a MultimodalChunker instance."
32+
)
3333

3434

3535
def test_chunker_process():
@@ -47,12 +47,12 @@ def test_chunker_process():
4747
chunks = chunker.process(sample)
4848
# Expect 2 chunks for the 2 sentences
4949
assert len(chunks) == 2, f"Expected 2 chunks, got {len(chunks)}"
50-
assert (
51-
chunks[0].text.strip() == "Hello world."
52-
), f"Unexpected first chunk: {chunks[0].text}"
53-
assert (
54-
chunks[1].text.strip() == "This is a test."
55-
), f"Unexpected second chunk: {chunks[1].text}"
50+
assert chunks[0].text.strip() == "Hello world.", (
51+
f"Unexpected first chunk: {chunks[0].text}"
52+
)
53+
assert chunks[1].text.strip() == "This is a test.", (
54+
f"Unexpected second chunk: {chunks[1].text}"
55+
)
5656

5757

5858
# ------------------ Filter Tests ------------------
@@ -117,16 +117,16 @@ def filter(self, sample: MultimodalSample) -> bool:
117117
accept_filter = DummyAcceptFilter("dummy_accept")
118118
accepted = accept_filter.process(sample)
119119
# When filter returns True, process() should return the sample wrapped in a list.
120-
assert accepted == [
121-
sample
122-
], f"Expected sample to be kept when filter returns True, got {accepted}"
120+
assert accepted == [sample], (
121+
f"Expected sample to be kept when filter returns True, got {accepted}"
122+
)
123123

124124
reject_filter = DummyRejectFilter("dummy_reject")
125125
rejected = reject_filter.process(sample)
126126
# When filter returns False, process() should return an empty list.
127-
assert (
128-
rejected == []
129-
), f"Expected sample to be rejected when filter returns False, got {rejected}"
127+
assert rejected == [], (
128+
f"Expected sample to be rejected when filter returns False, got {rejected}"
129+
)
130130

131131

132132
# ------------------ NER Tests ------------------
@@ -147,9 +147,7 @@ def test_ner_from_config():
147147
"""
148148
# Patch LLM.from_config to return our dummy LLM regardless of input.
149149
original_llm_from_config = LLM.from_config
150-
LLM.from_config = (
151-
lambda cfg: DummyLLM()
152-
) # pyright: ignore[reportAttributeAccessIssue]
150+
LLM.from_config = lambda cfg: DummyLLM() # pyright: ignore[reportAttributeAccessIssue]
153151

154152
config = NERExtractorConfig(
155153
llm=LLMConfig("dummy"), # dummy config; our lambda ignores it
@@ -174,9 +172,7 @@ def test_ner_process():
174172
which should add to the sample's metadata a list with one dictionary.
175173
"""
176174
original_llm_from_config = LLM.from_config
177-
LLM.from_config = (
178-
lambda cfg: DummyLLM()
179-
) # pyright: ignore[reportAttributeAccessIssue]
175+
LLM.from_config = lambda cfg: DummyLLM() # pyright: ignore[reportAttributeAccessIssue]
180176

181177
config = NERExtractorConfig(
182178
llm=LLMConfig("dummy"),
@@ -204,15 +200,15 @@ def test_ner_process():
204200
# We expect one entity: HELLO WORLD as an ORGANIZATION with the given description.
205201
assert len(ner_entities) == 1, f"Expected 1 entity, got {len(ner_entities)}."
206202
entity_info: dict[str, str] = ner_entities[0]
207-
assert (
208-
entity_info.get("entity") == "HELLO WORLD"
209-
), f"Unexpected entity name: {entity_info.get('entity')}"
210-
assert (
211-
entity_info.get("type") == "ORGANIZATION"
212-
), f"Unexpected entity type: {entity_info.get('type')}"
213-
assert entity_info.get("description") == [
214-
"A SAMPLE ORGANIZATION"
215-
], f"Unexpected entity description: {entity_info.get('description')}"
203+
assert entity_info.get("entity") == "HELLO WORLD", (
204+
f"Unexpected entity name: {entity_info.get('entity')}"
205+
)
206+
assert entity_info.get("type") == "ORGANIZATION", (
207+
f"Unexpected entity type: {entity_info.get('type')}"
208+
)
209+
assert entity_info.get("description") == ["A SAMPLE ORGANIZATION"], (
210+
f"Unexpected entity description: {entity_info.get('description')}"
211+
)
216212

217213
# Restore the original LLM.from_config
218214
LLM.from_config = original_llm_from_config
@@ -253,9 +249,9 @@ def test_tagger_from_load_tagger_modalities():
253249
"""
254250
config = BaseTaggerConfig(type="modalities_counter", args={})
255251
tagger = load_tagger(config)
256-
assert isinstance(
257-
tagger, ModalitiesCounter
258-
), "Expected a ModalitiesCounter instance."
252+
assert isinstance(tagger, ModalitiesCounter), (
253+
"Expected a ModalitiesCounter instance."
254+
)
259255

260256

261257
def test_tagger_from_load_tagger_lang_detector():
@@ -292,9 +288,9 @@ def test_tagger_process_words_counter():
292288
processed = tagger.process(sample)
293289
expected_count = len(sample.text.split())
294290
# WordsCounter's default metadata_key is set in its __init__ to 'word_count'
295-
assert (
296-
sample.metadata.get("word_count") == expected_count
297-
), f"Expected word_count {expected_count}, got {sample.metadata.get('word_count')}"
291+
assert sample.metadata.get("word_count") == expected_count, (
292+
f"Expected word_count {expected_count}, got {sample.metadata.get('word_count')}"
293+
)
298294
assert isinstance(processed, list), "Expected process() to return a list."
299295

300296

@@ -318,9 +314,9 @@ def test_tagger_process_modalities_counter():
318314
processed = tagger.process(sample)
319315
expected_count = len(sample.modalities)
320316
# ModalitiesCounter's default metadata_key is 'modalities_count'
321-
assert (
322-
sample.metadata.get("modalities_count") == expected_count
323-
), f"Expected modalities_count {expected_count}, got {sample.metadata.get('modalities_count')}"
317+
assert sample.metadata.get("modalities_count") == expected_count, (
318+
f"Expected modalities_count {expected_count}, got {sample.metadata.get('modalities_count')}"
319+
)
324320
assert isinstance(processed, list), "Expected process() to return a list."
325321

326322

tests/test_processors_local.py

Lines changed: 33 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -63,13 +63,13 @@ def test_docx_no_image_extraction():
6363
)
6464

6565
# Ensure that the attachment placeholder is not present.
66-
assert (
67-
"<attachment>" not in combined_text
68-
), "Attachment tag should not appear when image extraction is disabled."
66+
assert "<attachment>" not in combined_text, (
67+
"Attachment tag should not appear when image extraction is disabled."
68+
)
6969
# Verify that no images were extracted.
70-
assert (
71-
len(result.modalities) == 0
72-
), "Expected no images when image extraction is disabled."
70+
assert len(result.modalities) == 0, (
71+
"Expected no images when image extraction is disabled."
72+
)
7373

7474

7575
# ------------------ EML Processor Tests ------------------
@@ -142,14 +142,14 @@ def test_md_image_extraction():
142142
)
143143
# Count the number of attachment placeholders inserted in text
144144
placeholder_count = combined_text.count(custom_attachment_tag)
145-
assert (
146-
placeholder_count == 2
147-
), f"Expected 2 attachment placeholders, found {placeholder_count}"
145+
assert placeholder_count == 2, (
146+
f"Expected 2 attachment placeholders, found {placeholder_count}"
147+
)
148148
# Assert that modalities is a list and that two images were extracted
149149
assert isinstance(result.modalities, list), "Modalities should be a list"
150-
assert (
151-
len(result.modalities) == 2
152-
), f"Expected 2 images in modalities, found {len(result.modalities)}"
150+
assert len(result.modalities) == 2, (
151+
f"Expected 2 images in modalities, found {len(result.modalities)}"
152+
)
153153

154154

155155
# ------------------ Media Processor Tests ------------------
@@ -192,9 +192,9 @@ def test_media_process_batch():
192192
# Call process_batch with a dummy num_workers value
193193
results = processor.process_batch(files, fast_mode=False, num_workers=1)
194194
# Verify that each file in the batch produces a result with non-empty text and a list of modalities.
195-
assert len(results) == len(
196-
files
197-
), "Number of results should match number of files processed."
195+
assert len(results) == len(files), (
196+
"Number of results should match number of files processed."
197+
)
198198
for result in results:
199199
assert result.text, "Text should not be empty"
200200
assert isinstance(result.modalities, list), "Modalities should be a list"
@@ -230,9 +230,9 @@ def test_pptx_extract_notes():
230230
)
231231

232232
expected_text = "Data analysis has multiple facets and approaches"
233-
assert (
234-
expected_text in combined_text
235-
), f"Expected notes not found in extracted text: {combined_text}"
233+
assert expected_text in combined_text, (
234+
f"Expected notes not found in extracted text: {combined_text}"
235+
)
236236

237237

238238
# ------------------ Spreadsheet Processor Tests ------------------
@@ -273,9 +273,9 @@ def test_spreadsheet_multi_sheet_content():
273273
# 1) Confirm that the names of each sheet appear in the extracted text
274274
expected_sheet_names = ["Form Responses 1"]
275275
for sheet_name in expected_sheet_names:
276-
assert (
277-
sheet_name in combined_text
278-
), f"Didn't find '{sheet_name}' in extracted text."
276+
assert sheet_name in combined_text, (
277+
f"Didn't find '{sheet_name}' in extracted text."
278+
)
279279

280280
# 2) Check for specific cell content that should exist in the file
281281
expected_snippets = [
@@ -285,9 +285,9 @@ def test_spreadsheet_multi_sheet_content():
285285
"Female",
286286
]
287287
for snippet in expected_snippets:
288-
assert (
289-
snippet in combined_text
290-
), f"Expected '{snippet}' not found in spreadsheet text."
288+
assert snippet in combined_text, (
289+
f"Expected '{snippet}' not found in spreadsheet text."
290+
)
291291

292292
# 3) Since there are no images, confirm modalities is empty
293293
assert isinstance(result.modalities, list), "Modalities should be a list."
@@ -352,9 +352,9 @@ def test_text_process_standard():
352352
result = processor.process(sample_file)
353353
# Verify that some text is extracted and no image modalities are returned.
354354
assert result.text, "Text should not be empty"
355-
assert (
356-
isinstance(result.modalities, list) and len(result.modalities) == 0
357-
), "Modalities should be an empty list"
355+
assert isinstance(result.modalities, list) and len(result.modalities) == 0, (
356+
"Modalities should be an empty list"
357+
)
358358

359359

360360
# ------------------ URL Processor Tests ------------------
@@ -373,9 +373,9 @@ def test_url_process_standard():
373373
" ".join(result.text) if isinstance(result.text, list) else result.text
374374
)
375375
# Expect that the text from example.com contains "illustrative examples".
376-
assert (
377-
"illustrative examples" in combined_text
378-
), "Expected 'illustrative examples' in extracted text from http://example.com"
376+
assert "illustrative examples" in combined_text, (
377+
"Expected 'illustrative examples' in extracted text from http://example.com"
378+
)
379379
assert isinstance(result.modalities, list), "Modalities should be a list"
380380

381381

@@ -392,6 +392,6 @@ def test_url_process_invalid():
392392
result = processor.process(sample_url)
393393
# If URL processing fails, expect empty text and no modalities.
394394
assert not result.text, "Expected empty text for invalid URL"
395-
assert (
396-
isinstance(result.modalities, list) and len(result.modalities) == 0
397-
), "Expected no modalities for invalid URL"
395+
assert isinstance(result.modalities, list) and len(result.modalities) == 0, (
396+
"Expected no modalities for invalid URL"
397+
)

0 commit comments

Comments (0)