Skip to content

Commit 108ec18

Browse files
committed
style: Fix code formatting with ruff format
Applied ruff formatting to ensure code style consistency. No functional changes.
1 parent f6d0253 commit 108ec18

File tree

4 files changed: +98 additions, -25 deletions

src/aletheia_probe/bibtex_parser.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -378,7 +378,7 @@ def _remove_nested_braces(value: str) -> str:
378378

379379
# Remove nested curly braces iteratively until none remain
380380
# This handles multiple levels like {{{text}}} -> {{text}} -> {text} -> text
381-
while re.search(r'\{[^{}]*\}', value):
382-
value = re.sub(r'\{([^{}]*)\}', r'\1', value)
381+
while re.search(r"\{[^{}]*\}", value):
382+
value = re.sub(r"\{([^{}]*)\}", r"\1", value)
383383

384384
return value.strip()

src/aletheia_probe/normalizer.py

Lines changed: 60 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,61 @@ def __init__(self) -> None:
2121

2222
# Common acronyms that should remain uppercase
2323
self.acronyms = {
24-
"IEEE", "ACM", "SIGCOMM", "SIGCHI", "SIGKDD", "SIGMOD", "SIGPLAN",
25-
"VLDB", "ICML", "NIPS", "NEURIPS", "ICLR", "AAAI", "IJCAI", "CIKM",
26-
"WWW", "KDD", "ICDM", "SDM", "PAKDD", "ECML", "PKDD", "CLOUD",
27-
"NASA", "NIH", "NSF", "DARPA", "NIST", "ISO", "IEC", "ITU",
28-
"RFC", "HTTP", "TCP", "IP", "UDP", "DNS", "SSL", "TLS",
29-
"AI", "ML", "NLP", "CV", "HCI", "DB", "OS", "SE", "PL",
30-
"UK", "USA", "US", "EU", "UN", "WHO", "NATO"
24+
"IEEE",
25+
"ACM",
26+
"SIGCOMM",
27+
"SIGCHI",
28+
"SIGKDD",
29+
"SIGMOD",
30+
"SIGPLAN",
31+
"VLDB",
32+
"ICML",
33+
"NIPS",
34+
"NEURIPS",
35+
"ICLR",
36+
"AAAI",
37+
"IJCAI",
38+
"CIKM",
39+
"WWW",
40+
"KDD",
41+
"ICDM",
42+
"SDM",
43+
"PAKDD",
44+
"ECML",
45+
"PKDD",
46+
"CLOUD",
47+
"NASA",
48+
"NIH",
49+
"NSF",
50+
"DARPA",
51+
"NIST",
52+
"ISO",
53+
"IEC",
54+
"ITU",
55+
"RFC",
56+
"HTTP",
57+
"TCP",
58+
"IP",
59+
"UDP",
60+
"DNS",
61+
"SSL",
62+
"TLS",
63+
"AI",
64+
"ML",
65+
"NLP",
66+
"CV",
67+
"HCI",
68+
"DB",
69+
"OS",
70+
"SE",
71+
"PL",
72+
"UK",
73+
"USA",
74+
"US",
75+
"EU",
76+
"UN",
77+
"WHO",
78+
"NATO",
3179
}
3280

3381
# Common abbreviation expansions
@@ -135,19 +183,19 @@ def _remove_bracketed_content(self, text: str) -> str:
135183
"""
136184
# Remove nested curly braces (BibTeX formatting) - handle multiple levels
137185
# This handles cases like {{IEEE}} -> IEEE
138-
while re.search(r'\{[^{}]*\}', text):
139-
text = re.sub(r'\{([^{}]*)\}', r'\1', text)
186+
while re.search(r"\{[^{}]*\}", text):
187+
text = re.sub(r"\{([^{}]*)\}", r"\1", text)
140188

141189
# Remove content within square brackets [...]
142190
# This handles abbreviations and annotations like [2023], [Online]
143-
text = re.sub(r'\[[^\]]*\]', '', text)
191+
text = re.sub(r"\[[^\]]*\]", "", text)
144192

145193
# Remove content within parentheses (...)
146194
# This handles journal/conference abbreviations like (NeurIPS), (CLOUD)
147-
text = re.sub(r'\([^)]*\)', '', text)
195+
text = re.sub(r"\([^)]*\)", "", text)
148196

149197
# Clean up multiple spaces left by bracket removal
150-
text = re.sub(r'\s+', ' ', text)
198+
text = re.sub(r"\s+", " ", text)
151199

152200
return text.strip()
153201

tests/unit/test_bibtex_parser.py

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -727,7 +727,9 @@ def test_nested_brace_removal(self):
727727
assert result == "CLOUD"
728728

729729
# Mixed content with multiple braced sections
730-
result = BibtexParser._remove_nested_braces("{{IEEE}} {{International Conference}} on {{Cloud Computing}}")
730+
result = BibtexParser._remove_nested_braces(
731+
"{{IEEE}} {{International Conference}} on {{Cloud Computing}}"
732+
)
731733
assert result == "IEEE International Conference on Cloud Computing"
732734

733735
# Text without braces should remain unchanged
@@ -761,11 +763,15 @@ def test_parse_bibtex_conference_with_nested_braces(self, tmp_path):
761763
entry = entries[0]
762764

763765
# Title should have nested braces removed
764-
expected_title = "Software Greenability: A Case Study of Cloud-Based Applications"
766+
expected_title = (
767+
"Software Greenability: A Case Study of Cloud-Based Applications"
768+
)
765769
assert entry.title == expected_title
766770

767771
# Conference name should have nested braces removed
768-
expected_conference = "2018 IEEE 11th International Conference on Cloud Computing (CLOUD)"
772+
expected_conference = (
773+
"2018 IEEE 11th International Conference on Cloud Computing (CLOUD)"
774+
)
769775
assert entry.journal_name == expected_conference
770776

771777
def test_parse_bibtex_journal_with_nested_braces(self, tmp_path):

tests/unit/test_normalizer.py

Lines changed: 27 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -143,12 +143,22 @@ def test_bracket_removal_parentheses(self):
143143
normalizer = InputNormalizer()
144144

145145
# Test journal name with abbreviation in parentheses
146-
result = normalizer.normalize("Advances in Neural Information Processing Systems (NeurIPS)")
147-
assert result.normalized_name == "Advances in Neural Information Processing Systems"
148-
assert result.raw_input == "Advances in Neural Information Processing Systems (NeurIPS)"
146+
result = normalizer.normalize(
147+
"Advances in Neural Information Processing Systems (NeurIPS)"
148+
)
149+
assert (
150+
result.normalized_name
151+
== "Advances in Neural Information Processing Systems"
152+
)
153+
assert (
154+
result.raw_input
155+
== "Advances in Neural Information Processing Systems (NeurIPS)"
156+
)
149157

150158
# Test conference name with abbreviation in parentheses
151-
result2 = normalizer.normalize("International Conference on Cloud Computing (CLOUD)")
159+
result2 = normalizer.normalize(
160+
"International Conference on Cloud Computing (CLOUD)"
161+
)
152162
assert result2.normalized_name == "International Conference on CLOUD Computing"
153163

154164
def test_bracket_removal_square_brackets(self):
@@ -172,8 +182,13 @@ def test_bracket_removal_curly_braces(self):
172182
assert result.normalized_name == "IEEE Conference"
173183

174184
# Test nested braces (BibTeX style)
175-
result2 = normalizer.normalize("{{IEEE}} {{International Conference}} on {{Cloud Computing}}")
176-
assert result2.normalized_name == "IEEE International Conference on CLOUD Computing"
185+
result2 = normalizer.normalize(
186+
"{{IEEE}} {{International Conference}} on {{Cloud Computing}}"
187+
)
188+
assert (
189+
result2.normalized_name
190+
== "IEEE International Conference on CLOUD Computing"
191+
)
177192

178193
# Test triple nested braces
179194
result3 = normalizer.normalize("{{{CLOUD}}} Conference")
@@ -184,12 +199,16 @@ def test_bracket_removal_mixed_brackets(self):
184199
normalizer = InputNormalizer()
185200

186201
# Test combination of all bracket types
187-
result = normalizer.normalize("2018 {{IEEE}} 11th {International Conference} on [Cloud] Computing (CLOUD)")
202+
result = normalizer.normalize(
203+
"2018 {{IEEE}} 11th {International Conference} on [Cloud] Computing (CLOUD)"
204+
)
188205
expected = "2018 IEEE 11th International Conference on Computing" # IEEE preserved as acronym
189206
assert result.normalized_name == expected
190207

191208
# Test real BibTeX example
192-
result2 = normalizer.normalize("2018 {{IEEE}} 11th {{International Conference}} on {{Cloud Computing}} ({{CLOUD}})")
209+
result2 = normalizer.normalize(
210+
"2018 {{IEEE}} 11th {{International Conference}} on {{Cloud Computing}} ({{CLOUD}})"
211+
)
193212
expected2 = "2018 IEEE 11th International Conference on CLOUD Computing" # IEEE and CLOUD preserved as acronyms
194213
assert result2.normalized_name == expected2
195214

0 commit comments

Comments
 (0)