Skip to content

Commit e085343

Browse files
authored
Fix highly-nested parens when formula parsing in Composition (#3569)
* fix highly-nested formula parsing in Composition
* add failure cases from gh-3559 as tests
1 parent 6e9a033 commit e085343

File tree

2 files changed

+20
-9
lines changed

2 files changed

+20
-9
lines changed

Diff for: pymatgen/core/composition.py

+10 -9
Original file line numberDiff line numberDiff line change
@@ -558,15 +558,16 @@ def get_sym_dict(form: str, factor: float) -> dict[str, float]:
558558
raise ValueError(f"{form} is an invalid formula!")
559559
return sym_dict
560560

561-
m = re.search(r"\(([^\(\)]+)\)\s*([\.e\d]*)", formula)
562-
if m:
561+
match = re.search(r"\(([^\(\)]+)\)\s*([\.e\d]*)", formula)
562+
while match:
563563
factor = 1.0
564-
if m.group(2) != "":
565-
factor = float(m.group(2))
566-
unit_sym_dict = get_sym_dict(m.group(1), factor)
564+
if match.group(2) != "":
565+
factor = float(match.group(2))
566+
unit_sym_dict = get_sym_dict(match.group(1), factor)
567567
expanded_sym = "".join(f"{el}{amt}" for el, amt in unit_sym_dict.items())
568-
expanded_formula = formula.replace(m.group(), expanded_sym)
569-
return self._parse_formula(expanded_formula)
568+
expanded_formula = formula.replace(match.group(), expanded_sym, 1)
569+
formula = expanded_formula
570+
match = re.search(r"\(([^\(\)]+)\)\s*([\.e\d]*)", formula)
570571
return get_sym_dict(formula, 1)
571572

572573
@property
@@ -581,14 +582,14 @@ def anonymized_formula(self) -> str:
581582
reduced /= gcd(*(int(i) for i in self.values()))
582583

583584
anon = ""
584-
for e, amt in zip(string.ascii_uppercase, sorted(reduced.values())):
585+
for elem, amt in zip(string.ascii_uppercase, sorted(reduced.values())):
585586
if amt == 1:
586587
amt_str = ""
587588
elif abs(amt % 1) < 1e-8:
588589
amt_str = str(int(amt))
589590
else:
590591
amt_str = str(amt)
591-
anon += f"{e}{amt_str}"
592+
anon += f"{elem}{amt_str}"
592593
return anon
593594

594595
@property

Diff for: tests/core/test_composition.py

+10
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,16 @@ def test_formula(self):
186186

187187
assert Composition("Na 3 Zr (PO 4) 3").reduced_formula == "Na3Zr(PO4)3"
188188

189+
# gh-3559
190+
very_nested_formula = (
191+
"(Bi2(Mg0.667Nb1.333)O7)((Bi2(Mg0.667Nb1.333)O7)0.9(SrCO3)0.1)((Bi2(Mg0.667Nb1.333)O7)0.7(SrCO3)0.3)"
192+
)
193+
assert Composition(very_nested_formula).formula == "Sr0.4 Mg1.7342 Nb3.4658 Bi5.2 C0.4 O19.4"
194+
assert Composition(very_nested_formula) == Composition(
195+
"(Bi2(Mg0.667Nb1.333)O7)1((Bi2(Mg0.667Nb1.333)O7)0.9(SrCO3)0.1)((Bi2(Mg0.667Nb1.333)O7)0.7(SrCO3)0.3)"
196+
)
197+
assert Composition("(C)((C)0.9(B)0.1)") == Composition("C1.9 B0.1")
198+
189199
assert Composition("NaN").reduced_formula == "NaN"
190200
with pytest.raises(ValueError, match=r"float\('NaN'\) is not a valid Composition, did you mean str\('NaN'\)\?"):
191201
Composition(float("NaN"))

0 commit comments

Comments
 (0)