Skip to content

Commit bc0885b

Browse files
committed
Enhance INCAR parsing of backslash line continuations and multi-line strings; fix handling of `!` comments
Fix most issues: multi-line strings still don't work. Almost there; a statement inside a comment should not be parsed. Guess it's fine to strip multi-line strings. `;!` is not a valid use case; `;` is meant to connect two statements. Oops, looks like comments would be parsed. Fix test. First working version.
1 parent e4139b1 commit bc0885b

File tree

2 files changed

+99
-86
lines changed

2 files changed

+99
-86
lines changed

src/pymatgen/io/vasp/inputs.py

Lines changed: 33 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -961,13 +961,39 @@ def from_str(cls, string: str) -> Self:
961961
Returns:
962962
Incar object
963963
"""
964+
string = "\n".join([ln.split("#", 1)[0].split("!", 1)[0].rstrip() for ln in string.splitlines()])
965+
964966
params: dict[str, Any] = {}
965-
for line in clean_lines(string.splitlines()):
966-
for sline in line.split(";"):
967-
if match := re.match(r"(\w+)\s*=\s*(.*)", sline.strip()):
968-
key: str = match[1].strip()
969-
val: str = match[2].strip()
970-
params[key] = cls.proc_val(key, val)
967+
968+
# Handle line continuations (\)
969+
string = re.sub(r"\\\s*\n", " ", string)
970+
971+
# Regex pattern to find all valid "key = value" assignments at once
972+
pattern = re.compile(
973+
r"""
974+
(?P<key>\w+) # Key (e.g. ENCUT)
975+
\s*=\s* # Equals sign and optional spaces
976+
(?: # Non-capturing group for the value
977+
" # Opening quote
978+
(?P<qval>.*?) # Capture everything inside (non-greedy)
979+
[ \t]*" # Allow trailing spaces/tabs before closing quote
980+
| # OR
981+
(?P<val>[^#!;\n]*) # Unquoted value (stops before comment/separator)
982+
)
983+
""",
984+
re.VERBOSE | re.DOTALL,
985+
)
986+
987+
# Find all matches in the entire string
988+
for match in pattern.finditer(string):
989+
key = match.group("key")
990+
val = match.group("qval") if match.group("qval") is not None else (match.group("val") or "").strip()
991+
992+
if not val:
993+
continue
994+
995+
params[key] = cls.proc_val(key, val)
996+
971997
return cls(params)
972998

973999
@staticmethod
@@ -1038,7 +1064,7 @@ def proc_val(key: str, val: str) -> list | bool | float | int | str:
10381064
)
10391065
lower_str_keys = ("ML_MODE",)
10401066
# String keywords to read "as is" (no case transformation, only stripped)
1041-
as_is_str_keys = ("SYSTEM",)
1067+
as_is_str_keys = ("SYSTEM", "WANNIER90_WIN")
10421068

10431069
def smart_int_or_float_bool(str_: str) -> float | int | bool:
10441070
"""Determine whether a string represents an integer or a float."""

tests/io/vasp/test_inputs.py

Lines changed: 66 additions & 79 deletions
Original file line numberDiff line numberDiff line change
@@ -886,104 +886,91 @@ def test_write(self):
886886
incar = Incar.from_file(tmp_file)
887887
assert incar == self.incar
888888

889-
def test_from_str_complex(self):
890-
r"""Test of handling complex INCAR:
891-
- Multiple statements on a single line separated by semicolon
892-
- Comments marked by hashtag # or exclamation mark !
893-
- Ignore lines does not fit (tag = values) statement format
894-
- Long lines split by backslash \
895-
- Multi-line strings (comment would not be ignored), e.g. WANNIER90_WIN
896-
897-
TODO:
898-
- test line-ending char independence
899-
- test cast casting for multi-line string (auto-capitalization?)
900-
"""
889+
def test_from_str_comment_handling(self):
901890
incar_str = r"""
902-
# Test comment handling (especially for string tags)
903-
SIGMA = 0.05 # random comment (known float tag)
904-
EDIFF = 1e-6 ! another comment (known float tag)
905-
ALGO = Normal # comment (unknown tag -> inferred as str)
906-
GGA = PE ! comment (unknown tag -> inferred as str)
907-
908-
# Test interaction between semicolon and comment
909-
ENCUT = 520; ISMEAR = 0 # smearing scheme
910-
PREC = Accurate ; LREAL = Auto ! precision and projection scheme
911-
NELM = 60; ! ENCUT = 200 # should not parse second assignment
912-
ENMIN = 100; # ENCUT = 200 # should not parse second assignment
913-
914-
# Line continuation with backslash (backslash in comment)
915-
ENMAX = 200 ! \
916-
IBRION = 0 # \
917-
MAGMOM = 0 0 1.0 0 0 -1.0 \
918-
0 0 1.0 0 0 -1.0 \
919-
6*0
920-
921-
# Multi-line string with embedded comments
922-
WANNIER90_WIN = "Begin Projections
923-
Fe:d ; Fe:p # comment inside string
924-
End Projections ! random comment
925-
"
891+
# A = 0
892+
! B=1
893+
SIGMA = 0.05 # random comment (known float tag)
894+
EDIFF = 1e-6 ! another comment (known float tag)
895+
ALGO = Normal # comment (unknown tag -> inferred as str)
896+
GGA = PE ! comment (unknown tag -> inferred as str)
897+
"""
898+
incar = Incar.from_str(incar_str)
926899

927-
# Test valid statement (tag = values) in comment
928-
! invalid ENCUT = 100
929-
# still invalid ENCUT = 200
900+
assert set(incar.keys()) == {"SIGMA", "EDIFF", "ALGO", "GGA"}
901+
assert incar["SIGMA"] == approx(0.05)
902+
assert incar["EDIFF"] == approx(1e-6)
903+
assert incar["ALGO"] == "Normal"
904+
assert incar["GGA"] == "Pe"
930905

931-
# Test invalid statement (tag = values)
932-
Not a valid statement
933-
ENCUT 300
906+
def test_from_str_semicolon_separated_statements(self):
907+
# Test interaction between semicolon and comment
908+
incar_str = r"""
909+
ENMAX = 400; ALGO = Fast ! A = 0
910+
ENCUT = 500; ISMEAR = 0 # B=1
911+
PREC = Accurate ; LREAL = Auto ! precision and projection scheme
912+
IBRION = 2; ISIF = 3; NSW = 100 # three statements in one line
934913
"""
935-
936914
incar = Incar.from_str(incar_str)
937915

938-
expected_keys = {
916+
assert set(incar.keys()) == {
917+
"ENMAX",
918+
"ALGO",
939919
"ENCUT",
940920
"ISMEAR",
941921
"PREC",
942922
"LREAL",
943-
"NELM",
944-
"ENMIN",
945-
"ENMAX",
946923
"IBRION",
947-
"ALGO",
948-
"GGA",
949-
"SIGMA",
950-
"EDIFF",
951-
"MAGMOM",
952-
"WANNIER90_WIN",
924+
"ISIF",
925+
"NSW",
953926
}
954-
assert set(incar.keys()) == expected_keys
955927

956-
# Comment handling
957-
assert incar["SIGMA"] == approx(0.05)
958-
assert incar["EDIFF"] == approx(1e-6)
959-
assert incar["ALGO"] == "Normal"
960-
assert incar["GGA"] == "Pe"
961-
962-
# Line with both ; and comment
963-
assert incar["ENCUT"] == 520
928+
assert incar["ENMAX"] == 400
929+
assert incar["ALGO"] == "Fast"
930+
assert incar["ENCUT"] == 500
964931
assert incar["ISMEAR"] == 0
965-
assert incar["NELM"] == 60
966-
assert incar["ENMIN"] == 100
932+
assert incar["PREC"] == "Accurate"
933+
assert incar["LREAL"] == "Auto"
934+
assert incar["IBRION"] == 2
935+
assert incar["ISIF"] == 3
936+
assert incar["NSW"] == 100
937+
938+
def test_from_str_line_continuation_with_backslash(self):
939+
# Test line continuation with backslash
940+
incar_str = r"""
941+
ALGO = Normal # \ This backslash should be ignored
942+
ENMAX = 200 ! \ This backslash should be ignored
943+
MAGMOM = 0 0 1.0 0 0 -1.0 \
944+
0 0 1.0 0 0 -1.0 \
945+
6*0
946+
"""
947+
incar = Incar.from_str(incar_str)
948+
949+
assert set(incar.keys()) == {"ALGO", "ENMAX", "MAGMOM"}
950+
assert incar["ALGO"] == "Normal"
967951
assert incar["ENMAX"] == 200
968-
assert incar["IBRION"] == 0
969-
assert incar["PREC"].lower() == "accurate"
970-
assert incar["LREAL"].lower() == "auto"
971952

972-
# Continuation merged properly
973-
magmom = incar["MAGMOM"]
974-
assert magmom == [0, 0, 1.0, 0, 0, -1.0, 0, 0, 1.0, 0, 0, -1.0] + [0.0] * 6
953+
assert incar["MAGMOM"] == [0, 0, 1.0, 0, 0, -1.0, 0, 0, 1.0, 0, 0, -1.0] + [0.0] * 6
975954

976-
# Multi-line string with comment
977-
win = incar["WANNIER90_WIN"]
978-
expected_win = "Begin Projections\nFe:d ; Fe:p # comment inside string\nEnd Projections ! random comment\n"
979-
# Comments and structure inside string should be preserved exactly
980-
assert win.strip() == expected_win.strip()
955+
def test_from_str_multiline_string(self):
956+
incar_str = r"""
957+
# Multi-line string with embedded comments
958+
WANNIER90_WIN = "begin Projections # should NOT be capitalized
959+
Fe:d ; Fe:p # comment inside string
960+
End Projections ! random comment
961+
" # comment after closing quote
962+
"""
963+
incar = Incar.from_str(incar_str)
981964

982-
def test_from_str_not_closed_multi_line_str(self):
983-
"""Test not closed (no ending quote) multi-line string.
965+
assert set(incar.keys()) == {"WANNIER90_WIN"}
984966

985-
TODO:
986-
"""
967+
# Comments inside the string would be lost
968+
assert (
969+
incar["WANNIER90_WIN"]
970+
== """begin Projections
971+
Fe:d ; Fe:p
972+
End Projections"""
973+
)
987974

988975
def test_get_str(self):
989976
incar_str = self.incar.get_str(pretty=True, sort_keys=True)

0 commit comments

Comments
 (0)