diff --git a/dpdata/vasp/outcar.py b/dpdata/vasp/outcar.py index a16fd6f9..1fd9c46f 100644 --- a/dpdata/vasp/outcar.py +++ b/dpdata/vasp/outcar.py @@ -6,6 +6,35 @@ import numpy as np +def _safe_float(value: str, context: str = "") -> float: + """Safely convert string to float with informative error message. + + Parameters + ---------- + value : str + String value to convert to float + context : str, optional + Context information for error message + + Returns + ------- + float + Converted float value + + Raises + ------ + ValueError + If conversion fails, with informative error message + """ + try: + return float(value) + except ValueError as e: + if context: + raise ValueError(f"Failed to parse {context}: {e}") from e + else: + raise ValueError(f"Failed to convert to float: {e}") from e + + def atom_name_from_potcar_string(instr: str) -> str: """Get atom name from a potcar element name. @@ -240,12 +269,20 @@ def analyze_block(lines, ntot, nelm, ml=False): if sc_index >= nelm: is_converge = False elif energy_token[ml_index] in ii: - energy = float(ii.split()[energy_index[ml_index]]) + try: + energy = _safe_float(ii.split()[energy_index[ml_index]], "energy value from OUTCAR") + except (ValueError, IndexError) as e: + raise ValueError(f"Failed to parse energy from OUTCAR line: '{ii.strip()}'. {e}") from e return coord, cell, energy, force, virial, is_converge elif cell_token[ml_index] in ii: for dd in range(3): tmp_l = lines[idx + cell_index[ml_index] + dd] - cell.append([float(ss) for ss in tmp_l.replace("-", " -").split()[0:3]]) + try: + cell_row = [_safe_float(ss, f"cell component on line {idx + cell_index[ml_index] + dd + 1}") + for ss in tmp_l.replace("-", " -").split()[0:3]] + cell.append(cell_row) + except (ValueError, IndexError) as e: + raise ValueError(f"Failed to parse cell vectors from OUTCAR line: '{tmp_l.strip()}'. {e}") from e elif virial_token[ml_index] in ii: in_kB_index = virial_index[ml_index] while idx + in_kB_index < len(lines) and ( @@ -255,7 +292,12 @@ def analyze_block(lines, ntot, nelm, ml=False): assert idx + in_kB_index < len(lines), ( 'ERROR: "in kB" is not found in OUTCAR. Unable to extract virial.' ) - tmp_v = [float(ss) for ss in lines[idx + in_kB_index].split()[2:8]] + virial_line = lines[idx + in_kB_index] + try: + tmp_v = [_safe_float(ss, "virial component from OUTCAR") + for ss in virial_line.split()[2:8]] + except (ValueError, IndexError) as e: + raise ValueError(f"Failed to parse virial from OUTCAR line: '{virial_line.strip()}'. {e}") from e virial = np.zeros([3, 3]) virial[0][0] = tmp_v[0] virial[1][1] = tmp_v[1] @@ -269,7 +311,11 @@ def analyze_block(lines, ntot, nelm, ml=False): elif "TOTAL-FORCE" in ii and (("ML" in ii) == ml): for jj in range(idx + 2, idx + 2 + ntot): tmp_l = lines[jj] - info = [float(ss) for ss in tmp_l.split()] + try: + info = [_safe_float(ss, "force/coordinate component from OUTCAR") + for ss in tmp_l.split()] + except (ValueError, IndexError) as e: + raise ValueError(f"Failed to parse forces/coordinates from OUTCAR line: '{tmp_l.strip()}'. {e}") from e coord.append(info[:3]) force.append(info[3:6]) return coord, cell, energy, force, virial, is_converge diff --git a/dpdata/vasp/poscar.py b/dpdata/vasp/poscar.py index 78b8dbbe..c6fb7ac4 100644 --- a/dpdata/vasp/poscar.py +++ b/dpdata/vasp/poscar.py @@ -4,6 +4,35 @@ import numpy as np +def _safe_float(value: str, context: str = "") -> float: + """Safely convert string to float with informative error message. + + Parameters + ---------- + value : str + String value to convert to float + context : str, optional + Context information for error message + + Returns + ------- + float + Converted float value + + Raises + ------ + ValueError + If conversion fails, with informative error message + """ + try: + return float(value) + except ValueError as e: + if context: + raise ValueError(f"Failed to parse {context}: {e}") from e + else: + raise ValueError(f"Failed to convert to float: {e}") from e + + def _to_system_data_lower(lines, cartesian=True, selective_dynamics=False): def move_flag_mapper(flag): if flag == "T": @@ -17,11 +46,15 @@ def move_flag_mapper(flag): system = {} system["atom_names"] = [str(ii) for ii in lines[5].split()] system["atom_numbs"] = [int(ii) for ii in lines[6].split()] - scale = float(lines[1]) + scale = _safe_float(lines[1].strip(), "scale factor from POSCAR line 2") cell = [] move_flags = [] for ii in range(2, 5): - boxv = [float(jj) for jj in lines[ii].split()] + line_content = lines[ii].split() + try: + boxv = [_safe_float(jj, f"cell vector component on line {ii+1}") for jj in line_content] + except ValueError as e: + raise ValueError(f"Failed to parse cell vectors in POSCAR: {e}") from e boxv = np.array(boxv) * scale cell.append(boxv) system["cells"] = [np.array(cell)] @@ -29,7 +62,10 @@ def move_flag_mapper(flag): coord = [] for ii in range(8, 8 + natoms): tmp = lines[ii].split() - tmpv = [float(jj) for jj in tmp[:3]] + try: + tmpv = [_safe_float(jj, f"coordinate component on line {ii+1}") for jj in tmp[:3]] + except ValueError as e: + raise ValueError(f"Failed to parse coordinates in POSCAR: {e}") from e if cartesian: tmpv = np.array(tmpv) * scale else: diff --git a/tests/test_vasp_float_conversion_fix.py b/tests/test_vasp_float_conversion_fix.py new file mode 100644 index 00000000..f365af35 --- /dev/null +++ b/tests/test_vasp_float_conversion_fix.py @@ -0,0 +1,182 @@ +#!/usr/bin/env python3 +"""Test suite for the string-to-float conversion bug fix in VASP parsing. + +This tests the fix for issue #611: "could not convert string to float" +""" + +import os +import tempfile +import unittest + +from dpdata import System + + +class TestVaspFloatConversionFix(unittest.TestCase): + """Test robust error handling for malformed VASP files.""" + + def setUp(self): + """Set up temporary files for testing.""" + self.temp_files = [] + + def tearDown(self): + """Clean up temporary files.""" + for temp_file in self.temp_files: + try: + os.unlink(temp_file) + except FileNotFoundError: + pass + + def _create_temp_file(self, content, suffix='.poscar'): + """Create a temporary file with given content.""" + with tempfile.NamedTemporaryFile(mode='w', suffix=suffix, delete=False) as f: + f.write(content) + f.flush() + self.temp_files.append(f.name) + return f.name + + def test_poscar_invalid_scale_factor(self): + """Test POSCAR parsing with invalid scale factor.""" + poscar_content = """Test +INVALID_SCALE + 1.0000000000000000 0.0000000000000000 0.0000000000000000 + 0.0000000000000000 1.0000000000000000 0.0000000000000000 + 0.0000000000000000 0.0000000000000000 1.0000000000000000 +Mg +1 +Cartesian +0.0000000000000000 0.0000000000000000 0.0000000000000000 +""" + temp_file = self._create_temp_file(poscar_content) + + with self.assertRaises(ValueError) as cm: + System(temp_file, fmt='vasp/poscar') + + error_msg = str(cm.exception) + self.assertIn("Failed to parse scale factor from POSCAR line 2", error_msg) + self.assertIn("INVALID_SCALE", error_msg) + + def test_poscar_invalid_cell_vector(self): + """Test POSCAR parsing with invalid cell vector component.""" + poscar_content = """Test +1.0 + INVALID_CELL 0.0000000000000000 0.0000000000000000 + 0.0000000000000000 1.0000000000000000 0.0000000000000000 + 0.0000000000000000 0.0000000000000000 1.0000000000000000 +Mg +1 +Cartesian +0.0000000000000000 0.0000000000000000 0.0000000000000000 +""" + temp_file = self._create_temp_file(poscar_content) + + with self.assertRaises(ValueError) as cm: + System(temp_file, fmt='vasp/poscar') + + error_msg = str(cm.exception) + self.assertIn("Failed to parse cell vectors in POSCAR", error_msg) + self.assertIn("INVALID_CELL", error_msg) + + def test_poscar_invalid_coordinates(self): + """Test POSCAR parsing with invalid coordinate component.""" + poscar_content = """Test +1.0 + 1.0000000000000000 0.0000000000000000 0.0000000000000000 + 0.0000000000000000 1.0000000000000000 0.0000000000000000 + 0.0000000000000000 0.0000000000000000 1.0000000000000000 +Mg +1 +Cartesian +INVALID_COORD 0.0000000000000000 0.0000000000000000 +""" + temp_file = self._create_temp_file(poscar_content) + + with self.assertRaises(ValueError) as cm: + System(temp_file, fmt='vasp/poscar') + + error_msg = str(cm.exception) + self.assertIn("Failed to parse coordinates in POSCAR", error_msg) + self.assertIn("INVALID_COORD", error_msg) + + def test_poscar_empty_scale_factor(self): + """Test POSCAR parsing with empty scale factor.""" + poscar_content = """Test + + 1.0000000000000000 0.0000000000000000 0.0000000000000000 + 0.0000000000000000 1.0000000000000000 0.0000000000000000 + 0.0000000000000000 0.0000000000000000 1.0000000000000000 +Mg +1 +Cartesian +0.0000000000000000 0.0000000000000000 0.0000000000000000 +""" + temp_file = self._create_temp_file(poscar_content) + + with self.assertRaises(ValueError) as cm: + System(temp_file, fmt='vasp/poscar') + + error_msg = str(cm.exception) + self.assertIn("Failed to parse scale factor from POSCAR line 2", error_msg) + + def test_poscar_insufficient_cell_components(self): + """Test POSCAR parsing with insufficient cell vector components.""" + poscar_content = """Test +1.0 + 1.0000000000000000 0.0000000000000000 + 0.0000000000000000 1.0000000000000000 0.0000000000000000 + 0.0000000000000000 0.0000000000000000 1.0000000000000000 +Mg +1 +Cartesian +0.0000000000000000 0.0000000000000000 0.0000000000000000 +""" + temp_file = self._create_temp_file(poscar_content) + + with self.assertRaises((ValueError, IndexError)): + System(temp_file, fmt='vasp/poscar') + + def test_poscar_valid_file_still_works(self): + """Test that valid POSCAR files still work correctly.""" + poscar_content = """Test System +1.0 + 5.0000000000000000 0.0000000000000000 0.0000000000000000 + 0.0000000000000000 5.0000000000000000 0.0000000000000000 + 0.0000000000000000 0.0000000000000000 5.0000000000000000 +Mg +1 +Cartesian +0.0000000000000000 0.0000000000000000 0.0000000000000000 +""" + temp_file = self._create_temp_file(poscar_content) + + # This should not raise any exception + sys = System(temp_file, fmt='vasp/poscar') + + # Verify the system was parsed correctly + self.assertEqual(len(sys), 1) # 1 frame + self.assertEqual(sys.get_natoms(), 1) # 1 atom + self.assertEqual(sys["atom_names"], ["Mg"]) + + def test_poscar_special_characters_in_numbers(self): + """Test POSCAR parsing with special characters that might appear in malformed files.""" + poscar_content = """Test +1.0 + 1.0000000000000000 0.0000000000000000 0.0000000000000000 + 0.0000000000000000 1.0000000000000000 0.0000000000000000 + 0.0000000000000000 0.0000000000000000 1.0000000000000000 +Mg +1 +Cartesian +0.0000000000000000 0.0000000000000000 *INVALID* +""" + temp_file = self._create_temp_file(poscar_content) + + with self.assertRaises(ValueError) as cm: + System(temp_file, fmt='vasp/poscar') + + error_msg = str(cm.exception) + self.assertIn("Failed to parse coordinates in POSCAR", error_msg) + self.assertIn("*INVALID*", error_msg) + + +if __name__ == '__main__': + unittest.main() \ No newline at end of file