From 6dd593030eb85cf463428fcdd142bf364ac94c35 Mon Sep 17 00:00:00 2001 From: Joachim Meyer Date: Thu, 9 Oct 2025 09:53:19 -0600 Subject: [PATCH 1/2] Input Mapping error - Improve logging message. Add a colon to the error message to better identify the column that was not matched. Before it was hard to discern the actual column name. --- insitupy/variables/base_variables.py | 2 +- tests/variables/test_base_variables.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/insitupy/variables/base_variables.py b/insitupy/variables/base_variables.py index d28628e..ab0e7b8 100644 --- a/insitupy/variables/base_variables.py +++ b/insitupy/variables/base_variables.py @@ -154,7 +154,7 @@ def from_mapping( column_mapping[result] = None else: raise InputMappingError( - f"Could not find mapping for {input_name}" + f"Could not find mapping for: {input_name}" ) LOG.debug( f"Mapping {result} to {result} (type {column_mapping[result]})" diff --git a/tests/variables/test_base_variables.py b/tests/variables/test_base_variables.py index a45bc53..9a171da 100644 --- a/tests/variables/test_base_variables.py +++ b/tests/variables/test_base_variables.py @@ -66,7 +66,7 @@ def test_from_mapping_success_on_code( def test_from_mapping_failure(self, extendable_variables_fixture): with pytest.raises( - InputMappingError, match="Could not find mapping for humidity" + InputMappingError, match="Could not find mapping for: humidity" ): extendable_variables_fixture.from_mapping("humidity") From 1bf99ff10021909cd901156a7215430efe05f146 Mon Sep 17 00:00:00 2001 From: Joachim Meyer Date: Thu, 9 Oct 2025 09:55:17 -0600 Subject: [PATCH 2/2] Metadata - Improve handling of empty headers. Add handling for headers that are empty or a sequence of empty separators. Also improve handling of different header line separators (\n or \r) --- insitupy/io/metadata.py | 32 ++++++++++++------- tests/io/__init__.py | 0 tests/io/test_metadata.py | 66 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 86 insertions(+), 12 deletions(-) create mode 100644 tests/io/__init__.py create mode 100644 tests/io/test_metadata.py diff --git a/insitupy/io/metadata.py b/insitupy/io/metadata.py index 0a577a8..bcdcff1 100644 --- a/insitupy/io/metadata.py +++ b/insitupy/io/metadata.py @@ -26,7 +26,8 @@ class MetaDataParser: OUT_TIMEZONE = "UTC" DEFAULT_HEADER_SEPARATOR = "," DEFAULT_HEADER_LINE_START = '#' - DEAFAULT_COLUMN_SEPARATOR = ',' + DEFAULT_COLUMN_SEPARATOR = ',' + END_OF_LINE = '\n\r' def __init__( self, @@ -34,7 +35,7 @@ def __init__( primary_variable_file: Optional[Union[str, Path]] = None, metadata_variable_file: Optional[Union[str, Path]] = None, header_sep=DEFAULT_HEADER_SEPARATOR, - column_sep=DEAFAULT_COLUMN_SEPARATOR, + column_sep=DEFAULT_COLUMN_SEPARATOR, allow_split_lines: bool = False, allow_map_failures: bool = False, _id: Optional[str] = None, @@ -200,16 +201,25 @@ def _preparse_meta(self, meta_lines): # Collect key value pairs from the information above the column header for ln in meta_lines: - d = ln.split(self._header_sep) + # Filter empty strings. Sometimes we get empty metadata lines or + # entries in form of: Key,Value,,, , + metadata_columns = [ + col for col in ln.split(self._header_sep) + if (col and len(col.strip()) > 0) + ] + + # If we don't have a key and value pair, there is no information to store + if len(metadata_columns) < 2: + continue # Key is always the first entry in comma sep list - key = StringManager.standardize_key(d[0]) + key = StringManager.standardize_key(metadata_columns[0]) # Avoid splitting on times if 'time' in key or 'date' in key: - value = ':'.join(d[1:]).strip() + value = ':'.join(metadata_columns[1:]).strip() else: - value = ', '.join(d[1:]) + value = ', '.join(metadata_columns[1:]) value = StringManager.clean_str(value) # cast the rough object key to a known key @@ -291,16 +301,14 @@ def _parse_columns(self, str_line): """ Parse the column names from the input line. This can include mapping """ - # Parse the columns header based on the size of the last line - # Remove units - # for c in ['()', '[]']: - # str_line = StringManager.strip_encapsulated(str_line, c) - raw_cols = str_line.strip( - self.DEFAULT_HEADER_LINE_START + self.DEFAULT_HEADER_LINE_START + self.END_OF_LINE ).split( self._column_sep ) + # Filter empty strings, especially with trailing commas. + # Example: col1, col2, col3, + raw_cols = [col for col in raw_cols if col] # Clean the raw columns standard_cols = [StringManager.standardize_key(c) for c in raw_cols] # Infer units from the raw columns diff --git a/tests/io/__init__.py b/tests/io/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/tests/io/test_metadata.py b/tests/io/test_metadata.py new file mode 100644 index 0000000..4d008e5 --- /dev/null +++ b/tests/io/test_metadata.py @@ -0,0 +1,66 @@ +import pytest +from insitupy.io.metadata import MetaDataParser + + +@pytest.fixture +def metadata_parser(): + """Fixture to initialize the MetaDataParser with default settings.""" + return MetaDataParser() + + +META_LINES = [ + "Site,Site1", + "Pit ID,1234", + "date/time,2025-10-08 12:34:56", +] +META_LINES_PARSED = { + "site_id": "Site1", + "pit_id": "1234", + "datetime": "2025-10-08 12:34:56", +} + + +class TestMetaDataParser: + def test_preparse_meta_returns_dict(self, metadata_parser): + result = metadata_parser._preparse_meta(META_LINES) + + assert result == META_LINES_PARSED, "Metadata lines parsed incorrectly." + + def test_preparse_meta_with_empty_lines(self, metadata_parser): + meta_lines = META_LINES + [ + "", + " ," + ] + + result = metadata_parser._preparse_meta(meta_lines) + + assert result == META_LINES_PARSED, "Empty lines were not ignored correctly." + + def test_preparse_meta_with_time_and_date_key(self, metadata_parser): + """ + Ensure that times are not improperly split into separate entries. + """ + meta_lines = [ + "Time,12:34:56", + "Date,2025-10-09", + ] + + result = metadata_parser._preparse_meta(meta_lines) + + expected = { + "time": "12:34:56", + "date": "2025-10-09", + } + assert result == expected, "Time or date keys processed incorrectly." + + def test_preparse_meta_with_no_key_value_pairs(self, metadata_parser): + meta_lines = META_LINES + [ + "Time,,", + "Time start/end,,, ,", + ] + + result = metadata_parser._preparse_meta(meta_lines) + + assert result == META_LINES_PARSED, ( + "Lines without key-value pairs were not skipped." + )