Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
32 changes: 20 additions & 12 deletions insitupy/io/metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,15 +26,16 @@ class MetaDataParser:
OUT_TIMEZONE = "UTC"
DEFAULT_HEADER_SEPARATOR = ","
DEFAULT_HEADER_LINE_START = '#'
DEAFAULT_COLUMN_SEPARATOR = ','
DEFAULT_COLUMN_SEPARATOR = ','
END_OF_LINE = '\n\r'

def __init__(
self,
timezone: Optional[str] = OUT_TIMEZONE,
primary_variable_file: Optional[Union[str, Path]] = None,
metadata_variable_file: Optional[Union[str, Path]] = None,
header_sep=DEFAULT_HEADER_SEPARATOR,
column_sep=DEAFAULT_COLUMN_SEPARATOR,
column_sep=DEFAULT_COLUMN_SEPARATOR,
allow_split_lines: bool = False,
allow_map_failures: bool = False,
_id: Optional[str] = None,
Expand Down Expand Up @@ -200,16 +201,25 @@ def _preparse_meta(self, meta_lines):

# Collect key value pairs from the information above the column header
for ln in meta_lines:
d = ln.split(self._header_sep)
# Filter empty strings. Sometimes we get empty metadata lines or
# entries in form of: Key,Value,,, ,
metadata_columns = [
col for col in ln.split(self._header_sep)
if (col and len(col.strip()) > 0)
]

# If we don't have a key and value pair, there is no information to store
if len(metadata_columns) < 2:
continue

# Key is always the first entry in comma sep list
key = StringManager.standardize_key(d[0])
key = StringManager.standardize_key(metadata_columns[0])

# Avoid splitting on times
if 'time' in key or 'date' in key:
value = ':'.join(d[1:]).strip()
value = ':'.join(metadata_columns[1:]).strip()
else:
value = ', '.join(d[1:])
value = ', '.join(metadata_columns[1:])
value = StringManager.clean_str(value)

# cast the rough object key to a known key
Expand Down Expand Up @@ -291,16 +301,14 @@ def _parse_columns(self, str_line):
"""
Parse the column names from the input line. This can include mapping
"""
# Parse the columns header based on the size of the last line
# Remove units
# for c in ['()', '[]']:
# str_line = StringManager.strip_encapsulated(str_line, c)

raw_cols = str_line.strip(
self.DEFAULT_HEADER_LINE_START
self.DEFAULT_HEADER_LINE_START + self.END_OF_LINE
).split(
self._column_sep
)
# Filter empty strings, especially with trailing commas.
# Example: col1, col2, col3,
raw_cols = [col for col in raw_cols if col]
# Clean the raw columns
standard_cols = [StringManager.standardize_key(c) for c in raw_cols]
# Infer units from the raw columns
Expand Down
2 changes: 1 addition & 1 deletion insitupy/variables/base_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,7 +154,7 @@ def from_mapping(
column_mapping[result] = None
else:
raise InputMappingError(
f"Could not find mapping for {input_name}"
f"Could not find mapping for: {input_name}"
)
LOG.debug(
f"Mapping {result} to {result} (type {column_mapping[result]})"
Expand Down
Empty file added tests/io/__init__.py
Empty file.
66 changes: 66 additions & 0 deletions tests/io/test_metadata.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
import pytest
from insitupy.io.metadata import MetaDataParser


@pytest.fixture
def metadata_parser():
    """Provide a MetaDataParser constructed with no arguments (all defaults)."""
    parser = MetaDataParser()
    return parser


# Raw metadata lines in "key,value" form, shared by the tests in this module.
META_LINES = [
    "Site,Site1",
    "Pit ID,1234",
    "date/time,2025-10-08 12:34:56",
]

# The dictionary the tests expect `_preparse_meta` to return for META_LINES.
META_LINES_PARSED = {
    "site_id": "Site1",
    "pit_id": "1234",
    "datetime": "2025-10-08 12:34:56",
}


class TestMetaDataParser:
    """Tests for ``MetaDataParser._preparse_meta``."""

    def test_preparse_meta_returns_dict(self, metadata_parser):
        """Well-formed key/value lines yield the expected dictionary."""
        parsed = metadata_parser._preparse_meta(META_LINES)

        assert parsed == META_LINES_PARSED, "Metadata lines parsed incorrectly."

    def test_preparse_meta_with_empty_lines(self, metadata_parser):
        """An empty line and a whitespace-only entry leave the result unchanged."""
        lines_with_blanks = [*META_LINES, "", " ,"]

        parsed = metadata_parser._preparse_meta(lines_with_blanks)

        assert parsed == META_LINES_PARSED, "Empty lines were not ignored correctly."

    def test_preparse_meta_with_time_and_date_key(self, metadata_parser):
        """
        Ensure that time and date values are kept whole rather than being
        split into separate entries.
        """
        expected = {
            "time": "12:34:56",
            "date": "2025-10-09",
        }
        time_and_date_lines = ["Time,12:34:56", "Date,2025-10-09"]

        parsed = metadata_parser._preparse_meta(time_and_date_lines)

        assert parsed == expected, "Time or date keys processed incorrectly."

    def test_preparse_meta_with_no_key_value_pairs(self, metadata_parser):
        """Lines that carry a key but no value are skipped."""
        keys_without_values = ["Time,,", "Time start/end,,, ,"]
        lines = META_LINES + keys_without_values

        parsed = metadata_parser._preparse_meta(lines)

        assert parsed == META_LINES_PARSED, (
            "Lines without key-value pairs were not skipped."
        )
2 changes: 1 addition & 1 deletion tests/variables/test_base_variables.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ def test_from_mapping_success_on_code(

def test_from_mapping_failure(self, extendable_variables_fixture):
with pytest.raises(
InputMappingError, match="Could not find mapping for humidity"
InputMappingError, match="Could not find mapping for: humidity"
):
extendable_variables_fixture.from_mapping("humidity")

Expand Down