OpenLCA Import Errors (Date Fields) #64

michaelweinold · 2025-01-16T20:16:33Z

@Shan253 recently asked me to review an error message related to importing Ecospold1 files from OpenLCA.
The error message is related to a date string, which cannot be read by dateutil:

ParserError: Unknown string format: 9999-12-31+01:00

Complete Error Message

---------------------------------------------------------------------------
ParserError                               Traceback (most recent call last)
Cell In[4], line 2
      1 electric_aviation_master_LCI = r"/Users/michaelweinold/Downloads/EcoSpold01"
----> 2 importer = bi.SingleOutputEcospold1Importer(filepath = electric_aviation_master_LCI, db_name= "electric_aviation", use_mp=False)
      3 importer.apply_strategies()
      4 importer.write_database()

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/importers/ecospold1.py:85, in SingleOutputEcospold1Importer.__init__(self, filepath, db_name, use_mp, extractor)
     83 start = time()
     84 try:
---> 85     self.data = extractor.extract(filepath, db_name, use_mp=use_mp)
     86 except RuntimeError as e:
     87     raise MultiprocessingError(
     88         "Multiprocessing error; re-run using `use_mp=False`"
     89     ).with_traceback(e.__traceback__)

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/extractors/ecospold1.py:93, in Ecospold1DataExtractor.extract(cls, path, db_name, use_mp)
     90 data = []
     92 for index, filepath in enumerate(tqdm(filelist)):
---> 93     for x in cls.process_file(filepath, db_name):
     94         if x:
     95             data.append(x)

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/extractors/ecospold1.py:123, in Ecospold1DataExtractor.process_file(cls, filepath, db_name)
    121     if dataset.tag == "comment":
    122         continue
--> 123     data.append(cls.process_dataset(dataset, filepath, db_name))
    124 return data

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/extractors/ecospold1.py:224, in Ecospold1DataExtractor.process_dataset(cls, dataset, filename, db_name)
    201     # We don't extract the `dataGeneratorAndPublication` tag because
    202     # it is insane; there is only one but we have multiple publications,
    203     # and implementing software puts in garbage anyway
    205     return data
    207 data = {
    208     "tags": [
    209         ("ecoSpold01datasetRelatesToProduct", RF.datasetRelatesToProduct),
    210         ("ecoSpold01infrastructureProcess", RF.infrastructureProcess),
    211         ("ecoSpold01infrastructureIncluded", RF.infrastructureIncluded),
    212         ("ecoSpold01localName", RF.localName),
    213         ("ecoSpold01localCategory", RF.localCategory),
    214         ("ecoSpold01localSubCategory", RF.localSubCategory),
    215         ("ecoSpold01category", RF.category),
    216         ("ecoSpold01subCategory", RF.subCategory),
    217         ("ecoSpold01includedProcesses", RF.includedProcesses),
    218         (
    219             "ecoSpold01dataValidForEntirePeriod",
    220             PI.timePeriod.dataValidForEntirePeriod,
    221         ),
    222         # Get string representation instead of converting to native
    223         # date type
--> 224         ("ecoSpold01endDate", PI.timePeriod.endDate.strftime("%Y-%m-%d")),
    225         ("ecoSpold01startDate", PI.timePeriod.startDate.strftime("%Y-%m-%d")),
    226         ("ecoSpold01type", PI.dataSetInformation.type),
    227         (
    228             "ecoSpold01impactAssessmentResult",
    229             PI.dataSetInformation.impactAssessmentResult,
    230         ),
    231         ("ecoSpold01version", PI.dataSetInformation.version),
    232         (
    233             "ecoSpold01internalVersion",
    234             PI.dataSetInformation.internalVersion,
    235         ),
    236         ("ecoSpold01timestamp", PI.dataSetInformation.timestamp.isoformat()),
    237         ("ecoSpold01languageCode", PI.dataSetInformation.languageCode),
    238         (
    239             "ecoSpold01localLanguageCode",
    240             PI.dataSetInformation.localLanguageCode,
    241         ),
    242         ("ecoSpold01energyValues", PI.dataSetInformation.energyValues),
    243     ],
    244     "references": [
    245         {
    246             "identifier": source.number,
    247             "type": source.sourceTypeStr,
    248             # additional authors supposed to be split by comma, but comma
    249             # also used in first/last names, so can split names.
    250             # Just add as long string
    251             "authors": [source.firstAuthor, source.additionalAuthors],
    252             "year": source.year,
    253             "title": source.title,
    254             "pages": source.pageNumbers,
    255             "editors": source.nameOfEditors,
    256             "anthology": source.titleOfAnthology,
    257             "place_of_publication": source.placeOfPublications,
    258             "publisher": source.publisher,
    259             "journal": source.journal,
    260             "volume": source.volumeNo,
    261             "issue": source.issueNo,
    262             "text": source.text,
    263         }
    264         for source in MV.sources
    265     ],
    266     "categories": [RF.get("category"), RF.get("subCategory")],
    267     "code": int(dataset.get("number")),
    268     "comment": "\n".join(text for text in comments.values() if text),
    269     "comments": comments,
    270     "authors": get_authors(),
    271     "database": db_name,
    272     "exchanges": cls.process_exchanges(dataset),
    273     "filename": (
    274         Path(filename).name
    275         if not isinstance(filename, StringIO)
    276         else "StringIO"
    277     ),
    278     "location": PI.geography.location,
    279     "name": RF.name.strip(),
    280     "unit": RF.unit,
    281     "type": "process",
    282 }
    284 allocation_exchanges = [
    285     exc for exc in data["exchanges"] if exc.get("reference")
    286 ]
    288 if allocation_exchanges:

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/pyecospold/model_v1.py:815, in TimePeriod.endDate(self)
    812 @property
    813 def endDate(self) -> date:
    814     """End date of the time period for which the dataset is valid."""
--> 815     return parse(self._endDate).date()

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/dateutil/parser/_parser.py:1368, in parse(timestr, parserinfo, **kwargs)
   1366     return parser(parserinfo).parse(timestr, **kwargs)
   1367 else:
-> 1368     return DEFAULTPARSER.parse(timestr, **kwargs)

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/dateutil/parser/_parser.py:643, in parser.parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
    640 res, skipped_tokens = self._parse(timestr, **kwargs)
    642 if res is None:
--> 643     raise ParserError("Unknown string format: %s", timestr)
    645 if len(res) == 0:
    646     raise ParserError("String does not contain a date: %s", timestr)

ParserError: Unknown string format: 9999-12-31+01:00

Indeed, dateutil's parser does not understand a date-only string followed by a timezone offset (+01:00). This minimal example reproduces the error:

from dateutil.parser import parse
parse("9999-12-31+01:00")

Manually removing the timezone offsets from the datasets leads to the next error:

ValueError: unconverted data remains: .088+01:00

Complete Error Message

---------------------------------------------------------------------------
ValueError                                Traceback (most recent call last)
Cell In[6], line 2
      1 electric_aviation_master_LCI = r"/Users/michaelweinold/Downloads/EcoSpold01"
----> 2 importer = bi.SingleOutputEcospold1Importer(filepath = electric_aviation_master_LCI, db_name= "electric_aviation", use_mp=False)
      3 importer.apply_strategies()
      4 importer.write_database()

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/importers/ecospold1.py:85, in SingleOutputEcospold1Importer.__init__(self, filepath, db_name, use_mp, extractor)
     83 start = time()
     84 try:
---> 85     self.data = extractor.extract(filepath, db_name, use_mp=use_mp)
     86 except RuntimeError as e:
     87     raise MultiprocessingError(
     88         "Multiprocessing error; re-run using `use_mp=False`"
     89     ).with_traceback(e.__traceback__)

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/extractors/ecospold1.py:93, in Ecospold1DataExtractor.extract(cls, path, db_name, use_mp)
     90 data = []
     92 for index, filepath in enumerate(tqdm(filelist)):
---> 93     for x in cls.process_file(filepath, db_name):
     94         if x:
     95             data.append(x)

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/extractors/ecospold1.py:123, in Ecospold1DataExtractor.process_file(cls, filepath, db_name)
    121     if dataset.tag == "comment":
    122         continue
--> 123     data.append(cls.process_dataset(dataset, filepath, db_name))
    124 return data

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/extractors/ecospold1.py:236, in Ecospold1DataExtractor.process_dataset(cls, dataset, filename, db_name)
    201     # We don't extract the `dataGeneratorAndPublication` tag because
    202     # it is insane; there is only one but we have multiple publications,
    203     # and implementing software puts in garbage anyway
    205     return data
    207 data = {
    208     "tags": [
    209         ("ecoSpold01datasetRelatesToProduct", RF.datasetRelatesToProduct),
    210         ("ecoSpold01infrastructureProcess", RF.infrastructureProcess),
    211         ("ecoSpold01infrastructureIncluded", RF.infrastructureIncluded),
    212         ("ecoSpold01localName", RF.localName),
    213         ("ecoSpold01localCategory", RF.localCategory),
    214         ("ecoSpold01localSubCategory", RF.localSubCategory),
    215         ("ecoSpold01category", RF.category),
    216         ("ecoSpold01subCategory", RF.subCategory),
    217         ("ecoSpold01includedProcesses", RF.includedProcesses),
    218         (
    219             "ecoSpold01dataValidForEntirePeriod",
    220             PI.timePeriod.dataValidForEntirePeriod,
    221         ),
    222         # Get string representation instead of converting to native
    223         # date type
    224         ("ecoSpold01endDate", PI.timePeriod.endDate.strftime("%Y-%m-%d")),
    225         ("ecoSpold01startDate", PI.timePeriod.startDate.strftime("%Y-%m-%d")),
    226         ("ecoSpold01type", PI.dataSetInformation.type),
    227         (
    228             "ecoSpold01impactAssessmentResult",
    229             PI.dataSetInformation.impactAssessmentResult,
    230         ),
    231         ("ecoSpold01version", PI.dataSetInformation.version),
    232         (
    233             "ecoSpold01internalVersion",
    234             PI.dataSetInformation.internalVersion,
    235         ),
--> 236         ("ecoSpold01timestamp", PI.dataSetInformation.timestamp.isoformat()),
    237         ("ecoSpold01languageCode", PI.dataSetInformation.languageCode),
    238         (
    239             "ecoSpold01localLanguageCode",
    240             PI.dataSetInformation.localLanguageCode,
    241         ),
    242         ("ecoSpold01energyValues", PI.dataSetInformation.energyValues),
    243     ],
    244     "references": [
    245         {
    246             "identifier": source.number,
    247             "type": source.sourceTypeStr,
    248             # additional authors supposed to be split by comma, but comma
    249             # also used in first/last names, so can split names.
    250             # Just add as long string
    251             "authors": [source.firstAuthor, source.additionalAuthors],
    252             "year": source.year,
    253             "title": source.title,
    254             "pages": source.pageNumbers,
    255             "editors": source.nameOfEditors,
    256             "anthology": source.titleOfAnthology,
    257             "place_of_publication": source.placeOfPublications,
    258             "publisher": source.publisher,
    259             "journal": source.journal,
    260             "volume": source.volumeNo,
    261             "issue": source.issueNo,
    262             "text": source.text,
    263         }
    264         for source in MV.sources
    265     ],
    266     "categories": [RF.get("category"), RF.get("subCategory")],
    267     "code": int(dataset.get("number")),
    268     "comment": "\n".join(text for text in comments.values() if text),
    269     "comments": comments,
    270     "authors": get_authors(),
    271     "database": db_name,
    272     "exchanges": cls.process_exchanges(dataset),
    273     "filename": (
    274         Path(filename).name
    275         if not isinstance(filename, StringIO)
    276         else "StringIO"
    277     ),
    278     "location": PI.geography.location,
    279     "name": RF.name.strip(),
    280     "unit": RF.unit,
    281     "type": "process",
    282 }
    284 allocation_exchanges = [
    285     exc for exc in data["exchanges"] if exc.get("reference")
    286 ]
    288 if allocation_exchanges:

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/pyecospold/lxmlh/helpers.py:122, in create_attribute.<locals>.<lambda>(self)
    114 def create_attribute(
    115     name: str,
    116     attr_type: type,
    117     schema_file: str,
    118     validator: Optional[Callable] = None,
    119 ) -> property:
    120     """Helper wrapper method for creating setters and getters for an attribute"""
    121     return property(
--> 122         fget=lambda self: get_attribute(self, name, attr_type),
    123         fset=lambda self, value: set_attribute(
    124             self, name, value, schema_file, validator
    125         ),
    126     )

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/pyecospold/lxmlh/helpers.py:94, in get_attribute(parent, attribute, attr_type)
     89 def get_attribute(
     90     parent: etree.ElementBase, attribute: str, attr_type: type = str
     91 ) -> Any:
     92     """Helper wrapper method for retrieving XML attributes. Returns
     93     TYPE_DEFAULTS[type] if attribute doesn't exist."""
---> 94     return TYPE_FUNC_MAP.get(attr_type, attr_type)(
     95         parent.get(attribute, TYPE_DEFAULTS.get(attr_type, None))
     96     )

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/pyecospold/lxmlh/config.py:10, in <lambda>(string)
      4 import numpy as np
      6 TIMESTAMP_FORMAT: str = "%Y-%m-%dT%H:%M:%S"
      8 TYPE_FUNC_MAP: Dict[type, Callable[[str], Any]] = {
      9     bool: lambda string: string.lower() == "true",
---> 10     datetime: lambda string: datetime.strptime(string, TIMESTAMP_FORMAT),
     11 }
     13 TYPE_DEFAULTS: Dict[type, Any] = {
     14     int: np.nan_to_num(np.nan),
     15     float: np.nan,
     16     bool: "false",
     17     str: "",
     18 }

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/_strptime.py:567, in _strptime_datetime(cls, data_string, format)
    564 def _strptime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"):
    565     """Return a class cls instance based on the input string and the
    566     format string."""
--> 567     tt, fraction, gmtoff_fraction = _strptime(data_string, format)
    568     tzname, gmtoff = tt[-2:]
    569     args = tt[:6] + (fraction,)

File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/_strptime.py:352, in _strptime(data_string, format)
    349     raise ValueError("time data %r does not match format %r" %
    350                      (data_string, format))
    351 if len(data_string) != found.end():
--> 352     raise ValueError("unconverted data remains: %s" %
    353                       data_string[found.end():])
    355 iso_year = year = None
    356 month = day = 1

ValueError: unconverted data remains: .088+01:00

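This time, the fractional seconds in the XML timestamp field are the issue: the format string used for parsing ("%Y-%m-%dT%H:%M:%S") has no place for them, so everything after the seconds (".088+01:00") is left unconverted: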

timestamp="2023-11-13T13:18:45.088+01:00"

After removing those from the XML files, the time-related errors are resolved.

I don't know enough about the Ecospold file format or the OpenLCA software to assess whether this is an issue with this specific dataset, or a problem with either the OpenLCA export or the pyecospold package.

The text was updated successfully, but these errors were encountered:

michaelweinold added the "bug" label (Something isn't working) on Jan 16, 2025

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

OpenLCA Import Errors (Date Fields) #64

OpenLCA Import Errors (Date Fields) #64

michaelweinold commented Jan 16, 2025

OpenLCA Import Errors (Date Fields) #64

OpenLCA Import Errors (Date Fields) #64

Comments

michaelweinold commented Jan 16, 2025