You signed in with another tab or window. Reload to refresh your session. You signed out in another tab or window. Reload to refresh your session. You switched accounts on another tab or window. Reload to refresh your session. Dismiss alert
@Shan253 recently asked me to review an error message related to importing Ecospold1 files from OpenLCA.
The error message is related to a date string, which cannot be read by dateutil:
---------------------------------------------------------------------------
ParserError Traceback (most recent call last)
Cell In[4], line 2
1 electric_aviation_master_LCI = r"/Users/michaelweinold/Downloads/EcoSpold01"
----> 2 importer = bi.SingleOutputEcospold1Importer(filepath = electric_aviation_master_LCI, db_name= "electric_aviation", use_mp=False)
3 importer.apply_strategies()
4 importer.write_database()
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/importers/ecospold1.py:85, in SingleOutputEcospold1Importer.__init__(self, filepath, db_name, use_mp, extractor)
83 start = time()
84 try:
---> 85 self.data = extractor.extract(filepath, db_name, use_mp=use_mp)
86 except RuntimeError as e:
87 raise MultiprocessingError(
88 "Multiprocessing error; re-run using `use_mp=False`"
89 ).with_traceback(e.__traceback__)
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/extractors/ecospold1.py:93, in Ecospold1DataExtractor.extract(cls, path, db_name, use_mp)
90 data = []
92 for index, filepath in enumerate(tqdm(filelist)):
---> 93 for x in cls.process_file(filepath, db_name):
94 if x:
95 data.append(x)
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/extractors/ecospold1.py:123, in Ecospold1DataExtractor.process_file(cls, filepath, db_name)
121 if dataset.tag == "comment":
122 continue
--> 123 data.append(cls.process_dataset(dataset, filepath, db_name))
124 return data
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/extractors/ecospold1.py:224, in Ecospold1DataExtractor.process_dataset(cls, dataset, filename, db_name)
201 # We don't extract the `dataGeneratorAndPublication` tag because
202 # it is insane; there is only one but we have multiple publications,
203 # and implementing software puts in garbage anyway
205 return data
207 data = {
208 "tags": [
209 ("ecoSpold01datasetRelatesToProduct", RF.datasetRelatesToProduct),
210 ("ecoSpold01infrastructureProcess", RF.infrastructureProcess),
211 ("ecoSpold01infrastructureIncluded", RF.infrastructureIncluded),
212 ("ecoSpold01localName", RF.localName),
213 ("ecoSpold01localCategory", RF.localCategory),
214 ("ecoSpold01localSubCategory", RF.localSubCategory),
215 ("ecoSpold01category", RF.category),
216 ("ecoSpold01subCategory", RF.subCategory),
217 ("ecoSpold01includedProcesses", RF.includedProcesses),
218 (
219 "ecoSpold01dataValidForEntirePeriod",
220 PI.timePeriod.dataValidForEntirePeriod,
221 ),
222 # Get string representation instead of converting to native
223 # date type
--> 224 ("ecoSpold01endDate", PI.timePeriod.endDate.strftime("%Y-%m-%d")),
225 ("ecoSpold01startDate", PI.timePeriod.startDate.strftime("%Y-%m-%d")),
226 ("ecoSpold01type", PI.dataSetInformation.type),
227 (
228 "ecoSpold01impactAssessmentResult",
229 PI.dataSetInformation.impactAssessmentResult,
230 ),
231 ("ecoSpold01version", PI.dataSetInformation.version),
232 (
233 "ecoSpold01internalVersion",
234 PI.dataSetInformation.internalVersion,
235 ),
236 ("ecoSpold01timestamp", PI.dataSetInformation.timestamp.isoformat()),
237 ("ecoSpold01languageCode", PI.dataSetInformation.languageCode),
238 (
239 "ecoSpold01localLanguageCode",
240 PI.dataSetInformation.localLanguageCode,
241 ),
242 ("ecoSpold01energyValues", PI.dataSetInformation.energyValues),
243 ],
244 "references": [
245 {
246 "identifier": source.number,
247 "type": source.sourceTypeStr,
248 # additional authors supposed to be split by comma, but comma
249 # also used in first/last names, so can split names.
250 # Just add as long string
251 "authors": [source.firstAuthor, source.additionalAuthors],
252 "year": source.year,
253 "title": source.title,
254 "pages": source.pageNumbers,
255 "editors": source.nameOfEditors,
256 "anthology": source.titleOfAnthology,
257 "place_of_publication": source.placeOfPublications,
258 "publisher": source.publisher,
259 "journal": source.journal,
260 "volume": source.volumeNo,
261 "issue": source.issueNo,
262 "text": source.text,
263 }
264 for source in MV.sources
265 ],
266 "categories": [RF.get("category"), RF.get("subCategory")],
267 "code": int(dataset.get("number")),
268 "comment": "\n".join(text for text in comments.values() if text),
269 "comments": comments,
270 "authors": get_authors(),
271 "database": db_name,
272 "exchanges": cls.process_exchanges(dataset),
273 "filename": (
274 Path(filename).name
275 if not isinstance(filename, StringIO)
276 else "StringIO"
277 ),
278 "location": PI.geography.location,
279 "name": RF.name.strip(),
280 "unit": RF.unit,
281 "type": "process",
282 }
284 allocation_exchanges = [
285 exc for exc in data["exchanges"] if exc.get("reference")
286 ]
288 if allocation_exchanges:
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/pyecospold/model_v1.py:815, in TimePeriod.endDate(self)
812 @property
813 def endDate(self) -> date:
814 """End date of the time period for which the dataset is valid."""
--> 815 return parse(self._endDate).date()
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/dateutil/parser/_parser.py:1368, in parse(timestr, parserinfo, **kwargs)
1366 return parser(parserinfo).parse(timestr, **kwargs)
1367 else:
-> 1368 return DEFAULTPARSER.parse(timestr, **kwargs)
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/dateutil/parser/_parser.py:643, in parser.parse(self, timestr, default, ignoretz, tzinfos, **kwargs)
640 res, skipped_tokens = self._parse(timestr, **kwargs)
642 if res is None:
--> 643 raise ParserError("Unknown string format: %s", timestr)
645 if len(res) == 0:
646 raise ParserError("String does not contain a date: %s", timestr)
ParserError: Unknown string format: 9999-12-31+01:00
Indeed, the timezone offset +01:00 is not understood by the parser:
from dateutil.parser import parse
parse("9999-12-31+01:00")
Manually removing the timezone offsets from the datasets leads to the next error:
ValueError: unconverted data remains: .088+01:00
Complete Error Message
---------------------------------------------------------------------------
ValueError Traceback (most recent call last)
Cell In[6], line 2
1 electric_aviation_master_LCI = r"/Users/michaelweinold/Downloads/EcoSpold01"
----> 2 importer = bi.SingleOutputEcospold1Importer(filepath = electric_aviation_master_LCI, db_name= "electric_aviation", use_mp=False)
3 importer.apply_strategies()
4 importer.write_database()
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/importers/ecospold1.py:85, in SingleOutputEcospold1Importer.__init__(self, filepath, db_name, use_mp, extractor)
83 start = time()
84 try:
---> 85 self.data = extractor.extract(filepath, db_name, use_mp=use_mp)
86 except RuntimeError as e:
87 raise MultiprocessingError(
88 "Multiprocessing error; re-run using `use_mp=False`"
89 ).with_traceback(e.__traceback__)
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/extractors/ecospold1.py:93, in Ecospold1DataExtractor.extract(cls, path, db_name, use_mp)
90 data = []
92 for index, filepath in enumerate(tqdm(filelist)):
---> 93 for x in cls.process_file(filepath, db_name):
94 if x:
95 data.append(x)
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/extractors/ecospold1.py:123, in Ecospold1DataExtractor.process_file(cls, filepath, db_name)
121 if dataset.tag == "comment":
122 continue
--> 123 data.append(cls.process_dataset(dataset, filepath, db_name))
124 return data
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/bw2io/extractors/ecospold1.py:236, in Ecospold1DataExtractor.process_dataset(cls, dataset, filename, db_name)
201 # We don't extract the `dataGeneratorAndPublication` tag because
202 # it is insane; there is only one but we have multiple publications,
203 # and implementing software puts in garbage anyway
205 return data
207 data = {
208 "tags": [
209 ("ecoSpold01datasetRelatesToProduct", RF.datasetRelatesToProduct),
210 ("ecoSpold01infrastructureProcess", RF.infrastructureProcess),
211 ("ecoSpold01infrastructureIncluded", RF.infrastructureIncluded),
212 ("ecoSpold01localName", RF.localName),
213 ("ecoSpold01localCategory", RF.localCategory),
214 ("ecoSpold01localSubCategory", RF.localSubCategory),
215 ("ecoSpold01category", RF.category),
216 ("ecoSpold01subCategory", RF.subCategory),
217 ("ecoSpold01includedProcesses", RF.includedProcesses),
218 (
219 "ecoSpold01dataValidForEntirePeriod",
220 PI.timePeriod.dataValidForEntirePeriod,
221 ),
222 # Get string representation instead of converting to native
223 # date type
224 ("ecoSpold01endDate", PI.timePeriod.endDate.strftime("%Y-%m-%d")),
225 ("ecoSpold01startDate", PI.timePeriod.startDate.strftime("%Y-%m-%d")),
226 ("ecoSpold01type", PI.dataSetInformation.type),
227 (
228 "ecoSpold01impactAssessmentResult",
229 PI.dataSetInformation.impactAssessmentResult,
230 ),
231 ("ecoSpold01version", PI.dataSetInformation.version),
232 (
233 "ecoSpold01internalVersion",
234 PI.dataSetInformation.internalVersion,
235 ),
--> 236 ("ecoSpold01timestamp", PI.dataSetInformation.timestamp.isoformat()),
237 ("ecoSpold01languageCode", PI.dataSetInformation.languageCode),
238 (
239 "ecoSpold01localLanguageCode",
240 PI.dataSetInformation.localLanguageCode,
241 ),
242 ("ecoSpold01energyValues", PI.dataSetInformation.energyValues),
243 ],
244 "references": [
245 {
246 "identifier": source.number,
247 "type": source.sourceTypeStr,
248 # additional authors supposed to be split by comma, but comma
249 # also used in first/last names, so can split names.
250 # Just add as long string
251 "authors": [source.firstAuthor, source.additionalAuthors],
252 "year": source.year,
253 "title": source.title,
254 "pages": source.pageNumbers,
255 "editors": source.nameOfEditors,
256 "anthology": source.titleOfAnthology,
257 "place_of_publication": source.placeOfPublications,
258 "publisher": source.publisher,
259 "journal": source.journal,
260 "volume": source.volumeNo,
261 "issue": source.issueNo,
262 "text": source.text,
263 }
264 for source in MV.sources
265 ],
266 "categories": [RF.get("category"), RF.get("subCategory")],
267 "code": int(dataset.get("number")),
268 "comment": "\n".join(text for text in comments.values() if text),
269 "comments": comments,
270 "authors": get_authors(),
271 "database": db_name,
272 "exchanges": cls.process_exchanges(dataset),
273 "filename": (
274 Path(filename).name
275 if not isinstance(filename, StringIO)
276 else "StringIO"
277 ),
278 "location": PI.geography.location,
279 "name": RF.name.strip(),
280 "unit": RF.unit,
281 "type": "process",
282 }
284 allocation_exchanges = [
285 exc for exc in data["exchanges"] if exc.get("reference")
286 ]
288 if allocation_exchanges:
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/pyecospold/lxmlh/helpers.py:122, in create_attribute.<locals>.<lambda>(self)
114 def create_attribute(
115 name: str,
116 attr_type: type,
117 schema_file: str,
118 validator: Optional[Callable] = None,
119 ) -> property:
120 """Helper wrapper method for creating setters and getters for an attribute"""
121 return property(
--> 122 fget=lambda self: get_attribute(self, name, attr_type),
123 fset=lambda self, value: set_attribute(
124 self, name, value, schema_file, validator
125 ),
126 )
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/pyecospold/lxmlh/helpers.py:94, in get_attribute(parent, attribute, attr_type)
89 def get_attribute(
90 parent: etree.ElementBase, attribute: str, attr_type: type = str
91 ) -> Any:
92 """Helper wrapper method for retrieving XML attributes. Returns
93 TYPE_DEFAULTS[type] if attribute doesn't exist."""
---> 94 return TYPE_FUNC_MAP.get(attr_type, attr_type)(
95 parent.get(attribute, TYPE_DEFAULTS.get(attr_type, None))
96 )
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/site-packages/pyecospold/lxmlh/config.py:10, in <lambda>(string)
4 import numpy as np
6 TIMESTAMP_FORMAT: str = "%Y-%m-%dT%H:%M:%S"
8 TYPE_FUNC_MAP: Dict[type, Callable[[str], Any]] = {
9 bool: lambda string: string.lower() == "true",
---> 10 datetime: lambda string: datetime.strptime(string, TIMESTAMP_FORMAT),
11 }
13 TYPE_DEFAULTS: Dict[type, Any] = {
14 int: np.nan_to_num(np.nan),
15 float: np.nan,
16 bool: "false",
17 str: "",
18 }
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/_strptime.py:567, in _strptime_datetime(cls, data_string, format)
564 def _strptime_datetime(cls, data_string, format="%a %b %d %H:%M:%S %Y"):
565 """Return a class cls instance based on the input string and the
566 format string."""
--> 567 tt, fraction, gmtoff_fraction = _strptime(data_string, format)
568 tzname, gmtoff = tt[-2:]
569 args = tt[:6] + (fraction,)
File /opt/homebrew/Caskroom/miniconda/base/envs/env_bw25/lib/python3.11/_strptime.py:352, in _strptime(data_string, format)
349 raise ValueError("time data %r does not match format %r" %
350 (data_string, format))
351 if len(data_string) != found.end():
--> 352 raise ValueError("unconverted data remains: %s" %
353 data_string[found.end():])
355 iso_year = year = None
356 month = day = 1
ValueError: unconverted data remains: .088+01:00
This time, the decimal part of the seconds in the XML timestamp field is the issue:
timestamp="2023-11-13T13:18:45.088+01:00"
After removing those from the XML files, the time-related errors are resolved.
I don't know enough about the Ecospold file format or the OpenLCA software to assess if this is an issue with this specific dataset, or a problem with either the OpenLCA export or the pyecospold package.
The text was updated successfully, but these errors were encountered:
@Shan253 recently asked me to review an error message related to importing Ecospold1 files from OpenLCA.
The error message is related to a date string, which cannot be read by dateutil:
Complete Error Message
Indeed, the timezone offset +01:00 is not understood by the parser:
Manually removing the timezone offsets from the datasets leads to the next error:
Complete Error Message
This time, the decimal part of the seconds in the XML timestamp field is the issue:
After removing those from the XML files, the time-related errors are resolved.
I don't know enough about the Ecospold file format or the OpenLCA software to assess if this is an issue with this specific dataset, or a problem with either the OpenLCA export or the pyecospold package.
The text was updated successfully, but these errors were encountered: