Skip to content

Commit

Permalink
Allow parsing xml-in-xml to FileTypeXML (#42)
Browse files Browse the repository at this point in the history
* feat: add parsing xml-in-xml to FileTypeXML

* chore: version bump
  • Loading branch information
fritz-astronomer authored Jan 13, 2025
1 parent 1cf2709 commit 41b5f45
Show file tree
Hide file tree
Showing 2 changed files with 14 additions and 1 deletion.
2 changes: 1 addition & 1 deletion orbiter/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import re
from typing import Any, Tuple

__version__ = "1.4.2"
__version__ = "1.4.3"

version = __version__

Expand Down
13 changes: 13 additions & 0 deletions orbiter/file_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@

import xmltodict
import yaml
from loguru import logger
from pydantic import (
BaseModel,
)
Expand Down Expand Up @@ -100,6 +101,8 @@ def xmltodict_parse(input_str: str) -> Any:
{'a': [{'@foo': 'bar', 'foo': [{'@bar': 'baz', 'bar': [{'bop': None}]}]}]}
>>> xmltodict_parse("<a foo='bar'><foo bar='baz'></foo><foo bing='bop'></foo></a>")
{'a': [{'@foo': 'bar', 'foo': [{'@bar': 'baz'}, {'@bing': 'bop'}]}]}
>>> xmltodict_parse("<a>&lt;?xml version=&apos;1.0&apos; encoding=&apos;UTF-16&apos;?&gt;&lt;Properties version=&apos;1.1&apos;&gt;&lt;/Properties&gt;</a>")
{'a': {'Properties': [{'@version': '1.1'}]}}
```
:param input_str: The XML string to parse
Expand All @@ -108,6 +111,14 @@ def xmltodict_parse(input_str: str) -> Any:
:rtype: dict
"""

def _fix_escaped_xml(v):
    """Parse a string that holds an embedded (escaped) XML document.

    Used by ``_fix`` for string values that begin with an XML declaration
    (``<?xml`` / ``<?XML``). The text is parsed with ``xmltodict.parse`` and
    the resulting dict is normalized in place by ``_fix``, so nested XML gets
    the same list-of-dict shape as the outer document.

    :param v: The string value suspected to contain an XML document
    :type v: str
    :return: The parsed and normalized dict, or ``v`` unchanged if it could
        not be parsed
    :rtype: dict | str
    """
    try:
        parsed_unescaped_xml = xmltodict.parse(v)
        _fix(parsed_unescaped_xml)
    except Exception as e:
        # Best-effort: a value that merely looks like XML must not break the
        # parse of the enclosing document.
        logger.debug(f"Error parsing escaped XML: {e}")
        # Hand back the original text; implicitly returning None here would
        # make the caller overwrite the value with None and lose the data.
        return v
    return parsed_unescaped_xml

# noinspection t
def _fix(d):
"""fix the dict in place, recursively, standardizing on a list of dict even if there's only one entry."""
Expand All @@ -123,6 +134,8 @@ def _fix(d):
_fix(v)
else:
_fix(v)
if isinstance(v, str) and (v.startswith("<?xml") or v.startswith("<?XML")):
d[k] = _fix_escaped_xml(v)
# if it's a list, descend to fix
if isinstance(d, list):
for v in d:
Expand Down

0 comments on commit 41b5f45

Please sign in to comment.