Skip to content

Commit 56880b8

Browse files
committed
Add limited XML create-template support
OpenDataServices/cove#775. Based on this unmerged commit to CoVE: OpenDataServices/cove@e274142
1 parent 0843693 commit 56880b8

File tree

5 files changed

+141
-13
lines changed

5 files changed

+141
-13
lines changed

examples/help/create-template/expected.txt

+10-1
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,9 @@
1-
usage: flatten-tool create-template [-h] -s SCHEMA [-f {csv,xlsx,all}]
1+
usage: flatten-tool create-template [-h] [-s SCHEMA] [-f {csv,xlsx,all}]
22
[-m MAIN_SHEET_NAME] [-o OUTPUT_NAME]
33
[--rollup] [-r ROOT_ID] [--use-titles]
4+
[--xml]
5+
[--xml-schema [XML_SCHEMA [XML_SCHEMA ...]]]
6+
[--root-list-path ROOT_LIST_PATH]
47

58
optional arguments:
69
-h, --help show this help message and exit
@@ -22,3 +25,9 @@ optional arguments:
2225
-r ROOT_ID, --root-id ROOT_ID
2326
Root ID of the data format, e.g. ocid for OCDS
2427
--use-titles Convert titles. Requires a schema to be specified.
28+
--xml Use XML as the input format
29+
--xml-schema [XML_SCHEMA [XML_SCHEMA ...]]
30+
Path to one or more XML schemas
31+
--root-list-path ROOT_LIST_PATH
32+
Path of the root list, defaults to main. Needed for
33+
XML template creation only.

flattentool/__init__.py

+8-3
Original file line numberDiff line numberDiff line change
@@ -5,23 +5,28 @@
55
from flattentool.input import FORMATS as INPUT_FORMATS
66
from flattentool.xml_output import toxml
77
from flattentool.lib import parse_sheet_configuration
8+
from flattentool.xml_create_template import XMLSchemaParser
89
import sys
910
import json
1011
import codecs
1112
from decimal import Decimal
1213
from collections import OrderedDict
1314

1415

15-
def create_template(schema, output_name='template', output_format='all', main_sheet_name='main',
16-
rollup=False, root_id=None, use_titles=False, **_):
16+
def create_template(schema=None, output_name='template', output_format='all', main_sheet_name='main',
17+
rollup=False, root_id=None, use_titles=False,
18+
xml=False, xml_schemas=None, root_list_path=None, **_):
1719
"""
1820
Creates template file(s) from given inputs
1921
This function is built to deal with commandline input and arguments
2022
but to also be called from elswhere in future
2123
2224
"""
2325

24-
parser = SchemaParser(schema_filename=schema, rollup=rollup, root_id=root_id, use_titles=use_titles)
26+
if xml:
27+
parser = XMLSchemaParser(xml_schemas=xml_schemas, root_list_path=root_list_path)
28+
else:
29+
parser = SchemaParser(schema_filename=schema, rollup=rollup, root_id=root_id, use_titles=use_titles)
2530
parser.parse()
2631

2732
def spreadsheet_output(spreadsheet_output_class, name):

flattentool/cli.py

+16-3
Original file line numberDiff line numberDiff line change
@@ -36,10 +36,10 @@ def create_parser():
3636
parser_create_template = subparsers.add_parser(
3737
'create-template',
3838
help='Create a template from the given schema')
39-
parser_create_template.add_argument(
39+
schema_group = parser_create_template.add_mutually_exclusive_group(required=True)
40+
schema_group.add_argument(
4041
"-s", "--schema",
41-
help="Path to the schema file you want to use to create the template",
42-
required=True)
42+
help="Path to the schema file you want to use to create the template")
4343
parser_create_template.add_argument(
4444
"-f", "--output-format",
4545
help="Type of template you want to create. Defaults to all available options",
@@ -61,6 +61,19 @@ def create_parser():
6161
"--use-titles",
6262
action='store_true',
6363
help="Convert titles. Requires a schema to be specified.")
64+
parser_create_template.add_argument(
65+
"--xml",
66+
action='store_true',
67+
help="Use XML as the input format")
68+
schema_group.add_argument(
69+
"--xml-schema",
70+
dest='xml_schemas',
71+
metavar='XML_SCHEMA',
72+
nargs='*',
73+
help="Path to one or more XML schemas")
74+
parser_create_template.add_argument(
75+
"--root-list-path",
76+
help="Path of the root list, defaults to main. Needed for XML template creation only.")
6477

6578
parser_flatten = subparsers.add_parser(
6679
'flatten',

flattentool/sort_xml.py

+4-6
Original file line numberDiff line numberDiff line change
@@ -69,7 +69,7 @@ def get_schema_element(self, tag_name, name_attribute):
6969
return schema_element
7070
return schema_element
7171

72-
def element_loop(self, element, path):
72+
def element_loop(self, element):
7373
"""
7474
Return information about the children of the supplied element.
7575
"""
@@ -95,14 +95,12 @@ def element_loop(self, element, path):
9595
'xsd:complexType/xsd:all/xsd:element',
9696
namespaces=namespaces)
9797
+ type_elements)
98-
child_tuples = []
9998
for child in children:
10099
a = child.attrib
101100
if 'name' in a:
102-
child_tuples.append((a['name'], child, None, a.get('minOccurs'), a.get('maxOccurs')))
101+
yield a['name'], child, None, a.get('minOccurs'), a.get('maxOccurs')
103102
else:
104-
child_tuples.append((a['ref'], None, child, a.get('minOccurs'), a.get('maxOccurs')))
105-
return child_tuples
103+
yield a['ref'], None, child, a.get('minOccurs'), a.get('maxOccurs')
106104

107105
def create_schema_dict(self, parent_name, parent_element=None):
108106
"""
@@ -114,7 +112,7 @@ def create_schema_dict(self, parent_name, parent_element=None):
114112

115113
return OrderedDict([
116114
(name, self.create_schema_dict(name, element))
117-
for name, element, _, _, _ in self.element_loop(parent_element, '')])
115+
for name, element, _, _, _ in self.element_loop(parent_element)])
118116

119117

120118
def sort_element(element, schema_subdict):

flattentool/xml_create_template.py

+103
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,103 @@
1+
import sys
2+
3+
from .sort_xml import XMLSchemaWalker, namespaces
4+
from .sheet import Sheet
5+
6+
7+
class XMLSchemaWalkerForTemplate(XMLSchemaWalker):
    """Schema walker that produces the flattened paths used for a template.

    Builds on ``XMLSchemaWalker`` (``get_schema_element`` / ``element_loop``)
    and adds attribute discovery plus recursive path generation.
    """

    def attribute_loop(self, element):
        """
        Yield a tuple for each attribute the given element can have.

        Attributes are gathered from the element's inline complexType, from
        the named complexType referenced by its ``type`` attribute (if it
        resolves), and from any attributeGroup either of those references.

        The format of each tuple is (name, is_required).
        """
        # TODO(review): mixed-content elements are not reported yet; the
        # disabled sketch below is carried over from the original code.
        #if element.find("xsd:complexType[@mixed='true']", namespaces=namespaces) is not None:
        #    print_column_info('text', indent)

        a = element.attrib
        type_attributes = []
        type_attributeGroups = []
        if 'type' in a:
            complexType = self.get_schema_element('complexType', a['type'])
            if complexType is not None:
                type_attributes = (
                    complexType.findall('xsd:attribute', namespaces=namespaces) +
                    complexType.findall('xsd:simpleContent/xsd:extension/xsd:attribute', namespaces=namespaces)
                )
                type_attributeGroups = (
                    complexType.findall('xsd:attributeGroup', namespaces=namespaces) +
                    complexType.findall('xsd:simpleContent/xsd:extension/xsd:attributeGroup', namespaces=namespaces)
                )

        group_attributes = []
        for attributeGroup in (
            element.findall('xsd:complexType/xsd:attributeGroup', namespaces=namespaces) +
            element.findall('xsd:complexType/xsd:simpleContent/xsd:extension/xsd:attributeGroup', namespaces=namespaces) +
            type_attributeGroups
        ):
            group_definition = self.get_schema_element('attributeGroup', attributeGroup.attrib['ref'])
            if group_definition is None:
                # Unresolvable group reference: skip it, in the same way
                # unresolved complexType / attribute lookups are tolerated,
                # instead of failing with an AttributeError on None.
                continue
            group_attributes += group_definition.findall('xsd:attribute', namespaces=namespaces)

        for attribute in (
            element.findall('xsd:complexType/xsd:attribute', namespaces=namespaces) +
            element.findall('xsd:complexType/xsd:simpleContent/xsd:extension/xsd:attribute', namespaces=namespaces) +
            type_attributes + group_attributes
        ):
            # ``use`` is declared at the point of use (the local declaration
            # or the ``ref``), not on a global attribute declaration, so it
            # has to be read before the reference is resolved.
            is_required = attribute.get('use') == 'required'
            if 'ref' in attribute.attrib:
                referenced_attribute = self.get_schema_element('attribute', attribute.get('ref'))
                if referenced_attribute is not None:
                    attribute = referenced_attribute
            # Fall back to the ref itself when the reference did not resolve
            # to a named declaration.
            yield attribute.get('name') or attribute.get('ref'), is_required

    def has_simple_content(self, element):
        """
        Return True if the element's complexType declares xsd:simpleContent.

        Both the inline complexType and the named complexType referenced by
        the element's ``type`` attribute are checked.
        """
        a = element.attrib
        simple_content = False
        if 'type' in a:
            complexType = self.get_schema_element('complexType', a['type'])
            if complexType is not None:
                simple_content = bool(complexType.findall('xsd:simpleContent', namespaces=namespaces))
        return simple_content or bool(element.findall('xsd:complexType/xsd:simpleContent', namespaces=namespaces))

    def generate_paths(self, parent_name, parent_element=None, parent_path=''):
        """
        Yield the flattened path of every attribute and simple-content
        element found beneath the given element, depth first.

        parent_name -- name of the element to start from; it is looked up in
                       the schema when ``parent_element`` is not supplied.
        parent_element -- schema element to start from, if already resolved.
        parent_path -- prefix prepended to every yielded path.

        Attributes are yielded as ``<prefix>@<name>``. Child elements extend
        the prefix with ``<name>/``, or ``<name>/0/`` when the child may
        occur more than once.
        """
        if parent_element is None:
            parent_element = self.get_schema_element('element', parent_name)

        for name, _required in self.attribute_loop(parent_element):
            if name == 'xml:lang':
                # Namespaces not supported yet https://github.com/OpenDataServices/flatten-tool/issues/148
                # And no way to specify two narrative elements anyway https://github.com/OpenDataServices/cove/issues/777
                continue
            yield parent_path + '@' + name

        for name, element, _, _min_occurs, maxOccurs in self.element_loop(parent_element):
            if element is None:
                # The child was declared by reference; resolve the global
                # element declaration it points at.
                element = self.get_schema_element('element', name)
            path = parent_path + name
            if self.has_simple_content(element):
                yield path
            if maxOccurs is None:
                # element_loop passes the raw attribute value; an omitted
                # maxOccurs defaults to 1 in XML Schema.
                maxOccurs = 1
            if maxOccurs == 'unbounded' or int(maxOccurs) > 1:
                path += '/0/'
            else:
                path += '/'
            yield from self.generate_paths(name, element, path)
87+
88+
89+
class XMLSchemaParser(object):
    """Parse the fields of an XML schema into a flattened structure."""

    def __init__(self, xml_schemas=None, root_list_path=None):
        """
        xml_schemas -- the XML schema(s) handed to the schema walker;
                       defaults to a fresh empty list.
        root_list_path -- name of the schema element to start walking from.
                          Required: no default is applied here.

        Raises ValueError if ``root_list_path`` is not given.
        """
        # Explicit check rather than ``assert`` so the validation still
        # happens when Python runs with -O.
        if root_list_path is None:
            raise ValueError('root_list_path must be specified for XML template creation')
        self.sub_sheets = {}
        self.main_sheet = Sheet()
        self.sub_sheet_mapping = {}
        # A new list per instance; a mutable default argument would be
        # shared between every parser created without xml_schemas.
        self.xml_schemas = [] if xml_schemas is None else xml_schemas
        self.root_list_path = root_list_path

    def parse(self):
        """Append every path generated from the schema to the main sheet."""
        walker = XMLSchemaWalkerForTemplate(self.xml_schemas)
        for path in walker.generate_paths(self.root_list_path):
            self.main_sheet.append(path)
103+

0 commit comments

Comments
 (0)