Skip to content

Commit 843dd99

Browse files
authored
Merge pull request #373 from OpenDataServices/342-last-modified-does-not-convert
ODSReader: Fix parsing date and number formatting from .ods files
2 parents bb5c539 + 44fa738 commit 843dd99

14 files changed

+60
-25
lines changed

CHANGELOG.md

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,9 +6,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
66

77
## [Unreleased]
88

9+
## [0.15.4] - 2021-03-08
10+
911
### Fixed
1012

11-
## [0.15.3] - 2020-02-23
13+
- Fix parsing date and number formatting from .ods files https://github.com/OpenDataServices/flatten-tool/pull/373
14+
15+
## [0.15.3] - 2021-02-23
16+
17+
### Fixed
1218

1319
- use-titles: Use $ref'erring title if available https://github.com/OpenDataServices/flatten-tool/pull/368
1420
- create-template --no-deprecated-fields: Did not work if a deprecated element was at the same level as a $ref https://github.com/OpenDataServices/flatten-tool/issues/185#issuecomment-719587348

examples/iati.ods

11.4 KB
Binary file not shown.

examples/iati.xlsx

6.5 KB
Binary file not shown.

examples/iati/expected.xml

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
<?xml version='1.0' encoding='utf-8'?>
22
<iati-activities>
33
<!--XML generated by flatten-tool-->
4-
<iati-activity>
4+
<iati-activity last-updated-datetime="2011-10-01T00:00:00+00:00">
55
<iati-identifier>AA-AAA-123456789-ABC123</iati-identifier>
66
<reporting-org ref="AA-AAA-123456789" type="40">
77
<narrative>Organisation name</narrative>
@@ -28,7 +28,7 @@
2828
<value value-date="2012-03-03">20</value>
2929
</transaction>
3030
</iati-activity>
31-
<iati-activity>
31+
<iati-activity last-updated-datetime="2016-01-01T00:00:00+00:00">
3232
<iati-identifier>AA-AAA-123456789-ABC124</iati-identifier>
3333
<reporting-org ref="AA-AAA-123456789" type="40">
3434
<narrative>Organisation name</narrative>

examples/iati/main.csv

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,3 @@
1-
iati-identifier,reporting-org/@ref,reporting-org/@type,reporting-org/narrative,participating-org/@role,participating-org/@ref,activity-status/@code,activity-date/@type,activity-date/@iso-date,recipient-country/0/@code,recipient-country/0/@percentage,recipient-country/1/@code,recipient-country/1/@percentage,title/narrative,description/narrative
2-
AA-AAA-123456789-ABC123,AA-AAA-123456789,40,Organisation name,1,AA-AAA-123456789,2,1,2011-10-01,AF,40,XK,60,A title,A description
3-
AA-AAA-123456789-ABC124,AA-AAA-123456789,40,Organisation name,1,AA-AAA-123456789,3,2,2016-01-01,AG,30,XK,70,Another title,Another description
1+
iati-identifier,reporting-org/@ref,reporting-org/@type,reporting-org/narrative,participating-org/@role,participating-org/@ref,activity-status/@code,activity-date/@type,activity-date/@iso-date,recipient-country/0/@code,recipient-country/0/@percentage,recipient-country/1/@code,recipient-country/1/@percentage,title/narrative,description/narrative,@last-updated-datetime
2+
AA-AAA-123456789-ABC123,AA-AAA-123456789,40,Organisation name,1,AA-AAA-123456789,2,1,2011-10-01,AF,40,XK,60,A title,A description,2011-10-01T00:00:00+00:00
3+
AA-AAA-123456789-ABC124,AA-AAA-123456789,40,Organisation name,1,AA-AAA-123456789,3,2,2016-01-01,AG,30,XK,70,Another title,Another description,2016-01-01T00:00:00+00:00

flattentool/ODSReader.py

Lines changed: 19 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -15,12 +15,16 @@
1515
# Thanks to grt for the fixes
1616
# https://github.com/marcoconti83/read-ods-with-odfpy
1717

18-
import re
1918
from collections import OrderedDict
19+
from datetime import datetime
2020

21+
import backports.datetime_fromisoformat
2122
import odf.opendocument
2223
from odf.table import Table, TableCell, TableRow
2324

25+
# Backport for datetime.fromisoformat, which is new in Python 3.7
26+
backports.datetime_fromisoformat.MonkeyPatch.patch_fromisoformat()
27+
2428

2529
# http://stackoverflow.com/a/4544699/1846474
2630
class GrowingList(list):
@@ -74,23 +78,29 @@ def readSheet(self, sheet):
7478
)
7579
)
7680
if value_type == "float":
81+
value = cell.attributes.get(
82+
(
83+
"urn:oasis:names:tc:opendocument:xmlns:office:1.0",
84+
"value",
85+
)
86+
)
7787
if "." in str(cell):
78-
arrCells[count] = float(str(cell))
88+
arrCells[count] = float(value)
7989
else:
80-
arrCells[count] = int(str(cell))
90+
arrCells[count] = int(value)
8191
elif value_type == "date":
8292
date_value = cell.attributes.get(
8393
(
8494
"urn:oasis:names:tc:opendocument:xmlns:office:1.0",
8595
"date-value",
8696
)
8797
)
88-
# Add UTC timezone to naive datetime strings
89-
if re.match(
90-
r"^\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d$", date_value
91-
):
92-
date_value += "Z"
93-
arrCells[count] = date_value
98+
# fromisoformat assumes microseconds appear as 3 or
99+
# 6 digits, whereas ods drops trailing 0s, so can
100+
# have 1-6 digits; right-pad with 0s to the full 6 digits
101+
if "." in date_value:
102+
date_value = date_value.ljust(26, "0")
103+
arrCells[count] = datetime.fromisoformat(date_value)
94104
else:
95105
arrCells[count] = str(cell)
96106
count += 1
10.8 KB
Binary file not shown.
112 Bytes
Binary file not shown.
5.4 KB
Binary file not shown.
-1.44 KB
Binary file not shown.

flattentool/tests/test_input_SpreadsheetInput.py

Lines changed: 11 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -193,13 +193,18 @@ def test_xlsx_input_types(self):
193193
assert list(xlsxinput.get_sheet_lines("main")) == [
194194
{
195195
"colInt": 1,
196-
"colFloat": 1.2,
196+
"colFloat": 1000.2,
197+
"colFloatComma": 1000.2,
197198
"colDate": datetime.datetime(2020, 3, 5),
198199
"colDateTime": datetime.datetime(2020, 2, 7, 16, 41, 0, 1),
200+
None: None,
199201
}
200202
]
201203
assert type(list(xlsxinput.get_sheet_lines("main"))[0]["colInt"]) == int
202204
assert type(list(xlsxinput.get_sheet_lines("main"))[0]["colFloat"]) == float
205+
assert (
206+
type(list(xlsxinput.get_sheet_lines("main"))[0]["colFloatComma"]) == float
207+
)
203208
assert xlsxinput.sub_sheet_names == ["main"]
204209

205210
def test_ods_input_types(self):
@@ -210,13 +215,14 @@ def test_ods_input_types(self):
210215
assert list(odsinput.get_sheet_lines("main")) == [
211216
{
212217
"colInt": 1,
213-
"colFloat": 1.2,
214-
"colDate": "2020-03-05",
215-
"colDateTime": "2020-02-07T16:41:00Z",
218+
"colFloat": 1000.2,
219+
"colFloatComma": 1000.2,
220+
"colDate": datetime.datetime(2020, 3, 5),
221+
"colDateTime": datetime.datetime(2020, 2, 7, 16, 41),
216222
}
217223
]
218224
assert type(list(odsinput.get_sheet_lines("main"))[0]["colInt"]) == int
219-
assert type(list(odsinput.get_sheet_lines("main"))[0]["colFloat"]) == float
225+
assert type(list(odsinput.get_sheet_lines("main"))[0]["colFloatComma"]) == float
220226
assert list(odsinput.sub_sheet_names) == ["main"]
221227

222228
def test_xlsx_input_integer2(self):

flattentool/tests/test_unflatten.py

Lines changed: 13 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -66,15 +66,24 @@ def test_360_fields_case_insensitive(tmpdir):
6666
assert output_json_grants == output_json_space_case
6767

6868

69-
@pytest.mark.parametrize("dirname", ["examples/iati", "examples/iati_multilang"])
70-
def test_unflatten_xml(tmpdir, dirname):
69+
@pytest.mark.parametrize(
70+
"dirname,input_format",
71+
[
72+
("examples/iati", "csv"),
73+
("examples/iati", "ods"),
74+
("examples/iati", "xlsx"),
75+
("examples/iati_multilang", "csv"),
76+
],
77+
)
78+
def test_unflatten_xml(tmpdir, dirname, input_format):
7179
schema_path = "examples/iati"
7280
schemas = ["iati-activities-schema.xsd", "iati-common.xsd"]
7381
schema_filepaths = ["{}/{}".format(schema_path, schema) for schema in schemas]
7482
unflatten(
75-
input_name=dirname,
83+
input_name=dirname
84+
+ (".{}".format(input_format) if input_format != "csv" else ""),
7685
output_name=tmpdir.join("output.xml").strpath,
77-
input_format="csv",
86+
input_format=input_format,
7887
root_list_path="iati-activity",
7988
id_name="iati-identifier",
8089
xml=True,

flattentool/tests/test_xml_input.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,7 @@ def test_xml_basic_example():
3333
parser.parse()
3434
assert list(parser.main_sheet) == [
3535
"iati-identifier",
36+
"@last-updated-datetime",
3637
"reporting-org/@ref",
3738
"reporting-org/@type",
3839
"reporting-org/narrative",
@@ -46,6 +47,7 @@ def test_xml_basic_example():
4647
]
4748
assert parser.main_sheet.lines == [
4849
{
50+
"@last-updated-datetime": "2011-10-01T00:00:00+00:00",
4951
"activity-date/@type": "1",
5052
"reporting-org/narrative": "Organisation name",
5153
"participating-org/@ref": "AA-AAA-123456789",
@@ -59,6 +61,7 @@ def test_xml_basic_example():
5961
"activity-status/@code": "2",
6062
},
6163
{
64+
"@last-updated-datetime": "2016-01-01T00:00:00+00:00",
6265
"activity-date/@type": "2",
6366
"reporting-org/narrative": "Organisation name",
6467
"participating-org/@ref": "AA-AAA-123456789",

setup.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,11 +35,12 @@ def run(self):
3535
"xmltodict",
3636
"lxml",
3737
"odfpy",
38+
"backports-datetime-fromisoformat",
3839
]
3940

4041
setup(
4142
name="flattentool",
42-
version="0.15.3",
43+
version="0.15.4",
4344
author="Open Data Services",
4445
author_email="[email protected]",
4546
packages=["flattentool"],

0 commit comments

Comments
 (0)