From 0fcc00e55855e2cc896371cd415b3a969c0aea95 Mon Sep 17 00:00:00 2001 From: James B Date: Wed, 1 Jan 2025 14:30:12 +0000 Subject: [PATCH 1/6] Cache results of processing. Repeated calls to get methods will be faster. Also it really helps with the next commit. --- CHANGELOG.md | 3 ++ compiletojsonschema/compiletojsonschema.py | 38 ++++++++++++++-------- 2 files changed, 28 insertions(+), 13 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index e95f37c..fc5fce6 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Added + +- Cache results of processing. Repeated calls to get methods will be faster. ### Removed diff --git a/compiletojsonschema/compiletojsonschema.py b/compiletojsonschema/compiletojsonschema.py index a670635..52235af 100644 --- a/compiletojsonschema/compiletojsonschema.py +++ b/compiletojsonschema/compiletojsonschema.py @@ -27,8 +27,23 @@ def __init__( self.codelist_base_directory = os.path.expanduser(codelist_base_directory) else: self.codelist_base_directory = os.getcwd() + # These vars hold output + self._processed = False + self._output_json = None def get(self): + self.__process() + return self._output_json + + def get_as_string(self): + return json.dumps(self.get(), indent=2) + + def __process(self): + # If already processed, return ..... + if self._processed: + return + + # Process now .... 
if self.input_filename: with open(self.input_filename) as fp: resolved = jsonref.load( @@ -42,15 +57,10 @@ def get(self): resolved = jsonref.JsonRef.replace_refs(self.input_schema) else: raise Exception("Must pass input_filename or input_schema") + self._output_json = self.__process_data(resolved) + self._processed = True - resolved = self.__process(resolved) - - return resolved - - def get_as_string(self): - return json.dumps(self.get(), indent=2) - - def __process(self, source): + def __process_data(self, source): out = deepcopy(source) @@ -61,24 +71,26 @@ def __process(self, source): if "properties" in source: for leaf in list(source["properties"]): - out["properties"][leaf] = self.__process(source["properties"][leaf]) + out["properties"][leaf] = self.__process_data( + source["properties"][leaf] + ) if self.set_additional_properties_false_everywhere: out["additionalProperties"] = False if "items" in source: - out["items"] = self.__process(source["items"]) + out["items"] = self.__process_data(source["items"]) if "oneOf" in source: for idx, data in enumerate(list(source["oneOf"])): - out["oneOf"][idx] = self.__process(source["oneOf"][idx]) + out["oneOf"][idx] = self.__process_data(source["oneOf"][idx]) if "anyOf" in source: for idx, data in enumerate(list(source["anyOf"])): - out["anyOf"][idx] = self.__process(source["anyOf"][idx]) + out["anyOf"][idx] = self.__process_data(source["anyOf"][idx]) if "allOf" in source: for idx, data in enumerate(list(source["allOf"])): - out["allOf"][idx] = self.__process(source["allOf"][idx]) + out["allOf"][idx] = self.__process_data(source["allOf"][idx]) if "codelist" in source and ( "openCodelist" not in source or not source["openCodelist"] From 319edb858bdc482ebbe98035ae460bdf6148f567 Mon Sep 17 00:00:00 2001 From: James B Date: Wed, 1 Jan 2025 14:50:02 +0000 Subject: [PATCH 2/6] Added audit functionality --- CHANGELOG.md | 1 + compiletojsonschema/compiletojsonschema.py | 24 ++++++++++++++++++++++ tests/test_simple.py | 19 
+++++++++++++++++ 3 files changed, 44 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index fc5fce6..367121a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ### Added - Cache results of processing. Repeated calls to get methods will be faster. +- Added audit functionality. Thanks to Duncan and https://github.com/OpenDataServices/json-schema-auditor ### Removed diff --git a/compiletojsonschema/compiletojsonschema.py b/compiletojsonschema/compiletojsonschema.py index 52235af..bbd0b52 100644 --- a/compiletojsonschema/compiletojsonschema.py +++ b/compiletojsonschema/compiletojsonschema.py @@ -30,6 +30,8 @@ def __init__( # These vars hold output self._processed = False self._output_json = None + self._output_types_used = None + self._output_keywords_used = None def get(self): self.__process() @@ -38,12 +40,22 @@ def get(self): def get_as_string(self): return json.dumps(self.get(), indent=2) + def get_types_used(self): + self.__process() + return sorted(self._output_types_used.keys()) + + def get_keywords_used(self): + self.__process() + return sorted(self._output_keywords_used.keys()) + def __process(self): # If already processed, return ..... if self._processed: return # Process now .... 
+ self._output_types_used = {} + self._output_keywords_used = {} if self.input_filename: with open(self.input_filename) as fp: resolved = jsonref.load( @@ -64,7 +76,19 @@ def __process_data(self, source): out = deepcopy(source) + for keyword in source: + self._output_keywords_used[keyword] = {} + + if "type" in source: + if isinstance(source["type"], str): + self._output_types_used[source["type"]] = {} + elif isinstance(source["type"], list): + for t in source["type"]: + if isinstance(t, str): + self._output_types_used[t] = {} + if hasattr(source, "__reference__"): + self._output_keywords_used["$ref"] = {} for attr in list(source.__reference__): if not attr == "$ref": out[attr] = source.__reference__[attr] diff --git a/tests/test_simple.py b/tests/test_simple.py index a769d60..09febb1 100644 --- a/tests/test_simple.py +++ b/tests/test_simple.py @@ -23,6 +23,16 @@ def test_in_file(): assert out["properties"]["home_address"]["title"] == "Home Address" assert out["properties"]["home_address"]["description"] == "Where the person lives" + assert ["object", "string"] == ctjs.get_types_used() + assert [ + "$ref", + "definitions", + "description", + "properties", + "title", + "type", + ] == ctjs.get_keywords_used() + def test_in_file_pass_as_schema(): @@ -67,6 +77,15 @@ def test_file_main(): assert out["properties"]["home_address"]["title"] == "Home Address" assert out["properties"]["home_address"]["description"] == "Where the person lives" + assert ["object", "string"] == ctjs.get_types_used() + assert [ + "$ref", + "description", + "properties", + "title", + "type", + ] == ctjs.get_keywords_used() + def test_file_list_anyof(): From 21e2d9065ac0d86db34fab07ccf93c099457eb79 Mon Sep 17 00:00:00 2001 From: James B Date: Wed, 1 Jan 2025 14:57:56 +0000 Subject: [PATCH 3/6] Add Audit option to CLI --- compiletojsonschema/cli/__main__.py | 20 +++++++++++++++++++- docs/cli.rst | 11 +++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) diff --git 
a/compiletojsonschema/cli/__main__.py b/compiletojsonschema/cli/__main__.py index 993fb33..6bce6d1 100644 --- a/compiletojsonschema/cli/__main__.py +++ b/compiletojsonschema/cli/__main__.py @@ -6,6 +6,7 @@ def main(): parser = argparse.ArgumentParser(description="Compile To JSON Schema CLI") + # Input Arguments parser.add_argument("input_file") parser.add_argument( "-s", @@ -19,6 +20,15 @@ def main(): help="Which directory we should look in for codelists", ) + # Output Arguments + parser.add_argument( + "-a", + "--audit", + help="Instead of compiled schema, output an audit of the input schema", + action="store_true", + ) + + # Process args = parser.parse_args() ctjs = CompileToJsonSchema( @@ -26,4 +36,12 @@ def main(): set_additional_properties_false_everywhere=args.set_additional_properties_false_everywhere, codelist_base_directory=args.codelist_base_directory, ) - print(ctjs.get_as_string()) + + if args.audit: + print("\nKeywords used in the schema:") + print(", ".join(ctjs.get_keywords_used())) + + print("\nTypes used in the schema:") + print(", ".join(ctjs.get_types_used())) + else: + print(ctjs.get_as_string()) diff --git a/docs/cli.rst b/docs/cli.rst index 6dc3e1f..3cb8c81 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -37,3 +37,14 @@ To enable this mode, pass the `--set-additional-properties-false-everywhere` or compiletojsonschema -s input.json compiletojsonschema --set-additional-properties-false-everywhere input.json + +Get Audit output +---------------- + +Instead of the compiled JSON schema, you can get an audit output by passing the `--audit` or `-a` flag. + + +.. 
code-block:: shell-session + + compiletojsonschema -a input.json + compiletojsonschema --audit input.json From 502547d7417e38ab50f6fd37f99a1ae6e5089ee9 Mon Sep 17 00:00:00 2001 From: James B Date: Wed, 1 Jan 2025 15:09:54 +0000 Subject: [PATCH 4/6] Can process dependentSchemas keyword --- CHANGELOG.md | 1 + compiletojsonschema/compiletojsonschema.py | 6 ++++ .../simple/file-dependentSchemas.json | 19 ++++++++++++ tests/test_simple.py | 29 +++++++++++++++++++ 4 files changed, 55 insertions(+) create mode 100644 tests/fixtures/simple/file-dependentSchemas.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 367121a..94425c9 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Cache results of processing. Repeated calls to get methods will be faster. - Added audit functionality. Thanks to Duncan and https://github.com/OpenDataServices/json-schema-auditor +- Can process dependentSchemas keyword ### Removed diff --git a/compiletojsonschema/compiletojsonschema.py b/compiletojsonschema/compiletojsonschema.py index bbd0b52..841d480 100644 --- a/compiletojsonschema/compiletojsonschema.py +++ b/compiletojsonschema/compiletojsonschema.py @@ -116,6 +116,12 @@ def __process_data(self, source): for idx, data in enumerate(list(source["allOf"])): out["allOf"][idx] = self.__process_data(source["allOf"][idx]) + if "dependentSchemas" in source and isinstance( + source["dependentSchemas"], dict + ): + for k, v in source["dependentSchemas"].items(): + out["dependentSchemas"][k] = self.__process_data(v) + if "codelist" in source and ( "openCodelist" not in source or not source["openCodelist"] ): diff --git a/tests/fixtures/simple/file-dependentSchemas.json b/tests/fixtures/simple/file-dependentSchemas.json new file mode 100644 index 0000000..e72c20d --- /dev/null +++ b/tests/fixtures/simple/file-dependentSchemas.json @@ -0,0 +1,19 @@ +{ + "type": "object", + "properties": { + "credit_card": { + 
"type": "number" + } + }, + "dependentSchemas": { + "credit_card": { + "properties": { + "address": { + "$ref": "file-definitions.json#/definition/address", + "title": "Credit card number (with optional address elements)" + } + }, + "required": ["address"] + } + } +} diff --git a/tests/test_simple.py b/tests/test_simple.py index 09febb1..1144b3a 100644 --- a/tests/test_simple.py +++ b/tests/test_simple.py @@ -160,3 +160,32 @@ def test_file_list_allof(): def test_passing_empty_schema_is_ok(): ctjs = CompileToJsonSchema(input_schema={}) assert "{}" == ctjs.get_as_string() + + +def test_file_dependentSchemas(): + + input_filename = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "fixtures", + "simple", + "file-dependentSchemas.json", + ) + + ctjs = CompileToJsonSchema(input_filename=input_filename) + out = ctjs.get() + + assert ( + out["dependentSchemas"]["credit_card"]["properties"]["address"]["title"] + == "Credit card number (with optional address elements)" + ) + + assert ["number", "object", "string"] == ctjs.get_types_used() + assert [ + "$ref", + "dependentSchemas", + "description", + "properties", + "required", + "title", + "type", + ] == ctjs.get_keywords_used() From 5baafc39cffb49901114ed482e9d5aeb5cb0a014 Mon Sep 17 00:00:00 2001 From: James B Date: Wed, 1 Jan 2025 15:49:03 +0000 Subject: [PATCH 5/6] Can process if / then / else keywords --- CHANGELOG.md | 1 + compiletojsonschema/compiletojsonschema.py | 4 +++ tests/fixtures/simple/file-if-then-else.json | 33 ++++++++++++++++++++ tests/test_simple.py | 33 ++++++++++++++++++++ 4 files changed, 71 insertions(+) create mode 100644 tests/fixtures/simple/file-if-then-else.json diff --git a/CHANGELOG.md b/CHANGELOG.md index 94425c9..ef4f145 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Cache results of processing. Repeated calls to get methods will be faster. - Added audit functionality. 
Thanks to Duncan and https://github.com/OpenDataServices/json-schema-auditor - Can process dependentSchemas keyword +- Can process if / then / else keywords ### Removed diff --git a/compiletojsonschema/compiletojsonschema.py b/compiletojsonschema/compiletojsonschema.py index 841d480..3c686d4 100644 --- a/compiletojsonschema/compiletojsonschema.py +++ b/compiletojsonschema/compiletojsonschema.py @@ -122,6 +122,10 @@ def __process_data(self, source): for k, v in source["dependentSchemas"].items(): out["dependentSchemas"][k] = self.__process_data(v) + for keyword in ["if", "then", "else"]: + if keyword in source: + out[keyword] = self.__process_data(source[keyword]) + if "codelist" in source and ( "openCodelist" not in source or not source["openCodelist"] ): diff --git a/tests/fixtures/simple/file-if-then-else.json b/tests/fixtures/simple/file-if-then-else.json new file mode 100644 index 0000000..7acadf7 --- /dev/null +++ b/tests/fixtures/simple/file-if-then-else.json @@ -0,0 +1,33 @@ +{ + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "credit_card": { + "type": "string" + } + }, + "if": { + "properties": { + "credit_card": { + "pattern": "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" + } + } + }, + "then": { + "properties": { + "address": { + "$ref": "file-definitions.json#/definition/address", + "title": "Must have an address with a credit card" + } + }, + "required": ["address"] + }, + "else": { + "properties": { + "cash_on_delivery": { "const": "Yup" } + }, + "required": ["cash_on_delivery"] + } +} diff --git a/tests/test_simple.py b/tests/test_simple.py index 1144b3a..59618ff 100644 --- a/tests/test_simple.py +++ b/tests/test_simple.py @@ -189,3 +189,36 @@ def test_file_dependentSchemas(): "title", "type", ] == ctjs.get_keywords_used() + + +def test_file_if_then_else(): + + input_filename = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "fixtures", + "simple", + 
"file-if-then-else.json", + ) + + ctjs = CompileToJsonSchema(input_filename=input_filename) + out = ctjs.get() + + assert ( + out["then"]["properties"]["address"]["title"] + == "Must have an address with a credit card" + ) + + assert ["object", "string"] == ctjs.get_types_used() + assert [ + "$ref", + "const", + "description", + "else", + "if", + "pattern", + "properties", + "required", + "then", + "title", + "type", + ] == ctjs.get_keywords_used() From 379c884b4d03506399d049fe7277b86403688236 Mon Sep 17 00:00:00 2001 From: James B Date: Wed, 1 Jan 2025 16:38:13 +0000 Subject: [PATCH 6/6] Can process not keyword --- CHANGELOG.md | 1 + compiletojsonschema/compiletojsonschema.py | 2 +- tests/fixtures/simple/file-not.json | 33 ++++++++++++++++++++++ tests/test_simple.py | 31 ++++++++++++++++++++ 4 files changed, 66 insertions(+), 1 deletion(-) create mode 100644 tests/fixtures/simple/file-not.json diff --git a/CHANGELOG.md b/CHANGELOG.md index ef4f145..b2dc59d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. - Added audit functionality. 
Thanks to Duncan and https://github.com/OpenDataServices/json-schema-auditor - Can process dependentSchemas keyword - Can process if / then / else keywords +- Can process not keyword ### Removed diff --git a/compiletojsonschema/compiletojsonschema.py b/compiletojsonschema/compiletojsonschema.py index 3c686d4..7c186ac 100644 --- a/compiletojsonschema/compiletojsonschema.py +++ b/compiletojsonschema/compiletojsonschema.py @@ -122,7 +122,7 @@ def __process_data(self, source): for k, v in source["dependentSchemas"].items(): out["dependentSchemas"][k] = self.__process_data(v) - for keyword in ["if", "then", "else"]: + for keyword in ["if", "then", "else", "not"]: if keyword in source: out[keyword] = self.__process_data(source[keyword]) diff --git a/tests/fixtures/simple/file-not.json b/tests/fixtures/simple/file-not.json new file mode 100644 index 0000000..18d8c34 --- /dev/null +++ b/tests/fixtures/simple/file-not.json @@ -0,0 +1,33 @@ +{ + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "credit_card": { + "type": "string" + } + }, + "oneOf": [ + { + "not": { + "properties": { + "credit_card": { + "pattern": "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" + } + } + } + }, + { + "properties": { + "address": { + "$ref": "file-definitions.json#/definition/address", + "title": "Must have an address with a credit card" + } + }, + "required": [ + "address" + ] + } + ] +} diff --git a/tests/test_simple.py b/tests/test_simple.py index 59618ff..a2ceed2 100644 --- a/tests/test_simple.py +++ b/tests/test_simple.py @@ -222,3 +222,34 @@ def test_file_if_then_else(): "title", "type", ] == ctjs.get_keywords_used() + + +def test_file_not(): + + input_filename = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "fixtures", + "simple", + "file-not.json", + ) + + ctjs = CompileToJsonSchema(input_filename=input_filename) + out = ctjs.get() + + assert ( + out["oneOf"][1]["properties"]["address"]["title"] + == "Must 
have an address with a credit card" + ) + + assert ["object", "string"] == ctjs.get_types_used() + assert [ + "$ref", + "description", + "not", + "oneOf", + "pattern", + "properties", + "required", + "title", + "type", + ] == ctjs.get_keywords_used()