diff --git a/CHANGELOG.md b/CHANGELOG.md index e95f37c..b2dc59d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0. ## [Unreleased] +### Added + +- Cache results of processing. Repeated calls to get methods will be faster. +- Added audit functionality. Thanks to Duncan and https://github.com/OpenDataServices/json-schema-auditor +- Can process dependentSchemas keyword +- Can process if / then / else keywords +- Can process not keyword ### Removed diff --git a/compiletojsonschema/cli/__main__.py b/compiletojsonschema/cli/__main__.py index 993fb33..6bce6d1 100644 --- a/compiletojsonschema/cli/__main__.py +++ b/compiletojsonschema/cli/__main__.py @@ -6,6 +6,7 @@ def main(): parser = argparse.ArgumentParser(description="Compile To JSON Schema CLI") + # Input Arguments parser.add_argument("input_file") parser.add_argument( "-s", @@ -19,6 +20,15 @@ def main(): help="Which directory we should look in for codelists", ) + # Output Arguments + parser.add_argument( + "-a", + "--audit", + help="Instead of compiled schema, output an audit of the input schema", + action="store_true", + ) + + # Process args = parser.parse_args() ctjs = CompileToJsonSchema( @@ -26,4 +36,12 @@ def main(): set_additional_properties_false_everywhere=args.set_additional_properties_false_everywhere, codelist_base_directory=args.codelist_base_directory, ) - print(ctjs.get_as_string()) + + if args.audit: + print("\nKeywords used in the schema:") + print(", ".join(ctjs.get_keywords_used())) + + print("\nTypes used in the schema:") + print(", ".join(ctjs.get_types_used())) + else: + print(ctjs.get_as_string()) diff --git a/compiletojsonschema/compiletojsonschema.py b/compiletojsonschema/compiletojsonschema.py index a670635..7c186ac 100644 --- a/compiletojsonschema/compiletojsonschema.py +++ b/compiletojsonschema/compiletojsonschema.py @@ -27,8 +27,35 @@ def __init__( self.codelist_base_directory = 
os.path.expanduser(codelist_base_directory) else: self.codelist_base_directory = os.getcwd() + # These vars hold output + self._processed = False + self._output_json = None + self._output_types_used = None + self._output_keywords_used = None def get(self): + self.__process() + return self._output_json + + def get_as_string(self): + return json.dumps(self.get(), indent=2) + + def get_types_used(self): + self.__process() + return sorted(self._output_types_used.keys()) + + def get_keywords_used(self): + self.__process() + return sorted(self._output_keywords_used.keys()) + + def __process(self): + # If already processed, return ..... + if self._processed: + return + + # Process now .... + self._output_types_used = {} + self._output_keywords_used = {} if self.input_filename: with open(self.input_filename) as fp: resolved = jsonref.load( @@ -42,43 +69,62 @@ def get(self): resolved = jsonref.JsonRef.replace_refs(self.input_schema) else: raise Exception("Must pass input_filename or input_schema") + self._output_json = self.__process_data(resolved) + self._processed = True - resolved = self.__process(resolved) - - return resolved + def __process_data(self, source): - def get_as_string(self): - return json.dumps(self.get(), indent=2) + out = deepcopy(source) - def __process(self, source): + for keyword in source: + self._output_keywords_used[keyword] = {} - out = deepcopy(source) + if "type" in source: + if isinstance(source["type"], str): + self._output_types_used[source["type"]] = {} + elif isinstance(source["type"], list): + for t in source["type"]: + if isinstance(t, str): + self._output_types_used[t] = {} if hasattr(source, "__reference__"): + self._output_keywords_used["$ref"] = {} for attr in list(source.__reference__): if not attr == "$ref": out[attr] = source.__reference__[attr] if "properties" in source: for leaf in list(source["properties"]): - out["properties"][leaf] = self.__process(source["properties"][leaf]) + out["properties"][leaf] = self.__process_data( + 
source["properties"][leaf] + ) if self.set_additional_properties_false_everywhere: out["additionalProperties"] = False if "items" in source: - out["items"] = self.__process(source["items"]) + out["items"] = self.__process_data(source["items"]) if "oneOf" in source: for idx, data in enumerate(list(source["oneOf"])): - out["oneOf"][idx] = self.__process(source["oneOf"][idx]) + out["oneOf"][idx] = self.__process_data(source["oneOf"][idx]) if "anyOf" in source: for idx, data in enumerate(list(source["anyOf"])): - out["anyOf"][idx] = self.__process(source["anyOf"][idx]) + out["anyOf"][idx] = self.__process_data(source["anyOf"][idx]) if "allOf" in source: for idx, data in enumerate(list(source["allOf"])): - out["allOf"][idx] = self.__process(source["allOf"][idx]) + out["allOf"][idx] = self.__process_data(source["allOf"][idx]) + + if "dependentSchemas" in source and isinstance( + source["dependentSchemas"], dict + ): + for k, v in source["dependentSchemas"].items(): + out["dependentSchemas"][k] = self.__process_data(v) + + for keyword in ["if", "then", "else", "not"]: + if keyword in source: + out[keyword] = self.__process_data(source[keyword]) if "codelist" in source and ( "openCodelist" not in source or not source["openCodelist"] diff --git a/docs/cli.rst b/docs/cli.rst index 6dc3e1f..3cb8c81 100644 --- a/docs/cli.rst +++ b/docs/cli.rst @@ -37,3 +37,14 @@ To enable this mode, pass the `--set-additional-properties-false-everywhere` or compiletojsonschema -s input.json compiletojsonschema --set-additional-properties-false-everywhere input.json + +Get Audit output +---------------- + +Instead of the compiled JSON schema, you can get an audit output by passing the `--audit` or `-a` flag. + + +.. 
code-block:: shell-session + + compiletojsonschema -a input.json + compiletojsonschema --audit input.json diff --git a/tests/fixtures/simple/file-dependentSchemas.json b/tests/fixtures/simple/file-dependentSchemas.json new file mode 100644 index 0000000..e72c20d --- /dev/null +++ b/tests/fixtures/simple/file-dependentSchemas.json @@ -0,0 +1,19 @@ +{ + "type": "object", + "properties": { + "credit_card": { + "type": "number" + } + }, + "dependentSchemas": { + "credit_card": { + "properties": { + "address": { + "$ref": "file-definitions.json#/definition/address", + "title": "Credit card number (with optional address elements)" + } + }, + "required": ["address"] + } + } +} diff --git a/tests/fixtures/simple/file-if-then-else.json b/tests/fixtures/simple/file-if-then-else.json new file mode 100644 index 0000000..7acadf7 --- /dev/null +++ b/tests/fixtures/simple/file-if-then-else.json @@ -0,0 +1,33 @@ +{ + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "credit_card": { + "type": "string" + } + }, + "if": { + "properties": { + "credit_card": { + "pattern": "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" + } + } + }, + "then": { + "properties": { + "address": { + "$ref": "file-definitions.json#/definition/address", + "title": "Must have an address with a credit card" + } + }, + "required": ["address"] + }, + "else": { + "properties": { + "cash_on_delivery": { "const": "Yup" } + }, + "required": ["cash_on_delivery"] + } +} diff --git a/tests/fixtures/simple/file-not.json b/tests/fixtures/simple/file-not.json new file mode 100644 index 0000000..18d8c34 --- /dev/null +++ b/tests/fixtures/simple/file-not.json @@ -0,0 +1,33 @@ +{ + "type": "object", + "properties": { + "name": { + "type": "string" + }, + "credit_card": { + "type": "string" + } + }, + "oneOf": [ + { + "not": { + "properties": { + "credit_card": { + "pattern": "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]" + } + } + 
} + }, + { + "properties": { + "address": { + "$ref": "file-definitions.json#/definition/address", + "title": "Must have an address with a credit card" + } + }, + "required": [ + "address" + ] + } + ] +} diff --git a/tests/test_simple.py b/tests/test_simple.py index a769d60..a2ceed2 100644 --- a/tests/test_simple.py +++ b/tests/test_simple.py @@ -23,6 +23,16 @@ def test_in_file(): assert out["properties"]["home_address"]["title"] == "Home Address" assert out["properties"]["home_address"]["description"] == "Where the person lives" + assert ["object", "string"] == ctjs.get_types_used() + assert [ + "$ref", + "definitions", + "description", + "properties", + "title", + "type", + ] == ctjs.get_keywords_used() + def test_in_file_pass_as_schema(): @@ -67,6 +77,15 @@ def test_file_main(): assert out["properties"]["home_address"]["title"] == "Home Address" assert out["properties"]["home_address"]["description"] == "Where the person lives" + assert ["object", "string"] == ctjs.get_types_used() + assert [ + "$ref", + "description", + "properties", + "title", + "type", + ] == ctjs.get_keywords_used() + def test_file_list_anyof(): @@ -141,3 +160,96 @@ def test_file_list_allof(): def test_passing_empty_schema_is_ok(): ctjs = CompileToJsonSchema(input_schema={}) assert "{}" == ctjs.get_as_string() + + +def test_file_dependentSchemas(): + + input_filename = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "fixtures", + "simple", + "file-dependentSchemas.json", + ) + + ctjs = CompileToJsonSchema(input_filename=input_filename) + out = ctjs.get() + + assert ( + out["dependentSchemas"]["credit_card"]["properties"]["address"]["title"] + == "Credit card number (with optional address elements)" + ) + + assert ["number", "object", "string"] == ctjs.get_types_used() + assert [ + "$ref", + "dependentSchemas", + "description", + "properties", + "required", + "title", + "type", + ] == ctjs.get_keywords_used() + + +def test_file_if_then_else(): + + input_filename = os.path.join( 
+ os.path.dirname(os.path.realpath(__file__)), + "fixtures", + "simple", + "file-if-then-else.json", + ) + + ctjs = CompileToJsonSchema(input_filename=input_filename) + out = ctjs.get() + + assert ( + out["then"]["properties"]["address"]["title"] + == "Must have an address with a credit card" + ) + + assert ["object", "string"] == ctjs.get_types_used() + assert [ + "$ref", + "const", + "description", + "else", + "if", + "pattern", + "properties", + "required", + "then", + "title", + "type", + ] == ctjs.get_keywords_used() + + +def test_file_not(): + + input_filename = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + "fixtures", + "simple", + "file-not.json", + ) + + ctjs = CompileToJsonSchema(input_filename=input_filename) + out = ctjs.get() + + assert ( + out["oneOf"][1]["properties"]["address"]["title"] + == "Must have an address with a credit card" + ) + + assert ["object", "string"] == ctjs.get_types_used() + assert [ + "$ref", + "description", + "not", + "oneOf", + "pattern", + "properties", + "required", + "title", + "type", + ] == ctjs.get_keywords_used()