Skip to content

2025 01 01 #43

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 6 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,13 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.

## [Unreleased]

### Added

- Cache the results of processing, so repeated calls to the `get*` methods are faster.
- Added audit functionality. Thanks to Duncan and https://github.com/OpenDataServices/json-schema-auditor
- Can process dependentSchemas keyword
- Can process if / then / else keywords
- Can process not keyword

### Removed

Expand Down
20 changes: 19 additions & 1 deletion compiletojsonschema/cli/__main__.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
def main():
parser = argparse.ArgumentParser(description="Compile To JSON Schema CLI")

# Input Arguments
parser.add_argument("input_file")
parser.add_argument(
"-s",
Expand All @@ -19,11 +20,28 @@ def main():
help="Which directory we should look in for codelists",
)

# Output Arguments
parser.add_argument(
"-a",
"--audit",
help="Instead of compiled schema, output an audit of the input schema",
action="store_true",
)

# Process
args = parser.parse_args()

ctjs = CompileToJsonSchema(
input_filename=args.input_file,
set_additional_properties_false_everywhere=args.set_additional_properties_false_everywhere,
codelist_base_directory=args.codelist_base_directory,
)
print(ctjs.get_as_string())

if args.audit:
print("\nKeywords used in the schema:")
print(", ".join(ctjs.get_keywords_used()))

print("\nTypes used in the schema:")
print(", ".join(ctjs.get_types_used()))
else:
print(ctjs.get_as_string())
70 changes: 58 additions & 12 deletions compiletojsonschema/compiletojsonschema.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,35 @@ def __init__(
self.codelist_base_directory = os.path.expanduser(codelist_base_directory)
else:
self.codelist_base_directory = os.getcwd()
# These vars hold output
self._processed = False
self._output_json = None
self._output_types_used = None
self._output_keywords_used = None

def get(self):
    """Return the compiled schema.

    Processing is triggered here; the result is read back from the
    instance attribute that processing fills in.
    """
    # Make sure the output attributes have been populated before reading.
    self.__process()
    compiled = self._output_json
    return compiled

def get_as_string(self):
    """Return the result of ``get()`` serialised as JSON text, indented by 2."""
    compiled = self.get()
    return json.dumps(compiled, indent=2)

def get_types_used(self):
    """Return the sorted type names recorded while processing the input.

    Processing is triggered first so the record is populated.

    Returns:
        list: The keys of the recorded-types mapping, in ascending order.
    """
    self.__process()
    # Iterating a dict yields its keys, so no explicit .keys() view is needed.
    return sorted(self._output_types_used)

def get_keywords_used(self):
    """Return the sorted keyword names recorded while processing the input.

    Processing is triggered first so the record is populated.

    Returns:
        list: The keys of the recorded-keywords mapping, in ascending order.
    """
    self.__process()
    # Iterating a dict yields its keys, so no explicit .keys() view is needed.
    return sorted(self._output_keywords_used)

def __process(self):
# If already processed, return .....
if self._processed:
return

# Process now ....
self._output_types_used = {}
self._output_keywords_used = {}
if self.input_filename:
with open(self.input_filename) as fp:
resolved = jsonref.load(
Expand All @@ -42,43 +69,62 @@ def get(self):
resolved = jsonref.JsonRef.replace_refs(self.input_schema)
else:
raise Exception("Must pass input_filename or input_schema")
self._output_json = self.__process_data(resolved)
self._processed = True

resolved = self.__process(resolved)

return resolved
def __process_data(self, source):

def get_as_string(self):
return json.dumps(self.get(), indent=2)
out = deepcopy(source)

def __process(self, source):
for keyword in source:
self._output_keywords_used[keyword] = {}

out = deepcopy(source)
if "type" in source:
if isinstance(source["type"], str):
self._output_types_used[source["type"]] = {}
elif isinstance(source["type"], list):
for t in source["type"]:
if isinstance(t, str):
self._output_types_used[t] = {}

if hasattr(source, "__reference__"):
self._output_keywords_used["$ref"] = {}
for attr in list(source.__reference__):
if not attr == "$ref":
out[attr] = source.__reference__[attr]

if "properties" in source:
for leaf in list(source["properties"]):
out["properties"][leaf] = self.__process(source["properties"][leaf])
out["properties"][leaf] = self.__process_data(
source["properties"][leaf]
)
if self.set_additional_properties_false_everywhere:
out["additionalProperties"] = False

if "items" in source:
out["items"] = self.__process(source["items"])
out["items"] = self.__process_data(source["items"])

if "oneOf" in source:
for idx, data in enumerate(list(source["oneOf"])):
out["oneOf"][idx] = self.__process(source["oneOf"][idx])
out["oneOf"][idx] = self.__process_data(source["oneOf"][idx])

if "anyOf" in source:
for idx, data in enumerate(list(source["anyOf"])):
out["anyOf"][idx] = self.__process(source["anyOf"][idx])
out["anyOf"][idx] = self.__process_data(source["anyOf"][idx])

if "allOf" in source:
for idx, data in enumerate(list(source["allOf"])):
out["allOf"][idx] = self.__process(source["allOf"][idx])
out["allOf"][idx] = self.__process_data(source["allOf"][idx])

if "dependentSchemas" in source and isinstance(
source["dependentSchemas"], dict
):
for k, v in source["dependentSchemas"].items():
out["dependentSchemas"][k] = self.__process_data(v)

for keyword in ["if", "then", "else", "not"]:
if keyword in source:
out[keyword] = self.__process_data(source[keyword])

if "codelist" in source and (
"openCodelist" not in source or not source["openCodelist"]
Expand Down
11 changes: 11 additions & 0 deletions docs/cli.rst
Original file line number Diff line number Diff line change
Expand Up @@ -37,3 +37,14 @@ To enable this mode, pass the `--set-additional-properties-false-everywhere` or

compiletojsonschema -s input.json
compiletojsonschema --set-additional-properties-false-everywhere input.json

Get Audit output
----------------

Instead of the compiled JSON schema, you can get an audit output by passing the `--audit` or `-a` flag.


.. code-block:: shell-session

compiletojsonschema -a input.json
compiletojsonschema --audit input.json
19 changes: 19 additions & 0 deletions tests/fixtures/simple/file-dependentSchemas.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
{
"type": "object",
"properties": {
"credit_card": {
"type": "number"
}
},
"dependentSchemas": {
"credit_card": {
"properties": {
"address": {
"$ref": "file-definitions.json#/definition/address",
"title": "Credit card number (with optional address elements)"
}
},
"required": ["address"]
}
}
}
33 changes: 33 additions & 0 deletions tests/fixtures/simple/file-if-then-else.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"type": "object",
"properties": {
"name": {
"type": "string"
},
"credit_card": {
"type": "string"
}
},
"if": {
"properties": {
"credit_card": {
"pattern": "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]"
}
}
},
"then": {
"properties": {
"address": {
"$ref": "file-definitions.json#/definition/address",
"title": "Must have an address with a credit card"
}
},
"required": ["address"]
},
"else": {
"properties": {
"cash_on_delivery": { "const": "Yup" }
},
"required": ["cash_on_delivery"]
}
}
33 changes: 33 additions & 0 deletions tests/fixtures/simple/file-not.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
{
"type": "object",
"properties": {
"name": {
"type": "string"
},
"credit_card": {
"type": "string"
}
},
"oneOf": [
{
"not": {
"properties": {
"credit_card": {
"pattern": "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]"
}
}
}
},
{
"properties": {
"address": {
"$ref": "file-definitions.json#/definition/address",
"title": "Must have an address with a credit card"
}
},
"required": [
"address"
]
}
]
}
112 changes: 112 additions & 0 deletions tests/test_simple.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,16 @@ def test_in_file():
assert out["properties"]["home_address"]["title"] == "Home Address"
assert out["properties"]["home_address"]["description"] == "Where the person lives"

assert ["object", "string"] == ctjs.get_types_used()
assert [
"$ref",
"definitions",
"description",
"properties",
"title",
"type",
] == ctjs.get_keywords_used()


def test_in_file_pass_as_schema():

Expand Down Expand Up @@ -67,6 +77,15 @@ def test_file_main():
assert out["properties"]["home_address"]["title"] == "Home Address"
assert out["properties"]["home_address"]["description"] == "Where the person lives"

assert ["object", "string"] == ctjs.get_types_used()
assert [
"$ref",
"description",
"properties",
"title",
"type",
] == ctjs.get_keywords_used()


def test_file_list_anyof():

Expand Down Expand Up @@ -141,3 +160,96 @@ def test_file_list_allof():
def test_passing_empty_schema_is_ok():
ctjs = CompileToJsonSchema(input_schema={})
assert "{}" == ctjs.get_as_string()


def test_file_dependentSchemas():
    """Compile the dependentSchemas fixture and check the title and audit lists."""
    fixtures_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "fixtures", "simple"
    )
    compiler = CompileToJsonSchema(
        input_filename=os.path.join(fixtures_dir, "file-dependentSchemas.json")
    )
    compiled = compiler.get()

    # The title written next to the $ref in the fixture must be in the output.
    address = compiled["dependentSchemas"]["credit_card"]["properties"]["address"]
    assert address["title"] == "Credit card number (with optional address elements)"

    expected_types = ["number", "object", "string"]
    expected_keywords = [
        "$ref",
        "dependentSchemas",
        "description",
        "properties",
        "required",
        "title",
        "type",
    ]
    assert expected_types == compiler.get_types_used()
    assert expected_keywords == compiler.get_keywords_used()


def test_file_if_then_else():
    """Compile the if/then/else fixture and check the title and audit lists."""
    fixtures_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "fixtures", "simple"
    )
    compiler = CompileToJsonSchema(
        input_filename=os.path.join(fixtures_dir, "file-if-then-else.json")
    )
    compiled = compiler.get()

    # The title written next to the $ref under "then" must be in the output.
    address = compiled["then"]["properties"]["address"]
    assert address["title"] == "Must have an address with a credit card"

    expected_types = ["object", "string"]
    expected_keywords = [
        "$ref",
        "const",
        "description",
        "else",
        "if",
        "pattern",
        "properties",
        "required",
        "then",
        "title",
        "type",
    ]
    assert expected_types == compiler.get_types_used()
    assert expected_keywords == compiler.get_keywords_used()


def test_file_not():
    """Compile the "not" fixture and check the title and audit lists."""
    fixtures_dir = os.path.join(
        os.path.dirname(os.path.realpath(__file__)), "fixtures", "simple"
    )
    compiler = CompileToJsonSchema(
        input_filename=os.path.join(fixtures_dir, "file-not.json")
    )
    compiled = compiler.get()

    # The second oneOf branch carries the $ref whose title must be in the output.
    address = compiled["oneOf"][1]["properties"]["address"]
    assert address["title"] == "Must have an address with a credit card"

    expected_types = ["object", "string"]
    expected_keywords = [
        "$ref",
        "description",
        "not",
        "oneOf",
        "pattern",
        "properties",
        "required",
        "title",
        "type",
    ]
    assert expected_types == compiler.get_types_used()
    assert expected_keywords == compiler.get_keywords_used()
Loading