From 0fcc00e55855e2cc896371cd415b3a969c0aea95 Mon Sep 17 00:00:00 2001
From: James B <james.baster@opendataservices.coop>
Date: Wed, 1 Jan 2025 14:30:12 +0000
Subject: [PATCH 1/6] Cache results of processing.

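Repeated calls to get() and get_as_string() now reuse the cached result
instead of reprocessing the schema each time.

This also prepares for the next commit, which collects audit data during
the same processing pass.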
---
 CHANGELOG.md                               |  3 ++
 compiletojsonschema/compiletojsonschema.py | 38 ++++++++++++++--------
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index e95f37c..fc5fce6 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -6,6 +6,9 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 ## [Unreleased]
 
+### Added
+
+- Cache results of processing. Repeated calls to get methods will be faster.
 
 ### Removed
 
diff --git a/compiletojsonschema/compiletojsonschema.py b/compiletojsonschema/compiletojsonschema.py
index a670635..52235af 100644
--- a/compiletojsonschema/compiletojsonschema.py
+++ b/compiletojsonschema/compiletojsonschema.py
@@ -27,8 +27,23 @@ def __init__(
             self.codelist_base_directory = os.path.expanduser(codelist_base_directory)
         else:
             self.codelist_base_directory = os.getcwd()
+        # These vars hold output
+        self._processed = False
+        self._output_json = None
 
     def get(self):
+        self.__process()
+        return self._output_json
+
+    def get_as_string(self):
+        return json.dumps(self.get(), indent=2)
+
+    def __process(self):
+        # If already processed, return .....
+        if self._processed:
+            return
+
+        # Process now ....
         if self.input_filename:
             with open(self.input_filename) as fp:
                 resolved = jsonref.load(
@@ -42,15 +57,10 @@ def get(self):
             resolved = jsonref.JsonRef.replace_refs(self.input_schema)
         else:
             raise Exception("Must pass input_filename or input_schema")
+        self._output_json = self.__process_data(resolved)
+        self._processed = True
 
-        resolved = self.__process(resolved)
-
-        return resolved
-
-    def get_as_string(self):
-        return json.dumps(self.get(), indent=2)
-
-    def __process(self, source):
+    def __process_data(self, source):
 
         out = deepcopy(source)
 
@@ -61,24 +71,26 @@ def __process(self, source):
 
         if "properties" in source:
             for leaf in list(source["properties"]):
-                out["properties"][leaf] = self.__process(source["properties"][leaf])
+                out["properties"][leaf] = self.__process_data(
+                    source["properties"][leaf]
+                )
             if self.set_additional_properties_false_everywhere:
                 out["additionalProperties"] = False
 
         if "items" in source:
-            out["items"] = self.__process(source["items"])
+            out["items"] = self.__process_data(source["items"])
 
         if "oneOf" in source:
             for idx, data in enumerate(list(source["oneOf"])):
-                out["oneOf"][idx] = self.__process(source["oneOf"][idx])
+                out["oneOf"][idx] = self.__process_data(source["oneOf"][idx])
 
         if "anyOf" in source:
             for idx, data in enumerate(list(source["anyOf"])):
-                out["anyOf"][idx] = self.__process(source["anyOf"][idx])
+                out["anyOf"][idx] = self.__process_data(source["anyOf"][idx])
 
         if "allOf" in source:
             for idx, data in enumerate(list(source["allOf"])):
-                out["allOf"][idx] = self.__process(source["allOf"][idx])
+                out["allOf"][idx] = self.__process_data(source["allOf"][idx])
 
         if "codelist" in source and (
             "openCodelist" not in source or not source["openCodelist"]

From 319edb858bdc482ebbe98035ae460bdf6148f567 Mon Sep 17 00:00:00 2001
From: James B <james.baster@opendataservices.coop>
Date: Wed, 1 Jan 2025 14:50:02 +0000
Subject: [PATCH 2/6] Added audit functionality

---
 CHANGELOG.md                               |  1 +
 compiletojsonschema/compiletojsonschema.py | 24 ++++++++++++++++++++++
 tests/test_simple.py                       | 19 +++++++++++++++++
 3 files changed, 44 insertions(+)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index fc5fce6..367121a 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -9,6 +9,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 ### Added
 
 - Cache results of processing. Repeated calls to get methods will be faster.
+- Added audit functionality. Thanks to Duncan and https://github.com/OpenDataServices/json-schema-auditor
 
 ### Removed
 
diff --git a/compiletojsonschema/compiletojsonschema.py b/compiletojsonschema/compiletojsonschema.py
index 52235af..bbd0b52 100644
--- a/compiletojsonschema/compiletojsonschema.py
+++ b/compiletojsonschema/compiletojsonschema.py
@@ -30,6 +30,8 @@ def __init__(
         # These vars hold output
         self._processed = False
         self._output_json = None
+        self._output_types_used = None
+        self._output_keywords_used = None
 
     def get(self):
         self.__process()
@@ -38,12 +40,22 @@ def get(self):
     def get_as_string(self):
         return json.dumps(self.get(), indent=2)
 
+    def get_types_used(self):
+        self.__process()
+        return sorted(self._output_types_used.keys())
+
+    def get_keywords_used(self):
+        self.__process()
+        return sorted(self._output_keywords_used.keys())
+
     def __process(self):
         # If already processed, return .....
         if self._processed:
             return
 
         # Process now ....
+        self._output_types_used = {}
+        self._output_keywords_used = {}
         if self.input_filename:
             with open(self.input_filename) as fp:
                 resolved = jsonref.load(
@@ -64,7 +76,19 @@ def __process_data(self, source):
 
         out = deepcopy(source)
 
+        for keyword in source:
+            self._output_keywords_used[keyword] = {}
+
+        if "type" in source:
+            if isinstance(source["type"], str):
+                self._output_types_used[source["type"]] = {}
+            elif isinstance(source["type"], list):
+                for t in source["type"]:
+                    if isinstance(t, str):
+                        self._output_types_used[t] = {}
+
         if hasattr(source, "__reference__"):
+            self._output_keywords_used["$ref"] = {}
             for attr in list(source.__reference__):
                 if not attr == "$ref":
                     out[attr] = source.__reference__[attr]
diff --git a/tests/test_simple.py b/tests/test_simple.py
index a769d60..09febb1 100644
--- a/tests/test_simple.py
+++ b/tests/test_simple.py
@@ -23,6 +23,16 @@ def test_in_file():
     assert out["properties"]["home_address"]["title"] == "Home Address"
     assert out["properties"]["home_address"]["description"] == "Where the person lives"
 
+    assert ["object", "string"] == ctjs.get_types_used()
+    assert [
+        "$ref",
+        "definitions",
+        "description",
+        "properties",
+        "title",
+        "type",
+    ] == ctjs.get_keywords_used()
+
 
 def test_in_file_pass_as_schema():
 
@@ -67,6 +77,15 @@ def test_file_main():
     assert out["properties"]["home_address"]["title"] == "Home Address"
     assert out["properties"]["home_address"]["description"] == "Where the person lives"
 
+    assert ["object", "string"] == ctjs.get_types_used()
+    assert [
+        "$ref",
+        "description",
+        "properties",
+        "title",
+        "type",
+    ] == ctjs.get_keywords_used()
+
 
 def test_file_list_anyof():
 

From 21e2d9065ac0d86db34fab07ccf93c099457eb79 Mon Sep 17 00:00:00 2001
From: James B <james.baster@opendataservices.coop>
Date: Wed, 1 Jan 2025 14:57:56 +0000
Subject: [PATCH 3/6] Add Audit option to CLI

---
 compiletojsonschema/cli/__main__.py | 20 +++++++++++++++++++-
 docs/cli.rst                        | 11 +++++++++++
 2 files changed, 30 insertions(+), 1 deletion(-)

diff --git a/compiletojsonschema/cli/__main__.py b/compiletojsonschema/cli/__main__.py
index 993fb33..6bce6d1 100644
--- a/compiletojsonschema/cli/__main__.py
+++ b/compiletojsonschema/cli/__main__.py
@@ -6,6 +6,7 @@
 def main():
     parser = argparse.ArgumentParser(description="Compile To JSON Schema CLI")
 
+    # Input Arguments
     parser.add_argument("input_file")
     parser.add_argument(
         "-s",
@@ -19,6 +20,15 @@ def main():
         help="Which directory we should look in for codelists",
     )
 
+    # Output Arguments
+    parser.add_argument(
+        "-a",
+        "--audit",
+        help="Instead of compiled schema, output an audit of the input schema",
+        action="store_true",
+    )
+
+    # Process
     args = parser.parse_args()
 
     ctjs = CompileToJsonSchema(
@@ -26,4 +36,12 @@ def main():
         set_additional_properties_false_everywhere=args.set_additional_properties_false_everywhere,
         codelist_base_directory=args.codelist_base_directory,
     )
-    print(ctjs.get_as_string())
+
+    if args.audit:
+        print("\nKeywords used in the schema:")
+        print(", ".join(ctjs.get_keywords_used()))
+
+        print("\nTypes used in the schema:")
+        print(", ".join(ctjs.get_types_used()))
+    else:
+        print(ctjs.get_as_string())
diff --git a/docs/cli.rst b/docs/cli.rst
index 6dc3e1f..3cb8c81 100644
--- a/docs/cli.rst
+++ b/docs/cli.rst
@@ -37,3 +37,14 @@ To enable this mode, pass the `--set-additional-properties-false-everywhere` or
 
     compiletojsonschema -s input.json
     compiletojsonschema --set-additional-properties-false-everywhere input.json
+
+Get Audit output
+----------------
+
+Instead of the compiled JSON schema, you can get an audit output by passing the `--audit` or `-a` flag.
+
+
+.. code-block:: shell-session
+
+    compiletojsonschema -a input.json
+    compiletojsonschema --audit input.json

From 502547d7417e38ab50f6fd37f99a1ae6e5089ee9 Mon Sep 17 00:00:00 2001
From: James B <james.baster@opendataservices.coop>
Date: Wed, 1 Jan 2025 15:09:54 +0000
Subject: [PATCH 4/6] Can process dependentSchemas keyword

---
 CHANGELOG.md                                  |  1 +
 compiletojsonschema/compiletojsonschema.py    |  6 ++++
 .../simple/file-dependentSchemas.json         | 19 ++++++++++++
 tests/test_simple.py                          | 29 +++++++++++++++++++
 4 files changed, 55 insertions(+)
 create mode 100644 tests/fixtures/simple/file-dependentSchemas.json

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 367121a..94425c9 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -10,6 +10,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 
 - Cache results of processing. Repeated calls to get methods will be faster.
 - Added audit functionality. Thanks to Duncan and https://github.com/OpenDataServices/json-schema-auditor
+- Can process dependentSchemas keyword
 
 ### Removed
 
diff --git a/compiletojsonschema/compiletojsonschema.py b/compiletojsonschema/compiletojsonschema.py
index bbd0b52..841d480 100644
--- a/compiletojsonschema/compiletojsonschema.py
+++ b/compiletojsonschema/compiletojsonschema.py
@@ -116,6 +116,12 @@ def __process_data(self, source):
             for idx, data in enumerate(list(source["allOf"])):
                 out["allOf"][idx] = self.__process_data(source["allOf"][idx])
 
+        if "dependentSchemas" in source and isinstance(
+            source["dependentSchemas"], dict
+        ):
+            for k, v in source["dependentSchemas"].items():
+                out["dependentSchemas"][k] = self.__process_data(v)
+
         if "codelist" in source and (
             "openCodelist" not in source or not source["openCodelist"]
         ):
diff --git a/tests/fixtures/simple/file-dependentSchemas.json b/tests/fixtures/simple/file-dependentSchemas.json
new file mode 100644
index 0000000..e72c20d
--- /dev/null
+++ b/tests/fixtures/simple/file-dependentSchemas.json
@@ -0,0 +1,19 @@
+{
+  "type": "object",
+  "properties": {
+    "credit_card": {
+      "type": "number"
+    }
+  },
+  "dependentSchemas": {
+    "credit_card": {
+      "properties": {
+        "address": {
+          "$ref": "file-definitions.json#/definition/address",
+          "title": "Credit card number (with optional address elements)"
+        }
+      },
+      "required": ["address"]
+    }
+  }
+}
diff --git a/tests/test_simple.py b/tests/test_simple.py
index 09febb1..1144b3a 100644
--- a/tests/test_simple.py
+++ b/tests/test_simple.py
@@ -160,3 +160,32 @@ def test_file_list_allof():
 def test_passing_empty_schema_is_ok():
     ctjs = CompileToJsonSchema(input_schema={})
     assert "{}" == ctjs.get_as_string()
+
+
+def test_file_dependentSchemas():
+
+    input_filename = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        "fixtures",
+        "simple",
+        "file-dependentSchemas.json",
+    )
+
+    ctjs = CompileToJsonSchema(input_filename=input_filename)
+    out = ctjs.get()
+
+    assert (
+        out["dependentSchemas"]["credit_card"]["properties"]["address"]["title"]
+        == "Credit card number (with optional address elements)"
+    )
+
+    assert ["number", "object", "string"] == ctjs.get_types_used()
+    assert [
+        "$ref",
+        "dependentSchemas",
+        "description",
+        "properties",
+        "required",
+        "title",
+        "type",
+    ] == ctjs.get_keywords_used()

From 5baafc39cffb49901114ed482e9d5aeb5cb0a014 Mon Sep 17 00:00:00 2001
From: James B <james.baster@opendataservices.coop>
Date: Wed, 1 Jan 2025 15:49:03 +0000
Subject: [PATCH 5/6] Can process if / then / else keywords

---
 CHANGELOG.md                                 |  1 +
 compiletojsonschema/compiletojsonschema.py   |  4 +++
 tests/fixtures/simple/file-if-then-else.json | 33 ++++++++++++++++++++
 tests/test_simple.py                         | 33 ++++++++++++++++++++
 4 files changed, 71 insertions(+)
 create mode 100644 tests/fixtures/simple/file-if-then-else.json

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 94425c9..ef4f145 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -11,6 +11,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - Cache results of processing. Repeated calls to get methods will be faster.
 - Added audit functionality. Thanks to Duncan and https://github.com/OpenDataServices/json-schema-auditor
 - Can process dependentSchemas keyword
+- Can process if / then / else keywords
 
 ### Removed
 
diff --git a/compiletojsonschema/compiletojsonschema.py b/compiletojsonschema/compiletojsonschema.py
index 841d480..3c686d4 100644
--- a/compiletojsonschema/compiletojsonschema.py
+++ b/compiletojsonschema/compiletojsonschema.py
@@ -122,6 +122,10 @@ def __process_data(self, source):
             for k, v in source["dependentSchemas"].items():
                 out["dependentSchemas"][k] = self.__process_data(v)
 
+        for keyword in ["if", "then", "else"]:
+            if keyword in source:
+                out[keyword] = self.__process_data(source[keyword])
+
         if "codelist" in source and (
             "openCodelist" not in source or not source["openCodelist"]
         ):
diff --git a/tests/fixtures/simple/file-if-then-else.json b/tests/fixtures/simple/file-if-then-else.json
new file mode 100644
index 0000000..7acadf7
--- /dev/null
+++ b/tests/fixtures/simple/file-if-then-else.json
@@ -0,0 +1,33 @@
+{
+  "type": "object",
+  "properties": {
+    "name": {
+      "type": "string"
+    },
+    "credit_card": {
+      "type": "string"
+    }
+  },
+  "if": {
+    "properties": {
+      "credit_card": {
+        "pattern": "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]"
+      }
+    }
+  },
+  "then": {
+    "properties": {
+      "address": {
+        "$ref": "file-definitions.json#/definition/address",
+        "title": "Must have an address with a credit card"
+      }
+    },
+    "required": ["address"]
+  },
+  "else": {
+    "properties": {
+      "cash_on_delivery": { "const": "Yup" }
+    },
+    "required": ["cash_on_delivery"]
+  }
+}
diff --git a/tests/test_simple.py b/tests/test_simple.py
index 1144b3a..59618ff 100644
--- a/tests/test_simple.py
+++ b/tests/test_simple.py
@@ -189,3 +189,36 @@ def test_file_dependentSchemas():
         "title",
         "type",
     ] == ctjs.get_keywords_used()
+
+
+def test_file_if_then_else():
+
+    input_filename = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        "fixtures",
+        "simple",
+        "file-if-then-else.json",
+    )
+
+    ctjs = CompileToJsonSchema(input_filename=input_filename)
+    out = ctjs.get()
+
+    assert (
+        out["then"]["properties"]["address"]["title"]
+        == "Must have an address with a credit card"
+    )
+
+    assert ["object", "string"] == ctjs.get_types_used()
+    assert [
+        "$ref",
+        "const",
+        "description",
+        "else",
+        "if",
+        "pattern",
+        "properties",
+        "required",
+        "then",
+        "title",
+        "type",
+    ] == ctjs.get_keywords_used()

From 379c884b4d03506399d049fe7277b86403688236 Mon Sep 17 00:00:00 2001
From: James B <james.baster@opendataservices.coop>
Date: Wed, 1 Jan 2025 16:38:13 +0000
Subject: [PATCH 6/6] Can process not keyword

---
 CHANGELOG.md                               |  1 +
 compiletojsonschema/compiletojsonschema.py |  2 +-
 tests/fixtures/simple/file-not.json        | 33 ++++++++++++++++++++++
 tests/test_simple.py                       | 31 ++++++++++++++++++++
 4 files changed, 66 insertions(+), 1 deletion(-)
 create mode 100644 tests/fixtures/simple/file-not.json

diff --git a/CHANGELOG.md b/CHANGELOG.md
index ef4f145..b2dc59d 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -12,6 +12,7 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
 - Added audit functionality. Thanks to Duncan and https://github.com/OpenDataServices/json-schema-auditor
 - Can process dependentSchemas keyword
 - Can process if / then / else keywords
+- Can process not keyword
 
 ### Removed
 
diff --git a/compiletojsonschema/compiletojsonschema.py b/compiletojsonschema/compiletojsonschema.py
index 3c686d4..7c186ac 100644
--- a/compiletojsonschema/compiletojsonschema.py
+++ b/compiletojsonschema/compiletojsonschema.py
@@ -122,7 +122,7 @@ def __process_data(self, source):
             for k, v in source["dependentSchemas"].items():
                 out["dependentSchemas"][k] = self.__process_data(v)
 
-        for keyword in ["if", "then", "else"]:
+        for keyword in ["if", "then", "else", "not"]:
             if keyword in source:
                 out[keyword] = self.__process_data(source[keyword])
 
diff --git a/tests/fixtures/simple/file-not.json b/tests/fixtures/simple/file-not.json
new file mode 100644
index 0000000..18d8c34
--- /dev/null
+++ b/tests/fixtures/simple/file-not.json
@@ -0,0 +1,33 @@
+{
+  "type": "object",
+  "properties": {
+    "name": {
+      "type": "string"
+    },
+    "credit_card": {
+      "type": "string"
+    }
+  },
+  "oneOf": [
+    {
+      "not": {
+        "properties": {
+          "credit_card": {
+            "pattern": "[0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]"
+          }
+        }
+      }
+    },
+    {
+      "properties": {
+        "address": {
+          "$ref": "file-definitions.json#/definition/address",
+          "title": "Must have an address with a credit card"
+        }
+      },
+      "required": [
+        "address"
+      ]
+    }
+  ]
+}
diff --git a/tests/test_simple.py b/tests/test_simple.py
index 59618ff..a2ceed2 100644
--- a/tests/test_simple.py
+++ b/tests/test_simple.py
@@ -222,3 +222,34 @@ def test_file_if_then_else():
         "title",
         "type",
     ] == ctjs.get_keywords_used()
+
+
+def test_file_not():
+
+    input_filename = os.path.join(
+        os.path.dirname(os.path.realpath(__file__)),
+        "fixtures",
+        "simple",
+        "file-not.json",
+    )
+
+    ctjs = CompileToJsonSchema(input_filename=input_filename)
+    out = ctjs.get()
+
+    assert (
+        out["oneOf"][1]["properties"]["address"]["title"]
+        == "Must have an address with a credit card"
+    )
+
+    assert ["object", "string"] == ctjs.get_types_used()
+    assert [
+        "$ref",
+        "description",
+        "not",
+        "oneOf",
+        "pattern",
+        "properties",
+        "required",
+        "title",
+        "type",
+    ] == ctjs.get_keywords_used()