Skip to content

Conversation

@pvillard31
Copy link
Contributor

Summary

NIFI-15047 - Initial JSON Schemas support for NiFi Records

Note: this PR is currently a draft because it is a very large change that will likely require many rounds of discussion and review.

  • Add new converter and validator classes under nifi-json-schema-shared, plus test coverage for conversion and keyword handling
  • Extend RecordField/SimpleRecordSchema with validator support and invoke the validators from StandardSchemaValidator
  • Implement JsonSchemaRegistryService with JSON schema registration, retrieval, and definition exposure; register the service and allow the JSON schema type
  • Update ValidateRecord to capture field- and record-level validation issues with richer provenance messaging; add integration tests using the new registry
  • Harden temporal compatibility checks so that explicit formats are required for coercion; expand unit tests accordingly

In addition to the tests added in the code, I tested manually with a flow of the form:

GenerateFlowFile -> ValidateRecord

This flow was used to confirm the handling of valid and invalid records in the scenarios below:

Minimal Person

Schema

{
  "$id": "https://example.com/person.schema.json",
  "$schema": "https://json-schema.org/draft/2020-12/schema",
  "title": "Person",
  "type": "object",
  "properties": {
    "firstName": {
      "type": "string",
      "description": "The person's first name."
    },
    "lastName": {
      "type": "string",
      "description": "The person's last name."
    },
    "age": {
      "description": "Age in years which must be equal to or greater than zero.",
      "type": "integer",
      "minimum": 0
    }
  }
}

Payload

[{
  "firstName": "John",
  "lastName": "Doe",
  "age": 21
},{
  "firstName": "Jane",
  "lastName": "Doe",
  "age": -1
}]

Object Constraints

Schema

{
  "$id": "urn:nifi:test:object-constraints",
  "title": "ObjectConstraints",
  "type": "object",
  "additionalProperties": true,
  "minProperties": 4,
  "maxProperties": 4,
  "required": [
    "id",
    "nickname",
    "metadata"
  ],
  "properties": {
    "id": {
      "type": "string",
      "minLength": 3
    },
    "nickname": {
      "type": [
        "string",
        "null"
      ],
      "maxLength": 10
    },
    "metadata": {
      "type": "object",
      "required": [
        "active"
      ],
      "additionalProperties": false,
      "properties": {
        "active": {
          "type": "boolean"
        },
        "level": {
          "type": [
            "integer",
            "null"
          ],
          "minimum": 1
        }
      }
    },
    "note": {
      "type": [
        "string",
        "null"
      ]
    }
  }
}

Payload

[
  {
    "testCase": "valid",
    "id": "abc123",
    "nickname": null,
    "metadata": {
      "active": true,
      "level": 5
    }
  },
  {
    "testCase": "invalid-missing-nickname",
    "id": "short",
    "metadata": {
      "active": true
    }
  },
  {
    "testCase": "invalid-too-many-properties",
    "id": "abc123",
    "nickname": "ally",
    "metadata": {
      "active": false,
      "level": 3
    },
    "note": "extra info"
  },
  {
    "testCase": "invalid-additional-property",
    "id": "abc123",
    "nickname": "ally",
    "metadata": {
      "active": true
    },
    "unexpected": "not allowed"
  }
]

String Constraints

Schema

{
  "$id": "urn:nifi:test:string-constraints",
  "title": "StringConstraints",
  "type": "object",
  "additionalProperties": true,
  "required": [
    "sku",
    "releaseDate",
    "releaseTime",
    "lastUpdated",
    "itemUuid"
  ],
  "properties": {
    "sku": {
      "type": "string",
      "minLength": 7,
      "maxLength": 7,
      "pattern": "^[A-Z]{3}-[0-9]{3}$"
    },
    "releaseDate": {
      "type": "string",
      "format": "date"
    },
    "releaseTime": {
      "type": "string",
      "format": "time"
    },
    "lastUpdated": {
      "type": "string",
      "format": "date-time"
    },
    "itemUuid": {
      "type": "string",
      "format": "uuid"
    },
    "description": {
      "type": [
        "string",
        "null"
      ],
      "maxLength": 20
    }
  }
}

Payload

[
  {
    "testCase": "valid",
    "sku": "ABC-123",
    "releaseDate": "2024-01-15",
    "releaseTime": "13:45:00",
    "lastUpdated": "2024-01-15T13:45:00Z",
    "itemUuid": "123e4567-e89b-12d3-a456-426614174000",
    "description": "Launch batch"
  },
  {
    "testCase": "invalid-pattern-and-format",
    "sku": "ab-12",
    "releaseDate": "2024-13-01",
    "releaseTime": "25:61:00",
    "lastUpdated": "not-a-timestamp",
    "itemUuid": "not-a-uuid",
    "description": "This description is way too long to be allowed"
  }
]

Numeric Constraints

Schema

{
  "$id": "urn:nifi:test:numeric-constraints",
  "title": "NumericConstraints",
  "type": "object",
  "additionalProperties": true,
  "required": [
    "quantity",
    "ratio",
    "step"
  ],
  "properties": {
    "quantity": {
      "type": "integer",
      "minimum": 1,
      "maximum": 100
    },
    "ratio": {
      "type": "number",
      "exclusiveMinimum": 0,
      "exclusiveMaximum": 1
    },
    "step": {
      "type": "number",
      "multipleOf": 0.25
    }
  }
}

Payload

[
  {
    "testCase": "valid",
    "quantity": 10,
    "ratio": 0.5,
    "step": 1.75
  },
  {
    "testCase": "invalid-range-and-step",
    "quantity": 0,
    "ratio": 1,
    "step": 0.3
  }
]

Enum and Constants

Schema

{
  "$id": "urn:nifi:test:enum-const",
  "title": "EnumAndConst",
  "type": "object",
  "additionalProperties": true,
  "required": [
    "status",
    "region"
  ],
  "properties": {
    "status": {
      "type": "string",
      "enum": [
        "OPEN",
        "CLOSED",
        "ON_HOLD"
      ]
    },
    "region": {
      "const": "NA"
    },
    "priority": {
      "type": "integer",
      "enum": [
        1,
        2,
        3
      ]
    },
    "alias": {
      "type": [
        "string",
        "null"
      ]
    }
  }
}

Payload

[
  {
    "testCase": "valid",
    "status": "OPEN",
    "region": "NA",
    "priority": 2,
    "alias": null
  },
  {
    "testCase": "invalid-enum-and-const",
    "status": "INVALID",
    "region": "EU",
    "priority": 5
  }
]

Arrays and Nested Objects

Schema

{
  "$id": "urn:nifi:test:array-constraints",
  "title": "ArrayConstraints",
  "type": "object",
  "additionalProperties": true,
  "required": [
    "tags",
    "participants"
  ],
  "properties": {
    "tags": {
      "type": "array",
      "items": {
        "type": "string",
        "minLength": 2
      },
      "minItems": 1,
      "maxItems": 4,
      "uniqueItems": true
    },
    "participants": {
      "type": "array",
      "minItems": 1,
      "items": {
        "type": "object",
        "required": [
          "name"
        ],
        "additionalProperties": false,
        "properties": {
          "name": {
            "type": "string",
            "minLength": 1
          },
          "age": {
            "type": [
              "integer",
              "null"
            ],
            "minimum": 0
          }
        }
      }
    }
  }
}

Payload

[
  {
    "testCase": "valid",
    "tags": [
      "alpha",
      "beta"
    ],
    "participants": [
      {
        "name": "Alice",
        "age": 30
      },
      {
        "name": "Bob",
        "age": null
      }
    ]
  },
  {
    "testCase": "invalid-duplicate-tags",
    "tags": [
      "alpha",
      "alpha"
    ],
    "participants": [
      {
        "name": "Carol",
        "age": 25
      }
    ]
  },
  {
    "testCase": "invalid-empty-tags",
    "tags": [],
    "participants": [
      {
        "name": "Dana",
        "age": 28
      }
    ]
  },
  {
    "testCase": "invalid-too-many-tags",
    "tags": [
      "aa",
      "bb",
      "cc",
      "dd",
      "ee"
    ],
    "participants": [
      {
        "name": "Evan",
        "age": 32
      }
    ]
  },
  {
    "testCase": "invalid-missing-participant-name",
    "tags": [
      "alpha",
      "delta"
    ],
    "participants": [
      {
        "age": 22
      }
    ]
  },
  {
    "testCase": "invalid-empty-participants",
    "tags": [
      "alpha",
      "gamma"
    ],
    "participants": []
  }
]

Dynamic Headers 1

Schema

{
  "$id": "urn:test:dynamic-headers",
  "type": "object",
  "additionalProperties": false,
  "properties": {
    "fixed": {
      "type": "string"
    }
  },
  "patternProperties": {
    "^x-": {
      "type": "string",
      "minLength": 2
    }
  },
  "required": [
    "fixed"
  ]
}

Payload

[
  {
    "fixed": "value",
    "x-trace": "abc123",
    "x-user": "js"
  },
  {
    "fixed": "value",
    "x-short": "a",
    "z-other": "not allowed"
  }
]

Dynamic Headers 2

Schema

{
  "$id": "urn:test:device-metrics",
  "type": "object",
  "additionalProperties": false,
  "minProperties": 2,
  "patternProperties": {
    "^device_[0-9]+$": {
      "type": "object",
      "required": [
        "temperature",
        "status"
      ],
      "properties": {
        "temperature": {
          "type": "number",
          "minimum": -40,
          "maximum": 85
        },
        "status": {
          "type": "string",
          "enum": [
            "OK",
            "WARN",
            "FAIL"
          ]
        }
      },
      "additionalProperties": false
    }
  },
  "properties": {
    "batchId": {
      "type": "string",
      "minLength": 8
    }
  },
  "required": [
    "batchId"
  ]
}

Payload

[
  {
    "batchId": "2024-09-30",
    "device_101": {
      "temperature": 21.5,
      "status": "OK"
    },
    "device_202": {
      "temperature": 42,
      "status": "WARN"
    }
  },
  {
    "batchId": "2024-09-30",
    "device_A12": {
      "temperature": 20,
      "status": "OK"
    },
    "device_303": {
      "temperature": 120,
      "status": "OK"
    }
  }
]

@pvillard31 pvillard31 marked this pull request as draft October 3, 2025 14:15
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

1 participant