NIFI-15047 - Initial JSON Schemas support for NiFi Records #10380
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Summary
NIFI-15047 - Initial JSON Schemas support for NiFi Records
Note - the PR is currently in DRAFT given this is a very large change that will likely require many rounds of discussions and review cycles.
- Add nifi-json-schema-shared, plus test coverage for conversion and keyword handling
- Extend RecordField/SimpleRecordSchema with validator support and invoke them from StandardSchemaValidator
- Add JsonSchemaRegistryService with JSON schema registration, retrieval, and definition exposure; register service and allow JSON schema type
- Update ValidateRecord to capture field- and record-level validation issues with richer provenance messaging; add integration tests using the new registry

In addition to the tests added in the code, I used a flow to confirm the handling of valid/invalid records in the scenarios below:
Minimal Person
Schema
{ "$id": "https://example.com/person.schema.json", "$schema": "https://json-schema.org/draft/2020-12/schema", "title": "Person", "type": "object", "properties": { "firstName": { "type": "string", "description": "The person's first name." }, "lastName": { "type": "string", "description": "The person's last name." }, "age": { "description": "Age in years which must be equal to or greater than zero.", "type": "integer", "minimum": 0 } } }
Payload
[{ "firstName": "John", "lastName": "Doe", "age": 21 },{ "firstName": "Jane", "lastName": "Doe", "age": -1 }]
Object Constraints
Schema
{ "$id": "urn:nifi:test:object-constraints", "title": "ObjectConstraints", "type": "object", "additionalProperties": true, "minProperties": 4, "maxProperties": 4, "required": [ "id", "nickname", "metadata" ], "properties": { "id": { "type": "string", "minLength": 3 }, "nickname": { "type": [ "string", "null" ], "maxLength": 10 }, "metadata": { "type": "object", "required": [ "active" ], "additionalProperties": false, "properties": { "active": { "type": "boolean" }, "level": { "type": [ "integer", "null" ], "minimum": 1 } } }, "note": { "type": [ "string", "null" ] } } }
Payload
[ { "testCase": "valid", "id": "abc123", "nickname": null, "metadata": { "active": true, "level": 5 } }, { "testCase": "invalid-missing-nickname", "id": "short", "metadata": { "active": true } }, { "testCase": "invalid-too-many-properties", "id": "abc123", "nickname": "ally", "metadata": { "active": false, "level": 3 }, "note": "extra info" }, { "testCase": "invalid-additional-property", "id": "abc123", "nickname": "ally", "metadata": { "active": true }, "unexpected": "not allowed" } ]
String Constraints
Schema
{ "$id": "urn:nifi:test:string-constraints", "title": "StringConstraints", "type": "object", "additionalProperties": true, "required": [ "sku", "releaseDate", "releaseTime", "lastUpdated", "itemUuid" ], "properties": { "sku": { "type": "string", "minLength": 7, "maxLength": 7, "pattern": "^[A-Z]{3}-[0-9]{3}$" }, "releaseDate": { "type": "string", "format": "date" }, "releaseTime": { "type": "string", "format": "time" }, "lastUpdated": { "type": "string", "format": "date-time" }, "itemUuid": { "type": "string", "format": "uuid" }, "description": { "type": [ "string", "null" ], "maxLength": 20 } } }
Payload
[ { "testCase": "valid", "sku": "ABC-123", "releaseDate": "2024-01-15", "releaseTime": "13:45:00", "lastUpdated": "2024-01-15T13:45:00Z", "itemUuid": "123e4567-e89b-12d3-a456-426614174000", "description": "Launch batch" }, { "testCase": "invalid-pattern-and-format", "sku": "ab-12", "releaseDate": "2024-13-01", "releaseTime": "25:61:00", "lastUpdated": "not-a-timestamp", "itemUuid": "not-a-uuid", "description": "This description is way too long to be allowed" } ]
Numeric Constraints
Schema
{ "$id": "urn:nifi:test:numeric-constraints", "title": "NumericConstraints", "type": "object", "additionalProperties": true, "required": [ "quantity", "ratio", "step" ], "properties": { "quantity": { "type": "integer", "minimum": 1, "maximum": 100 }, "ratio": { "type": "number", "exclusiveMinimum": 0, "exclusiveMaximum": 1 }, "step": { "type": "number", "multipleOf": 0.25 } } }
Payload
[ { "testCase": "valid", "quantity": 10, "ratio": 0.5, "step": 1.75 }, { "testCase": "invalid-range-and-step", "quantity": 0, "ratio": 1, "step": 0.3 } ]
Enum and Constants
Schema
{ "$id": "urn:nifi:test:enum-const", "title": "EnumAndConst", "type": "object", "additionalProperties": true, "required": [ "status", "region" ], "properties": { "status": { "type": "string", "enum": [ "OPEN", "CLOSED", "ON_HOLD" ] }, "region": { "const": "NA" }, "priority": { "type": "integer", "enum": [ 1, 2, 3 ] }, "alias": { "type": [ "string", "null" ] } } }
Payload
[ { "testCase": "valid", "status": "OPEN", "region": "NA", "priority": 2, "alias": null }, { "testCase": "invalid-enum-and-const", "status": "INVALID", "region": "EU", "priority": 5 } ]
Arrays and Nested Objects
Schema
{ "$id": "urn:nifi:test:array-constraints", "title": "ArrayConstraints", "type": "object", "additionalProperties": true, "required": [ "tags", "participants" ], "properties": { "tags": { "type": "array", "items": { "type": "string", "minLength": 2 }, "minItems": 1, "maxItems": 4, "uniqueItems": true }, "participants": { "type": "array", "minItems": 1, "items": { "type": "object", "required": [ "name" ], "additionalProperties": false, "properties": { "name": { "type": "string", "minLength": 1 }, "age": { "type": [ "integer", "null" ], "minimum": 0 } } } } } }
Payload
[ { "testCase": "valid", "tags": [ "alpha", "beta" ], "participants": [ { "name": "Alice", "age": 30 }, { "name": "Bob", "age": null } ] }, { "testCase": "invalid-duplicate-tags", "tags": [ "alpha", "alpha" ], "participants": [ { "name": "Carol", "age": 25 } ] }, { "testCase": "invalid-empty-tags", "tags": [], "participants": [ { "name": "Dana", "age": 28 } ] }, { "testCase": "invalid-too-many-tags", "tags": [ "aa", "bb", "cc", "dd", "ee" ], "participants": [ { "name": "Evan", "age": 32 } ] }, { "testCase": "invalid-missing-participant-name", "tags": [ "alpha", "delta" ], "participants": [ { "age": 22 } ] }, { "testCase": "invalid-empty-participants", "tags": [ "alpha", "gamma" ], "participants": [] } ]
Dynamic Headers 1
Schema
{ "$id": "urn:test:dynamic-headers", "type": "object", "additionalProperties": false, "properties": { "fixed": { "type": "string" } }, "patternProperties": { "^x-": { "type": "string", "minLength": 2 } }, "required": [ "fixed" ] }
Payload
[ { "fixed": "value", "x-trace": "abc123", "x-user": "js" }, { "fixed": "value", "x-short": "a", "z-other": "not allowed" } ]
Dynamic Headers 2
Schema
{ "$id": "urn:test:device-metrics", "type": "object", "additionalProperties": false, "minProperties": 2, "patternProperties": { "^device_[0-9]+$": { "type": "object", "required": [ "temperature", "status" ], "properties": { "temperature": { "type": "number", "minimum": -40, "maximum": 85 }, "status": { "type": "string", "enum": [ "OK", "WARN", "FAIL" ] } }, "additionalProperties": false } }, "properties": { "batchId": { "type": "string", "minLength": 8 } }, "required": [ "batchId" ] }
Payload
[ { "batchId": "2024-09-30", "device_101": { "temperature": 21.5, "status": "OK" }, "device_202": { "temperature": 42, "status": "WARN" } }, { "batchId": "2024-09-30", "device_A12": { "temperature": 20, "status": "OK" }, "device_303": { "temperature": 120, "status": "OK" } } ]