8
8
9
9
import relecov_tools .utils
10
10
import relecov_tools .assets .schema_utils .jsonschema_draft
11
+ import relecov_tools .assets .schema_utils .custom_validators
11
12
from relecov_tools .config_json import ConfigJson
12
13
from relecov_tools .log_summary import LogSum
13
14
15
+
14
16
log = logging .getLogger (__name__ )
15
17
stderr = rich .console .Console (
16
18
stderr = True ,
@@ -112,9 +114,9 @@ def get_sample_id_field(self):
112
114
return sample_id_field
113
115
114
116
def validate_instances (self ):
115
- """Validate data instances against a validated json schema"""
117
+ """Validate data instances against a validated JSON schema"""
116
118
117
- # create validator
119
+ # Create validator
118
120
validator = Draft202012Validator (
119
121
self .json_schema , format_checker = FormatChecker ()
120
122
)
@@ -124,52 +126,65 @@ def validate_instances(self):
124
126
invalid_json = []
125
127
errors = {}
126
128
error_keys = {}
129
+
127
130
if self .sample_id_field is None :
128
131
log_text = f"Logs keys set to None. Reason: { self .SAMPLE_FIELD_ERROR } "
129
132
self .logsum .add_warning (sample = self .sample_id_field , entry = log_text )
130
- stderr .print ("[blue] Start processing the json file" )
131
- log .info ("Start processing the json file" )
133
+
134
+ stderr .print ("[blue] Start processing the JSON file" )
135
+ log .info ("Start processing the JSON file" )
136
+
132
137
for item_row in self .json_data :
133
- # validate(instance=item_row, schema=json_schema)
134
138
sample_id_value = item_row .get (self .sample_id_field )
135
- if validator .is_valid (item_row ):
139
+
140
+ # Collect all errors (don't raise immediately)
141
+ validation_errors = list (validator .iter_errors (item_row ))
142
+
143
+ # Run the custom validator to check if errors should be ignored
144
+ validation_errors = relecov_tools .assets .schema_utils .custom_validators .validate_with_exceptions (
145
+ self .json_schema , item_row , validation_errors
146
+ )
147
+ if not validation_errors :
136
148
validated_json_data .append (item_row )
137
149
self .logsum .feed_key (sample = sample_id_value )
138
150
else :
139
- # Count error types
140
- for error in validator .iter_errors (item_row ):
151
+ # Process remaining errors
152
+ for error in validation_errors :
153
+ # Extract the error field name
141
154
if error .validator == "required" :
142
155
error_field = [
143
156
f for f in error .validator_value if f in error .message
144
157
][0 ]
145
158
else :
146
159
error_field = error .absolute_path [0 ]
160
+
161
+ # Try to get the human-readable label from the schema
147
162
try :
148
163
err_field_label = schema_props [error_field ]["label" ]
149
164
except KeyError :
150
- log .error ("Could not extract label for %s" % error_field )
165
+ log .error (f "Could not extract label for { error_field } " )
151
166
err_field_label = error_field
152
- error .message .replace (error_field , err_field_label )
167
+
168
+ # Format the error message
169
+ error .message = error .message .replace (error_field , err_field_label )
153
170
error_text = f"Error in column { err_field_label } : { error .message } "
171
+
172
+ # Log errors for summary
154
173
error_keys [error .message ] = error_field
155
- if error .message in errors :
156
- errors [error .message ] += 1
157
- else :
158
- errors [error .message ] = 1
174
+ errors [error .message ] = errors .get (error .message , 0 ) + 1
159
175
self .logsum .add_error (sample = sample_id_value , entry = error_text )
160
- # append row with errors
176
+
177
+ # Add the invalid row to the list
161
178
invalid_json .append (item_row )
162
179
163
180
# Summarize errors
164
181
stderr .print ("[blue] --------------------" )
165
182
stderr .print ("[blue] VALIDATION SUMMARY" )
166
183
stderr .print ("[blue] --------------------" )
167
184
log .info ("Validation summary:" )
168
- for error_type in errors .keys ():
169
- num_of_errors = str (errors [error_type ])
170
- field_with_error = str (error_keys [error_type ])
171
- error_text = "{} samples failed validation for {}:\n {}"
172
- error_text = error_text .format (num_of_errors , field_with_error , error_type )
185
+ for error_type , count in errors .items ():
186
+ field_with_error = error_keys [error_type ]
187
+ error_text = f"{ count } samples failed validation for { field_with_error } :\n { error_type } "
173
188
self .logsum .add_warning (entry = error_text )
174
189
stderr .print (f"[red]{ error_text } " )
175
190
stderr .print ("[red] --------------------" )
0 commit comments