diff --git a/app/error_messages.py b/app/error_messages.py index 7ef737e2..fd73c438 100644 --- a/app/error_messages.py +++ b/app/error_messages.py @@ -1,4 +1,5 @@ DUMB_QUOTES_FOUND = "Found dumb quotes(s) in schema text" +HTML_FOUND = "Found invalid HTML in schema text" INVALID_WHITESPACE_FOUND = "Found invalid white space(s) in schema text" DUPLICATE_ID_FOUND = "Duplicate id found" FOR_LIST_NEVER_POPULATED = "for_list is not populated by any ListCollector blocks or supplementary data sources" diff --git a/app/validators/questionnaire_validator.py b/app/validators/questionnaire_validator.py index a4d4bec6..1ff7a96d 100644 --- a/app/validators/questionnaire_validator.py +++ b/app/validators/questionnaire_validator.py @@ -35,6 +35,7 @@ def validate(self): self.validate_duplicates() self.validate_smart_quotes() self.validate_white_spaces() + self.validate_html() self.validate_answer_references() self.validate_list_references() @@ -114,6 +115,47 @@ def validate_smart_quotes(self): pointer=translatable_item.pointer, ) + def validate_html(self): + html_strings = [] + schema_object = SurveySchema(self.schema_element) + + # pylint: disable=invalid-string-quote + html_regex = re.compile(r"<[^>]*>") + + for translatable_item in schema_object.translatable_items: + schema_text = translatable_item.value + + values_to_check = [schema_text] + + if isinstance(schema_text, dict): + values_to_check = schema_text.values() + + html_strings.extend( + {"pointer": translatable_item.pointer, "text": schema_text} + for schema_text in values_to_check + if schema_text and html_regex.search(schema_text) + ) + if html_strings: + self.check_invalid_html_tags(html_strings) + + def check_invalid_html_tags(self, html_strings): + strong = re.compile(r"(?:(?!).)*") + anchor = re.compile(r"]*>.*?") + all_tags = re.compile( + r"<([a-z0-9]+)(?=[\s>])(?:[^>=]|='[^']*'|=\"[^\"]*\"|=[^'\"\s]*)*\s?/?>" + ) + + for html_string in html_strings: + if len(strong.findall(html_string["text"])) + len( + anchor.findall(html_string["text"]) + ) != len(all_tags.findall(html_string["text"])): + + self.add_error( + error_messages.HTML_FOUND, + pointer=html_string["pointer"], + text=html_string["text"], + ) + def validate_white_spaces(self): schema_object = SurveySchema(self.schema_element) diff --git a/tests/schemas/invalid/test_invalid_html_in_schema_text.json b/tests/schemas/invalid/test_invalid_html_in_schema_text.json new file mode 100644 index 00000000..3e387f32 --- /dev/null +++ b/tests/schemas/invalid/test_invalid_html_in_schema_text.json @@ -0,0 +1,133 @@ +{ + "mime_type": "application/json/ons/eq", + "language": "en", + "schema_version": "0.0.1", + "data_version": "0.0.3", + "survey_id": "144", + "theme": "default", + "title": "Test invalid html", + "legal_basis": "Notice is given under section 999 of the Test Act 2000", + "metadata": [ + { + "name": "user_id", + "type": "string" + }, + { + "name": "period_id", + "type": "string" + }, + { + "name": "ru_name", + "type": "string" + }, + { + "name": "ru_ref", + "type": "string" + }, + { + "name": "trad_as", + "type": "string", + "optional": true + } + ], + "questionnaire_flow": { + "type": "Linear", + "options": {} + }, + "sections": [ + { + "id": "introduction-section", + "title": "Introduction", + "groups": [ + { + "id": "introduction-group", + "title": "General Business Information", + "blocks": [ + { + "id": "introduction", + "type": "Introduction", + "primary_content": [ + { + "id": "business-details", + "title": "Introduction with valid and invalid HTML", + "contents": [ + { + "guidance": { + "contents": [ + { + "title": "Coronavirus (COVID-19) guidance", + "description": "Explain your figures in the comment section to minimise us contacting you and to help us tell an industry story" + } + ] + } + } + ] + } + ] + }, + { + "type": "Interstitial", + "id": "intersitital-one", + "content": { + "title": "Page with invalid html", + "contents": [ + { + "description": "

You have successfully completed this section

" + } + ] + } + }, + { + "type": "Interstitial", + "id": "interstitial-two", + "content": { + "title": "Page with link", + "contents": [ + { + "description": "Anchor" + } + ] + } + }, + { + "type": "Interstitial", + "id": "interstitial-three", + "content": { + "title": "Page with mixed invalid tags", + "contents": [ + { + "description": "

Title

Not valid tag" + } + ] + } + }, + { + "type": "Interstitial", + "id": "interstitial-four", + "content": { + "title": "Valid double strong with another strong.", + "contents": [ + { + "description": "TitleNot valid tag" + } + ] + } + }, + { + "type": "Interstitial", + "id": "interstitial-five", + "content": { + "title": "Valid double anchor.", + "contents": [ + { + "description": "Title and Not valid tag" + } + ] + } + } + ] + } + ] + } + ] +} diff --git a/tests/schemas/valid/test_introduction_with_guidance.json b/tests/schemas/valid/test_introduction_with_guidance.json index bb2e21b5..a3bbab15 100644 --- a/tests/schemas/valid/test_introduction_with_guidance.json +++ b/tests/schemas/valid/test_introduction_with_guidance.json @@ -72,7 +72,7 @@ "title": "Section complete", "contents": [ { - "description": "

You have successfully completed this section

" + "description": "You have successfully completed this section" } ] } diff --git a/tests/test_questionnaire_validator.py b/tests/test_questionnaire_validator.py index d60a2410..d6c4089d 100644 --- a/tests/test_questionnaire_validator.py +++ b/tests/test_questionnaire_validator.py @@ -340,6 +340,43 @@ def test_invalid_whitespaces_in_schema(): assert validator.errors == expected_error_messages +def test_invalid_html_in_schema(): + filename = "schemas/invalid/test_invalid_html_in_schema_text.json" + + validator = QuestionnaireValidator(_open_and_load_schema_file(filename)) + + expected_error_messages = [ + { + "message": error_messages.HTML_FOUND, + "pointer": "/sections/0/groups/0/blocks/3/content/title", + "text": "Page with mixed invalid tags", + }, + { + "message": error_messages.HTML_FOUND, + "pointer": "/sections/0/groups/0/blocks/1/content/contents/0/description", + "text": "

You have successfully completed this section

", + }, + { + "message": error_messages.HTML_FOUND, + "pointer": "/sections/0/groups/0/blocks/3/content/contents/0/description", + "text": "

Title

Not valid tag", + }, + { + "message": error_messages.HTML_FOUND, + "pointer": "/sections/0/groups/0/blocks/4/content/contents/0/description", + "text": "TitleNot valid tag", + }, + { + "message": error_messages.HTML_FOUND, + "pointer": "/sections/0/groups/0/blocks/0/primary_content/0/contents/0/guidance/contents/0/title", + "text": "Coronavirus (COVID-19) guidance", + }, + ] + validator.validate_html() + + assert validator.errors == expected_error_messages + + def test_invalid_answer_type_for_question_summary_concatenation(): filename = "schemas/invalid/test_invalid_answer_type_for_question_summary.json"