diff --git a/app/error_messages.py b/app/error_messages.py
index 7ef737e2..fd73c438 100644
--- a/app/error_messages.py
+++ b/app/error_messages.py
@@ -1,4 +1,5 @@
DUMB_QUOTES_FOUND = "Found dumb quotes(s) in schema text"
+HTML_FOUND = "Found invalid HTML in schema text"
INVALID_WHITESPACE_FOUND = "Found invalid white space(s) in schema text"
DUPLICATE_ID_FOUND = "Duplicate id found"
FOR_LIST_NEVER_POPULATED = "for_list is not populated by any ListCollector blocks or supplementary data sources"
diff --git a/app/validators/questionnaire_validator.py b/app/validators/questionnaire_validator.py
index a4d4bec6..1ff7a96d 100644
--- a/app/validators/questionnaire_validator.py
+++ b/app/validators/questionnaire_validator.py
@@ -35,6 +35,7 @@ def validate(self):
self.validate_duplicates()
self.validate_smart_quotes()
self.validate_white_spaces()
+ self.validate_html()
self.validate_answer_references()
self.validate_list_references()
@@ -114,6 +115,47 @@ def validate_smart_quotes(self):
pointer=translatable_item.pointer,
)
+ def validate_html(self):
+ """Find translatable schema text that contains tag-like markup and validate it.
+
+ Every translatable item of the schema is scanned. Each non-empty value
+ containing something shaped like a tag is recorded together with the
+ item's pointer, and the recorded entries are handed to
+ ``check_invalid_html_tags``.
+ """
+ html_strings = []
+ schema_object = SurveySchema(self.schema_element)
+
+ # Matches anything tag-shaped: "<", any run of characters other than ">", then ">".
+ # pylint: disable=invalid-string-quote
+ html_regex = re.compile(r"<[^>]*>")
+
+ for translatable_item in schema_object.translatable_items:
+ schema_text = translatable_item.value
+
+ # Wrap a plain value in a list so both value shapes share one code path.
+ values_to_check = [schema_text]
+
+ # When the value is a dict, each of its values is checked individually.
+ if isinstance(schema_text, dict):
+ values_to_check = schema_text.values()
+
+ # Keep every non-empty value that contains a tag, paired with the item's pointer.
+ html_strings.extend(
+ {"pointer": translatable_item.pointer, "text": schema_text}
+ for schema_text in values_to_check
+ if schema_text and html_regex.search(schema_text)
+ )
+ # NOTE(review): indentation is not visible in this view; presumably this check
+ # runs once after the loop has finished -- confirm against the real file.
+ if html_strings:
+ self.check_invalid_html_tags(html_strings)
+
+ def check_invalid_html_tags(self, html_strings):
+     """Report schema text whose HTML is not made up solely of strong/anchor pairs.
+
+     For each entry the number of complete ``<strong>...</strong>`` and
+     ``<a ...>...</a>`` pairs is compared with the number of opening tags of
+     any kind. When the two counts differ, an ``HTML_FOUND`` error is added
+     for that entry.
+
+     Args:
+         html_strings: iterable of dicts, each with a ``"pointer"`` key and a
+             ``"text"`` key holding the string to inspect.
+     """
+     # A complete <strong>...</strong> pair that does not contain another
+     # opening <strong> before its closing tag.
+     strong = re.compile(r"<strong>(?:(?!<strong>).)*</strong>")
+     # A complete anchor pair: "<a " followed by attributes, then the
+     # shortest possible content up to the closing </a>.
+     anchor = re.compile(r"<a [^>]*>.*?</a>")
+     # Any opening (or self-closing) tag, with or without attributes.
+     # Closing tags such as </a> do not match because "/" cannot start a name.
+     all_tags = re.compile(
+         r"<([a-z0-9]+)(?=[\s>])(?:[^>=]|='[^']*'|=\"[^\"]*\"|=[^'\"\s]*)*\s?/?>"
+     )
+
+     for html_string in html_strings:
+         text = html_string["text"]
+         allowed_count = len(strong.findall(text)) + len(anchor.findall(text))
+
+         # Every opening tag must be accounted for by an allowed pair;
+         # any surplus or shortfall means other or malformed markup.
+         if allowed_count != len(all_tags.findall(text)):
+             self.add_error(
+                 error_messages.HTML_FOUND,
+                 pointer=html_string["pointer"],
+                 text=text,
+             )
+
def validate_white_spaces(self):
schema_object = SurveySchema(self.schema_element)
diff --git a/tests/schemas/invalid/test_invalid_html_in_schema_text.json b/tests/schemas/invalid/test_invalid_html_in_schema_text.json
new file mode 100644
index 00000000..3e387f32
--- /dev/null
+++ b/tests/schemas/invalid/test_invalid_html_in_schema_text.json
@@ -0,0 +1,133 @@
+{
+ "mime_type": "application/json/ons/eq",
+ "language": "en",
+ "schema_version": "0.0.1",
+ "data_version": "0.0.3",
+ "survey_id": "144",
+ "theme": "default",
+ "title": "Test invalid html",
+ "legal_basis": "Notice is given under section 999 of the Test Act 2000",
+ "metadata": [
+ {
+ "name": "user_id",
+ "type": "string"
+ },
+ {
+ "name": "period_id",
+ "type": "string"
+ },
+ {
+ "name": "ru_name",
+ "type": "string"
+ },
+ {
+ "name": "ru_ref",
+ "type": "string"
+ },
+ {
+ "name": "trad_as",
+ "type": "string",
+ "optional": true
+ }
+ ],
+ "questionnaire_flow": {
+ "type": "Linear",
+ "options": {}
+ },
+ "sections": [
+ {
+ "id": "introduction-section",
+ "title": "Introduction",
+ "groups": [
+ {
+ "id": "introduction-group",
+ "title": "General Business Information",
+ "blocks": [
+ {
+ "id": "introduction",
+ "type": "Introduction",
+ "primary_content": [
+ {
+ "id": "business-details",
+ "title": "Introduction with valid and invalid HTML",
+ "contents": [
+ {
+ "guidance": {
+ "contents": [
+ {
+ "title": "
You have successfully completed this section
"
+ }
+ ]
+ }
+ },
+ {
+ "type": "Interstitial",
+ "id": "interstitial-two",
+ "content": {
+ "title": "Page with link",
+ "contents": [
+ {
+ "description": "Anchor"
+ }
+ ]
+ }
+ },
+ {
+ "type": "Interstitial",
+ "id": "interstitial-three",
+ "content": {
+ "title": "Page with mixed invalid tags",
+ "contents": [
+ {
+ "description": " You have successfully completed this section You have successfully completed this section ",
+ },
+ {
+ "message": error_messages.HTML_FOUND,
+ "pointer": "/sections/0/groups/0/blocks/3/content/contents/0/description",
+ "text": "Title
Not valid tag"
+ }
+ ]
+ }
+ },
+ {
+ "type": "Interstitial",
+ "id": "interstitial-four",
+ "content": {
+ "title": "Valid double strong with another strong.",
+ "contents": [
+ {
+ "description": "TitleNot valid tag"
+ }
+ ]
+ }
+ },
+ {
+ "type": "Interstitial",
+ "id": "interstitial-five",
+ "content": {
+ "title": "Valid double anchor.",
+ "contents": [
+ {
+ "description": "Title and Not valid tag"
+ }
+ ]
+ }
+ }
+ ]
+ }
+ ]
+ }
+ ]
+}
diff --git a/tests/schemas/valid/test_introduction_with_guidance.json b/tests/schemas/valid/test_introduction_with_guidance.json
index bb2e21b5..a3bbab15 100644
--- a/tests/schemas/valid/test_introduction_with_guidance.json
+++ b/tests/schemas/valid/test_introduction_with_guidance.json
@@ -72,7 +72,7 @@
"title": "Section complete",
"contents": [
{
- "description": "Title
Not valid tag",
+ },
+ {
+ "message": error_messages.HTML_FOUND,
+ "pointer": "/sections/0/groups/0/blocks/4/content/contents/0/description",
+ "text": "TitleNot valid tag",
+ },
+ {
+ "message": error_messages.HTML_FOUND,
+ "pointer": "/sections/0/groups/0/blocks/0/primary_content/0/contents/0/guidance/contents/0/title",
+ "text": "