Skip to content

Commit c1c28c3

Browse files
authored
Merge pull request #251 from WycliffeAssociates/adjust-frontend-tests-to-pass
Update for STET
2 parents 8d00f7b + 7818d4b commit c1c28c3

File tree

12 files changed

+157
-79
lines changed

12 files changed

+157
-79
lines changed

backend/doc/domain/parsing.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1140,9 +1140,10 @@ def lookup_verse_text(usfm_book: USFMBook, chapter_num: int, verse_ref: str) ->
11401140
return ""
11411141
verse = chapter.verses.get(verse_ref, "")
11421142
logger.info(
1143-
"lang_code: %s, book_code: %s, chapter_num: %s, verse_num: %s, verse: %s",
1143+
"lang_code: %s, book_code: %s, national_book_name: %s, chapter_num: %s, verse_num: %s, verse: %s",
11441144
usfm_book.lang_code,
11451145
usfm_book.book_code,
1146+
usfm_book.national_book_name,
11461147
chapter_num,
11471148
verse_ref,
11481149
verse,

backend/doc/domain/resource_lookup.py

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -177,6 +177,7 @@
177177
# use.
178178
BOOK_NAME_CORRECTION_TABLE: dict[tuple[str, str], str] = {
179179
("pt-br", "1 Corintios"): "1 Coríntios",
180+
("es-419", "I juan"): "1 Juan",
180181
}
181182

182183
# List of languages which do not have USFM available for any books. We use this
@@ -1039,9 +1040,7 @@ def maybe_correct_book_name(
10391040
"""
10401041
Translate incorrect or undesirable book names to a preferred form.
10411042
"""
1042-
logger.debug("book_name to lookup: %s", book_name)
10431043
book_name_ = BOOK_NAME_CORRECTION_TABLE.get((lang_code, book_name), "")
1044-
logger.debug("result from book_name_correction_table: %s", book_name_)
10451044
if not book_name_:
10461045
book_name_ = book_name
10471046
return book_name_

backend/stet/data/stet_es-419.docx

1.27 KB
Binary file not shown.

backend/stet/data/stet_pt-br.docx

1.79 KB
Binary file not shown.

backend/stet/domain/document_generator.py

Lines changed: 63 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
from datetime import datetime
12
from typing import Mapping, Sequence
23

34
import mistune
@@ -28,6 +29,12 @@
2829
from pydantic import Json
2930
from stet.domain.model import VerseEntry, WordEntry
3031
from stet.domain.parser import get_word_entry_dtos
32+
from stet.domain.strings import (
33+
LOCALIZED_DATE_FORMAT_STRINGS,
34+
TRANSLATED_FOOTER_PHRASES_TABLE,
35+
TRANSLATED_HEADER_PHRASES_TABLE,
36+
TRANSLATED_TABLE_COLUMN_HEADERS,
37+
)
3138
from stet.utils.docx_utils import (
3239
add_footer,
3340
add_header,
@@ -59,11 +66,13 @@ def generate_docx_document(
5966
>>> generate_docx_document()
6067
"""
6168
word_entries: list[WordEntry] = []
62-
word_entry_dtos, book_codes_and_names = get_word_entry_dtos(lang0_code, lang1_code)
69+
word_entry_dtos, lang0_book_codes_and_names = get_word_entry_dtos(
70+
lang0_code, lang1_code
71+
)
6372
lang0_resource_types = resource_types(
6473
lang0_code,
6574
",".join(
66-
[book_code_and_name[0] for book_code_and_name in book_codes_and_names]
75+
[book_code_and_name[0] for book_code_and_name in lang0_book_codes_and_names]
6776
),
6877
)
6978
lang0_resource_types_ = [
@@ -73,7 +82,7 @@ def generate_docx_document(
7382
lang1_resource_types = resource_types(
7483
lang1_code,
7584
",".join(
76-
[book_code_and_name[0] for book_code_and_name in book_codes_and_names]
85+
[book_code_and_name[0] for book_code_and_name in lang0_book_codes_and_names]
7786
),
7887
)
7988
lang1_resource_types_ = [
@@ -115,7 +124,7 @@ def generate_docx_document(
115124
if lang0_usfm_resource_type and lang1_usfm_resource_type:
116125
source_usfm_book = None
117126
target_usfm_book = None
118-
for book_code, book_name in book_codes_and_names:
127+
for book_code, book_name in lang0_book_codes_and_names:
119128
current_task.update_state(state="Locating assets")
120129
lang0_resource_lookup_dto_ = resource_lookup_dto(
121130
lang0_code, lang0_usfm_resource_type, book_code
@@ -196,23 +205,13 @@ def generate_docx_document(
196205
)
197206
if target_selected_usfm_books:
198207
target_selected_usfm_book = target_selected_usfm_books[0]
199-
for verse_ref in verse_ref_dto.verse_refs:
200-
if source_selected_usfm_book:
201-
source_verse_text = lookup_verse_text(
202-
source_selected_usfm_book,
203-
verse_ref_dto.chapter_num,
204-
verse_ref.strip(),
205-
)
206-
else:
207-
source_verse_text = ""
208-
if target_selected_usfm_book:
209-
target_verse_text = lookup_verse_text(
210-
target_selected_usfm_book,
211-
verse_ref_dto.chapter_num,
212-
verse_ref.strip(),
213-
)
214-
else:
215-
target_verse_text = ""
208+
target_selected_usfm_book.national_book_name = maybe_correct_book_name(
209+
lang1_code, target_selected_usfm_book.national_book_name
210+
)
211+
logger.debug(
212+
"target_usfm_book.national_book_name: %s",
213+
target_selected_usfm_book.national_book_name,
214+
)
216215
non_book_name_portion_of_source_reference = extract_chapter_and_beyond(
217216
verse_ref_dto.source_reference
218217
)
@@ -231,6 +230,23 @@ def generate_docx_document(
231230
and non_book_name_portion_of_target_reference
232231
else verse_ref_dto.target_reference
233232
)
233+
for verse_ref in verse_ref_dto.verse_refs:
234+
if source_selected_usfm_book:
235+
source_verse_text = lookup_verse_text(
236+
source_selected_usfm_book,
237+
verse_ref_dto.chapter_num,
238+
verse_ref.strip(),
239+
)
240+
else:
241+
source_verse_text = ""
242+
if target_selected_usfm_book:
243+
target_verse_text = lookup_verse_text(
244+
target_selected_usfm_book,
245+
verse_ref_dto.chapter_num,
246+
verse_ref.strip(),
247+
)
248+
else:
249+
target_verse_text = ""
234250
word_entry.verses.append(
235251
VerseEntry(
236252
source_reference=localized_source_reference,
@@ -246,7 +262,16 @@ def generate_docx_document(
246262

247263

248264
def generate_docx(
249-
word_entries: list[WordEntry], docx_filepath: str, lang0_code: str, lang1_code: str
265+
word_entries: list[WordEntry],
266+
docx_filepath: str,
267+
lang0_code: str,
268+
lang1_code: str,
269+
translated_table_column_headers: dict[
270+
str, tuple[str, str, str, str]
271+
] = TRANSLATED_TABLE_COLUMN_HEADERS,
272+
translated_footer_phrases_table: dict[str, str] = TRANSLATED_FOOTER_PHRASES_TABLE,
273+
localized_date_format_strings: dict[str, str] = LOCALIZED_DATE_FORMAT_STRINGS,
274+
translated_header_phrases_table: dict[str, str] = TRANSLATED_HEADER_PHRASES_TABLE,
250275
) -> None:
251276
"""
252277
Generates a DOCX document from a list of word entries and saves it to the given file path.
@@ -273,9 +298,9 @@ def generate_docx(
273298
table.style = "Table Grid"
274299
# Set the header of the table and apply bold formatting
275300
hdr_cells = table.rows[0].cells
276-
hdr_cells[0].text = "Source Reference"
277-
hdr_cells[1].text = "Target Reference"
278-
hdr_cells[2].text = "Status"
301+
hdr_cells[0].text = translated_table_column_headers[lang0_code][0]
302+
hdr_cells[1].text = translated_table_column_headers[lang0_code][1]
303+
hdr_cells[2].text = translated_table_column_headers[lang0_code][2]
279304
hdr_cells[2].paragraphs[0].alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
280305
for hdr_cell in hdr_cells:
281306
hdr_cell.paragraphs[0].runs[0].bold = True
@@ -287,7 +312,11 @@ def generate_docx(
287312
source_run.bold = True
288313
target_run = row_cells[1].paragraphs[0].add_run(verse.target_reference)
289314
target_run.bold = True
290-
status_run = row_cells[2].paragraphs[0].add_run("OK")
315+
status_run = (
316+
row_cells[2]
317+
.paragraphs[0]
318+
.add_run(translated_table_column_headers[lang0_code][3])
319+
)
291320
status_run.bold = True
292321
row_cells[2].paragraphs[0].alignment = WD_PARAGRAPH_ALIGNMENT.CENTER
293322
# Row for texts
@@ -319,8 +348,14 @@ def generate_docx(
319348
tcPr.append(vAlign) # Append the vertical alignment to cell properties
320349
# Adjust column widths to prioritize the first two columns
321350
adjust_table_columns(table)
322-
doc = add_footer(doc)
323-
doc = add_header(doc, lang0_code, lang1_code)
351+
footer_phrase = translated_footer_phrases_table[lang0_code]
352+
current_datetime = datetime.now().strftime(
353+
localized_date_format_strings[lang0_code]
354+
)
355+
date_text = f"{footer_phrase} {current_datetime}"
356+
doc = add_footer(doc, date_text)
357+
header_phrase = translated_header_phrases_table[lang0_code]
358+
doc = add_header(doc, lang0_code, lang1_code, header_phrase)
324359
doc = add_lined_page_at_end(doc)
325360
reduce_spacing_around_tables(doc)
326361
doc.save(docx_filepath)

backend/stet/domain/parser.py

Lines changed: 27 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,9 @@ def get_word_entry_dtos(
1818
) -> tuple[list[WordEntryDto], list[tuple[str, str]]]:
1919
# Build data from source doc
2020
word_entry_dtos: list[WordEntryDto] = []
21-
book_codes_and_names__: list[tuple[str, str]] = []
21+
lang0_book_codes_and_names = book_codes_for_lang_from_usfm_only(lang0_code)
22+
lang1_book_codes_and_names = book_codes_for_lang_from_usfm_only(lang1_code)
23+
lang0_book_codes_and_names__: list[tuple[str, str]] = []
2224
doc = Document(f"{stet_dir}/stet_{lang0_code}.docx")
2325
for table in doc.tables:
2426
for row in table.rows:
@@ -59,17 +61,12 @@ def get_word_entry_dtos(
5961
book_name = match.group(1)
6062
# Some languages, e.g., bem, have a \n in the book name
6163
book_name = book_name.replace("\n", "")
62-
# Get book codes and names for the language that has been
63-
# requested from DOC.
64-
book_codes_and_names = book_codes_for_lang_from_usfm_only(
65-
lang0_code
66-
)
6764
# We expect this book name to be in localized form according to the
6865
# language of the STET input document (as indicated by the input
6966
# document's filename, stet_[ietf_code].docx).
7067
book_codes_and_names_ = [
7168
(book_code, book_name_)
72-
for book_code, book_name_ in book_codes_and_names
69+
for book_code, book_name_ in lang0_book_codes_and_names
7370
if book_name_
7471
== book_name # Check if DOC and STET input doc agree on book name
7572
]
@@ -85,7 +82,7 @@ def get_word_entry_dtos(
8582
book_codes_and_names_[0] if book_codes_and_names_ else None
8683
)
8784
if book_code_and_name_:
88-
book_codes_and_names__.append(book_code_and_name_)
85+
lang0_book_codes_and_names__.append(book_code_and_name_)
8986
chapter_num = int(match.group(2))
9087
verses = match.group(3)
9188
comment = match.group(4)
@@ -95,7 +92,23 @@ def get_word_entry_dtos(
9592
)
9693
else:
9794
source_reference = f"{book_name} {chapter_num}:{verses}"
98-
target_reference = f"{book_name} {chapter_num}:{verses}"
95+
lang0_book_code = (
96+
book_code_and_name_[0] if book_code_and_name_ else ""
97+
)
98+
lang1_book_code_and_name_ = next(
99+
(
100+
lang1_book_code_and_name
101+
for lang1_book_code_and_name in lang1_book_codes_and_names
102+
if lang1_book_code_and_name[0] == lang0_book_code
103+
),
104+
None,
105+
)
106+
lang1_book_name = (
107+
lang1_book_code_and_name_[1]
108+
if lang1_book_code_and_name_
109+
else ""
110+
)
111+
target_reference = f"{lang1_book_name} {chapter_num}:{verses}"
99112
verse_refs: list[str] = verses.split(",")
100113
valid_verse_refs: list[str] = []
101114
for verse_ref in verse_refs:
@@ -129,4 +142,8 @@ def get_word_entry_dtos(
129142
keyword.strip() for keyword in row.cells[3].text.split(",")
130143
]
131144
word_entry_dtos.append(word_entry_dto)
132-
return word_entry_dtos, list(set(book_codes_and_names__))
145+
# Sort word entry dtos by first word in word list
146+
sorted_word_entry_dtos = sorted(
147+
word_entry_dtos, key=lambda word_entry_dto: word_entry_dto.words[0]
148+
)
149+
return sorted_word_entry_dtos, list(set(lang0_book_codes_and_names__))

backend/stet/domain/strings.py

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,23 @@
1+
TRANSLATED_HEADER_PHRASES_TABLE: dict[str, str] = {
2+
"en": "Spiritual Terms Evaluation Tool (STET)",
3+
"es-419": "Herramienta de Evaluación de Términos Espirituales (STET)",
4+
"pt-br": "Ferramenta de Avaliação de Termos Espirituais (STET)",
5+
}
6+
7+
TRANSLATED_FOOTER_PHRASES_TABLE: dict[str, str] = {
8+
"en": "Generated on",
9+
"es-419": "Generado el",
10+
"pt-br": "Gerado em",
11+
}
12+
13+
LOCALIZED_DATE_FORMAT_STRINGS: dict[str, str] = {
14+
"en": "%m/%d/%Y %H:%M:%S",
15+
"es-419": "%d/%m/%Y %H:%M:%S",
16+
"pt-br": "%d/%m/%Y %H:%M:%S",
17+
}
18+
19+
TRANSLATED_TABLE_COLUMN_HEADERS = {
20+
"en": ("Source Reference", "Target Reference", "Status", "OK"),
21+
"es-419": ("Referencia de origen", "Referencia de destino", "Estado", "OK"),
22+
"pt-br": ("Referência de Origem", "Referência de Destino", "Status", "OK"),
23+
}

backend/stet/utils/docx_utils.py

Lines changed: 2 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,4 @@
11
import re
2-
from datetime import datetime
32
from typing import Optional
43

54
from docx import Document # type: ignore
@@ -114,7 +113,7 @@ def add_header(
114113
doc: Document,
115114
source_lang_code: str,
116115
target_lang_code: str,
117-
header_text: str = "Spiritual Terms Evaluation Tool",
116+
header_text: str = "Spiritual Terms Evaluation Tool (STET)",
118117
) -> Document:
119118
"""
120119
Add a header with:
@@ -148,8 +147,6 @@ def add_header(
148147
return doc
149148

150149

151-
152-
153150
def add_highlighted_html_to_docx_for_words(
154151
html: str, paragraph: Paragraph, keywords: list[str]
155152
) -> None:
@@ -288,7 +285,7 @@ def set_spacing(
288285
previous_element = element
289286

290287

291-
def add_footer(doc: Document) -> Document:
288+
def add_footer(doc: Document, date_text: str) -> Document:
292289
"""
293290
Programmatically add page numbers and a date timestamp in the footer.
294291
Page number will be centered, and the date timestamp will be aligned to the right
@@ -338,8 +335,6 @@ def add_footer(doc: Document) -> Document:
338335
page_run.font.color.rgb = RGBColor(169, 169, 169) # Grey color for page number
339336
# Add the caller-supplied date text (e.g. "Generated on <timestamp>")
340337
footer_paragraph.add_run("\t") # Tab to right position
341-
current_datetime = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
342-
date_text = f"Generated on {current_datetime}"
343338
date_run = footer_paragraph.add_run(date_text)
344339
date_run.font.color.rgb = RGBColor(169, 169, 169) # Grey color for timestamp
345340
date_run.font.size = Pt(10) # Optional: Adjust font size for consistency

backend/stet/utils/util.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -11,7 +11,7 @@ def is_valid_int(text: str) -> bool:
1111

1212

1313
def extract_chapter_and_beyond(text: str) -> Optional[str]:
14-
# Regular expression to match "<chapter_num>:<verse_num> [comment]"
14+
# Regular expression to match a trailing "<chapter_num>:<verse_num>", optionally followed by "(*)" or "(**)"
1515
match = re.search(r"\d+:\d+(\s*\(\*\*?\))?$", text)
1616
if match:
1717
return match.group()

frontend/playwright.config.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@ import type { PlaywrightTestConfig } from '@playwright/test'
33
const config: PlaywrightTestConfig = {
44
testDir: 'tests',
55
testMatch: '**/*.ts',
6-
timeout: 240000 // Set global timeout to 4 minutes
6+
timeout: 640000 // Set global timeout to 640 seconds (about 10.7 minutes)
77
}
88

99
export default config

0 commit comments

Comments
 (0)