diff --git a/.github/workflows/build.yaml b/.github/workflows/build.yaml index 24399e06..9739672d 100644 --- a/.github/workflows/build.yaml +++ b/.github/workflows/build.yaml @@ -17,7 +17,7 @@ jobs: run: ./configure --disable-static --enable-python-bindings - name: build run: make -j4 V=1 VERBOSE=1 - - name: tests - run: make test - name: make install run: sudo make install + - name: tests + run: make test diff --git a/.gitignore b/.gitignore index bda2353a..3b17de9f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,7 @@ *.pyc **/*.deps/ /build/ +/.vscode/ /.ccls-cache/ /*.pc /aclocal.m4 @@ -79,6 +80,7 @@ /lttoolbox/lt-invert /lttoolbox/lt-restrict /lttoolbox/lt-apply-acx +/lttoolbox/formatmsg /python/Makefile /python/Makefile.in /python/lttoolbox.i @@ -86,6 +88,10 @@ /python/lttoolbox.py /python/setup.py /python/build* +/locales/Makefile +/locales/Makefile.in +*.res +*.dat *.egg-info/ *.egg **/.mypy_cache/ diff --git a/Makefile.am b/Makefile.am index 13a77794..b307c465 100644 --- a/Makefile.am +++ b/Makefile.am @@ -1,6 +1,6 @@ ACLOCAL_AMFLAGS=-I m4 -SUBDIRS = $(PACKAGE_NAME) +SUBDIRS = $(PACKAGE_NAME) locales DIST_SUBDIRS = $(PACKAGE_NAME) if HAVE_PYTHON_BINDINGS @@ -15,3 +15,6 @@ EXTRA_DIST=autogen.sh tests # TODO: the below will use python3 if you run it on Arch Linux with no python2 installed test: tests/run_tests.py $(PYTHON) $< + +export LOCALES_DIR=$(datadir)/$(PACKAGE_NAME) +export ALT_I18N_DATA=$(LOCALES_DIR)/lttoolbox.dat diff --git a/configure.ac b/configure.ac index cd691dcf..06638075 100644 --- a/configure.ac +++ b/configure.ac @@ -105,4 +105,4 @@ then PYTHON_INSTALL_PARAMS="--prefix=\$(prefix) --root=\$(DESTDIR)/" fi -AC_OUTPUT([Makefile lttoolbox.pc lttoolbox/Makefile python/Makefile]) +AC_OUTPUT([Makefile lttoolbox.pc lttoolbox/Makefile python/Makefile locales/Makefile]) diff --git a/locales/Makefile.am b/locales/Makefile.am new file mode 100644 index 00000000..76c2f556 --- /dev/null +++ b/locales/Makefile.am @@ -0,0 +1,7 @@ +lttoolbox.dat: 
root.txt en.txt es.txt + genrb -d . root.txt en.txt es.txt + echo root.res en.res es.res > package_list.txt + pkgdata -p lttoolbox --mode archive -d . package_list.txt + +localesdir = $(LOCALES_DIR) +dist_locales_DATA = lttoolbox.dat diff --git a/locales/en.txt b/locales/en.txt new file mode 100644 index 00000000..0bd10e4a --- /dev/null +++ b/locales/en.txt @@ -0,0 +1,2 @@ +en{ +} \ No newline at end of file diff --git a/locales/es.txt b/locales/es.txt new file mode 100644 index 00000000..9ca9c6a5 --- /dev/null +++ b/locales/es.txt @@ -0,0 +1,31 @@ +es{ + lt_proc_desc{"procesar una corriente con un transductor de letras"} + analysis_desc{"análisis morfológico (comportamiento predeterminado)"} + bilingual_desc{"transferencia léxica"} + case_sensitive_desc{"utilizar el caso literal de los caracteres entrantes"} + debugged_gen_desc{"transformarse. generación con todas las cosas"} + decompose_nouns_desc{"Intenta descomponer palabras desconocidas."} + generation_desc{"generación morfológica"} + ignored_chars_desc{"especificar archivo con caracteres para ignorar"} + restore_chars_desc{"especificar archivo con caracteres para restauración diacrítica"} + tagged_gen_desc{"generación morfológica manteniendo formas léxicas"} + tagged_nm_gen_desc{"igual que -l pero sin marcas denominativas desconocidas"} + non_marked_gen_desc{"transformarse. 
generación sin marcas denominativas desconocidas"} + surf_bilingual_desc{"transferencia léxica con formas superficiales"} + post_generation_desc{"posgeneración"} + inter_generation_desc{"intergeneracional"} + sao_desc{"Procesamiento de entrada del sistema de anotaciones SAO"} + transliteration_desc{"aplicar diccionario de transliteración"} + version_desc{"versión"} + null_flush_desc{"salida de vaciado en el carácter nulo"} + dictionary_case_desc{"usar mayúsculas y minúsculas del diccionario en lugar de superficie"} + careful_case_desc{"usar mayúsculas y minúsculas del diccionario si están presentes; de lo contrario, superficie"} + no_default_ignore_desc{"omite la carga de los caracteres ignorados predeterminados"} + show_weights_desc{"Imprimir los pesos del análisis final (si corresponde)"} + analyses_desc{"No generar más de N análisis (si el transductor está ponderado, los N mejores análisis)"} + weight_classes_desc{"No generar más de N mejores clases de ponderación (donde los análisis con igual ponderación constituyen una clase)"} + compound_max_elements_desc{"Establecer elementos máximos compuestos"} + help_desc{"muestra esta ayuda"} + usage{"USO: "} + version{" versión "} +} diff --git a/locales/package_list.txt b/locales/package_list.txt new file mode 100644 index 00000000..4f8959e0 --- /dev/null +++ b/locales/package_list.txt @@ -0,0 +1 @@ +root.res en.res es.res diff --git a/locales/root.txt b/locales/root.txt new file mode 100644 index 00000000..9ba4c722 --- /dev/null +++ b/locales/root.txt @@ -0,0 +1,158 @@ +root{ + lt_proc_desc{"process a stream with a letter transducer"} + analysis_desc{"morphological analysis (default behavior)"} + bilingual_desc{"lexical transfer"} + case_sensitive_desc{"use the literal case of the incoming characters"} + debugged_gen_desc{"morph. 
generation with all the stuff"} + decompose_nouns_desc{"Try to decompound unknown words"} + generation_desc{"morphological generation"} + ignored_chars_desc{"specify file with characters to ignore"} + restore_chars_desc{"specify file with characters to diacritic restoration"} + tagged_gen_desc{"morphological generation keeping lexical forms"} + tagged_nm_gen_desc{"same as -l but without unknown word marks"} + non_marked_gen_desc{"morph. generation without unknown word marks"} + surf_bilingual_desc{"lexical transfer with surface forms"} + post_generation_desc{"post-generation"} + inter_generation_desc{"inter-generation"} + sao_desc{"SAO annotation system input processing"} + transliteration_desc{"apply transliteration dictionary"} + version_desc{"version"} + null_flush_desc{"flush output on the null character"} + dictionary_case_desc{"use dictionary case instead of surface"} + careful_case_desc{"use dictionary case if present, else surface"} + no_default_ignore_desc{"skips loading the default ignore characters"} + show_weights_desc{"Print final analysis weights (if any)"} + analyses_desc{"Output no more than N analyses (if the transducer is weighted, the N best analyses)"} + weight_classes_desc{"Output no more than N best weight classes (where analyses with equal weight constitute a class)"} + compound_max_elements_desc{"Set compound max elements"} + help_desc{"show this help"} + usage{"USAGE: "} + version{" version "} + modes{"Modes:"} + options{"Options:"} + + + lt_append_desc{"add sections to a compiled transducer"} + keep_desc{"in case of section name conflicts, keep the one from the first transducer"} + single_desc{"treat input transducers as one-sided"} + + lt_apply_acx_desc{"apply an ACX file to a compiled transducer"} + + lt_comp_desc{"build a letter transducer from a dictionary"} + debug_desc{"insert line numbers before each entry"} + keep_boundaries_desc{"keep morpheme boundaries"} + var_desc{"set language variant"} + alt_desc{"set alternative (monodix)"} 
+ var_left_desc{"set left language variant (bidix)"} + var_right_desc{"set right language variant (bidix)"} + expect_hfst_desc{"expect HFST symbols"} + no_split_desc{"don't attempt to split into word and punctuation sections"} + jobs_desc{"use one cpu core per section when minimising, new section after 50k entries"} + verbose_desc{"compile verbosely"} + + lt_compose_desc{"compose transducer1 with transducer2"} + inverted_desc{"run composition right-to-left on transducer1"} + anywhere_desc{"don't require anchored matches, let transducer2 optionally compose at any sub-path"} + + lt_expand{"expand the contents of a dictionary file"} + lt_invert_desc{"reverse the direction of a compiled transducer"} + + lt_paradigm_desc{"generate listings from a compiled transducer"} + analyser_desc{"FST is an analyser (tags on the right)"} + exclude_desc{"disregard paths containing TAG"} + sort_desc{"alphabetize the paths for each pattern"} + + lt_print_desc{"dump a transducer to text in ATT format"} + alpha_desc{"print transducer alphabet"} + use_hfst_desc{"use HFST-compatible character escapes"} + + lt_restrict_desc{"remove paths from a transducer"} + minimise_desc{"minimise transducers after deleting paths"} + + lt_tmxcomp_desc{"build a letter transducer from a TMX translation memory"} + origin_code_desc{"the language code to be taken as lang1"} + meta_code_desc{"the language code to be taken as lang2"} + input_language{"input language"} + output_language{"output language"} + + lt_tmxproc_desc{"process a stream with a letter transducer"} + + lt_trim_desc{"trim a transducer to another transducer"} + match_section_desc{"A section with this name (id@type) will only be trimmed against a section with the same name. 
This argument may be used multiple times."} + ALT80000{"ERROR ALT80000: Invalid or no argument for {option}"} + ALT80010{"ERROR ALT80010: In {node_doc_url} on line {line_number}: Missing value attribute."} + ALT80020{"ERROR ALT80020: In {node_doc_url} on line {line_number}: Expected a single character in value attribute, but found {value_size}."} + ALT80030{"ERROR ALT80030: In {node_doc_url} on line {line_number}: Expected <{expected}> but found <{found}>."} + ALT80050{"ERROR ALT80050: Unable to access \"{file_name}\"."} + ALT80060{"ERROR ALT80060: Invalid format in file \"{file_name}\" on line {line_number}."} + ALT60070{"WARNING ALT60070: Multiple fsts in \"{file_name}\" will be disjuncted."} + ALT80080{"ERROR ALT80080: Transducer contains epsilon transition to a final state. Aborting."} + ALT80090{"ERROR ALT80090: Transducer contains initial epsilon loop. Aborting."} + ALT80100{"ERROR ALT80100: Cannot create empty buffer."} + ALT80110{"ERROR ALT80110: Parse error at the end of input."} + ALT80120{"ERROR ALT80120: Invalid dictionary (hint: the right side of an entry is empty)."} + ALT80121{"ERROR ALT80121: Invalid dictionary (hint: entry on the right beginning with whitespace)."} + ALT80122{"ERROR ALT80122: Invalid dictionary (hint: the left side of an entry is empty)."} + ALT80123{"ERROR ALT80123: Invalid dictionary (hint: entry on the left beginning with whitespace)."} + ALT80124{"ERROR ALT80124: Invalid dictionary (hint: entry beginning with whitespace)."} + ALT80140{"ERROR ALT80140: In file \"{file_name}\" on line {line_number}: Missing alphabet symbols."} + ALT60150{"WARNING ALT60150: Cannot insert from empty input. Ignoring. 
(You probably want to specify exact tags when deleting a word.)"} + ALT80160{"ERROR ALT80160: In file \"{file_name}\" on line {line_number}: Non-empty element \"<{name}>\" should be empty."} + ALT80170{"ERROR ALT80170: In file \"{file_name}\" on line {line_number}: Undefined symbol \"{symbol}\"."} + ALT80180{"ERROR ALT80180: In file \"{file_name}\" on line {line_number}: Invalid specification of element \"<{name}>\" in this context."} + ALT80190{"ERROR ALT80190: In file \"{file_name}\" on line {line_number}: Invalid construction."} + ALT80200{"ERROR ALT80200: In file \"{file_name}\" on line {line_number}: Expected \"<{slash_element}>\"."} + ALT60210{"WARNING ALT60210: In file \"{file_name}\" on line {line_number}: Entry begins with space."} + ALT80220{"ERROR ALT80220: In file \"{file_name}\" on line {line_number}: Paradigm refers to itself \"{paradigm_name}\"."} + ALT80230{"ERROR ALT80230: In file \"{file_name}\" on line {line_number}: Undefined paradigm \"{paradigm_name}\"."} + ALT80240{"ERROR ALT80240: In file \"{file_name}\" on line {line_number}: Invalid entry token."} + ALT80250{"ERROR ALT80250: In file \"{file_name}\" on line {line_number}: \"<{element_name}>\" element must specify non-void \"{attr_name}\" attribute."} + ALT80260{"ERROR ALT80260: In file \"{file_name}\" on line {line_number}: Parse error."} + ALT80270{"ERROR ALT80270: In file \"{file_name}\" on line {line_number}: Invalid inclusion of \"<{element_name}>\" into \"<{compiler_entry_element}>\"."} + ALT80280{"ERROR ALT80280: In file \"{file_name}\" on line {line_number}: Invalid node \"<{element_name}>\"."} + ALT80290{"ERROR ALT80290: I/O Error."} + ALT80300{"ERROR ALT80300: Out of range: {value}."} + ALT60320{"WARNING ALT60320: Matching case-sensitively since processor state size >= {max_case_insensitive_state_size}"} + ALT80330{"ERROR ALT80330: Unsupported transducer type for \"{transducer_first}\"."} + ALT60340{"WARNING ALT60340: CompoundAnalysis's MAX_COMBINATIONS exceeded for 
\"{input_word}\"\n" + " gave up at char {index} \"{char}\"."} + ALT60350{"WARNING ALT60350: Decomposition symbol {symbol} not found."} + ALT80360{"ERROR ALT80360: Unable to rewind file."} + ALT80370{"ERROR ALT80370: Unexpected trailing backslash."} + ALT60380{"WARNING ALT60380: section \"{section}\" appears in both transducers and will be overwritten!"} + ALT80390{"ERROR ALT80390: -l specified, but mode is lr."} + ALT80391{"ERROR ALT80391: -r specified, but mode is rl."} + ALT60410{"WARNING ALT60410: section {section_name} is empty! Skipping it..."} + ALT60420{"WARNING ALT60420: section {section_name} had no final state after composing! Skipping it..."} + ALT80430{"ERROR ALT80430: Composition gave empty transducer!"} + ALT60440{"WARNING ALT60440: unsupported locale, fallback to \"C\""} + ALT60450{"WARNING ALT60450: section {section_name} was not found in both transducers! Skipping if in just one..."} + ALT80460{"ERROR ALT80460: Trimming gave empty transducer!\n" + "Hint: There are no words in bilingual dictionary that match words in both monolingual dictionaries?"} + ALT80470{"ERROR ALT80470: Opening an unended sequence."} + ALT80471{"ERROR ALT80471: Ending an unopened sequence."} + ALT80490{"ERROR ALT80490: Using labels outside of a sequence."} + ALT80500{"ERROR ALT80500: Parsing regexp."} + ALT80510{"ERROR ALT80510: Unable to lowercase string \"{string}\".\n" + "Error code: {error_name}"} + ALT80511{"ERROR ALT80511: Unable to uppercase string \"{string}\".\n" + "Error code: {error_name}"} + ALT80512{"ERROR ALT80512: Unable to titlecase string \"{string}\".\n" + "Error code: {error_name}"} + ALT80513{"ERROR ALT80513: Caseless string comparison failed on \"{string_a}\" and \"{string_b}\".\n" + "Error code: {error_name}"} + ALT80550{"ERROR ALT80550: Trying to link nonexistent states ({source}, {target}, {tag})."} + ALT80560{"ERROR ALT80560: Empty set of final states."} + ALT80570{"ERROR ALT80570: Couldn't find {f_src}, {g_src} in state map."} + ALT80580{"ERROR 
ALT80580: Failed to read/write uint64_t."} + ALT80590{"ERROR ALT80590: Transducer has features that are unknown to this version of lttoolbox - upgrade!"} + ALT80600{"ERROR ALT80600: Unable to parse {type}."} + ALT80610{"ERROR ALT80610: Malformed input stream."} + ALT80620{"ERROR ALT80620: FST has features that are unknown to this version of lttoolbox - upgrade!"} + ALT80630{"ERROR ALT80630: Could not read {number} expected bytes from stream."} + ALT80640{"ERROR ALT80640: Can't deserialise {size} byte integer type: Can't deserialise size."} + ALT80650{"ERROR ALT80650: Can't deserialise {size} byte integer type: Can't deserialise byte."} + ALT80660{"ERROR ALT80660: Can't serialise const {size_a} byte integer type: Can't serialise size {size_b}."} + ALT80670{"ERROR ALT80670: Can't serialise const {size} byte integer type: Can't serialise byte {byte}."} +} diff --git a/lttoolbox/Makefile.am b/lttoolbox/Makefile.am index e4c1b657..5114049c 100644 --- a/lttoolbox/Makefile.am +++ b/lttoolbox/Makefile.am @@ -4,7 +4,7 @@ h_sources = acx.h alphabet.h att_compiler.h buffer.h cli.h compiler.h compressio match_exe.h match_node.h match_state.h my_stdio.h node.h \ pattern_list.h regexp_compiler.h serialiser.h sorted_vector.h state.h string_utils.h symbol_iter.h \ transducer.h trans_exe.h xml_parse_util.h xml_walk_util.h exception.h tmx_compiler.h \ - ustring.h sorted_vector.hpp + ustring.h sorted_vector.hpp i18n.h cc_sources = acx.cc alphabet.cc att_compiler.cc cli.cc compiler.cc compression.cc entry_token.cc \ expander.cc file_utils.cc fst_processor.cc input_file.cc lt_locale.cc match_exe.cc \ match_node.cc match_state.cc node.cc pattern_list.cc \ @@ -14,7 +14,7 @@ cc_sources = acx.cc alphabet.cc att_compiler.cc cli.cc compiler.cc compression.c library_includedir = $(includedir)/$(PACKAGE_NAME) library_include_HEADERS = $(h_sources) -bin_PROGRAMS = lt-comp lt-proc lt-expand lt-paradigm lt-tmxcomp lt-tmxproc lt-print lt-trim lt-compose lt-append lsx-comp lt-invert lt-restrict 
lt-apply-acx +bin_PROGRAMS = lt-comp lt-proc lt-expand lt-paradigm lt-tmxcomp lt-tmxproc lt-print lt-trim lt-compose lt-append lsx-comp lt-invert lt-restrict lt-apply-acx formatmsg instdir = lttoolbox lib_LTLIBRARIES= liblttoolbox.la @@ -29,6 +29,7 @@ lttoolboxlib = $(prefix)/lib lttoolbox_DATA = dix.dtd dix.rng dix.rnc acx.rng xsd/dix.xsd xsd/acx.xsd LDADD = liblttoolbox.la $(PTHREAD_LIBS) +AM_CPPFLAGS = -DALT_I18N_DATA='"$(ALT_I18N_DATA)"' AM_LDFLAGS = -llttoolbox $(LIBXML_LIBS) $(ICU_LIBS) lt_append_SOURCES = lt_append.cc @@ -45,6 +46,7 @@ lsx_comp_SOURCES = lt_comp.cc lt_invert_SOURCES = lt_invert.cc lt_restrict_SOURCES = lt_restrict.cc lt_apply_acx_SOURCES = lt_apply_acx.cc +formatmsg_SOURCES = formatmsg.cc #lt-validate-dictionary: Makefile.am validate-header.sh # @echo "Creating lt-validate-dictionary script" diff --git a/lttoolbox/acx.cc b/lttoolbox/acx.cc index 35fef11f..ec56fe22 100644 --- a/lttoolbox/acx.cc +++ b/lttoolbox/acx.cc @@ -16,6 +16,9 @@ */ #include #include +#include +#include +#include const xmlChar* CHAR_NODE = (const xmlChar*)"char"; const xmlChar* EQUIV_NODE = (const xmlChar*)"equiv-char"; @@ -23,33 +26,37 @@ const char* VALUE_ATTR = "value"; int32_t get_val(xmlNode* node) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; UString s = getattr(node, VALUE_ATTR); if (s.empty()) { - error_and_die(node, "Missing value attribute."); + i18n.error("ALT80010", {"node_doc_url", "line_number"}, + {(char*)node->doc->URL, node->line}, true); } std::vector v; ustring_to_vec32(s, v); if (v.size() > 1) { - error_and_die(node, "Expected a single character in value attribute, but found %d.", v.size()); + i18n.error("ALT80020", {"node_doc_url", "line_number", "value_size"}, + {(char*)node->doc->URL, node->line, std::to_string(v.size()).c_str()}, true); } return v[0]; } std::map> readACX(const char* file) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; std::map> acx; xmlNode* top_node = load_xml(file); for (auto char_node : children(top_node)) { if 
(!xmlStrEqual(char_node->name, CHAR_NODE)) { - error_and_die(char_node, "Expected but found <%s>.", - (const char*)char_node->name); + i18n.error("ALT80030", {"node_doc_url", "line_number", "expected", "found"}, + {(char*)char_node->doc->URL, char_node->line, "char", (const char*)char_node->name}, true); } int32_t key = get_val(char_node); sorted_vector vec; for (auto equiv_node : children(char_node)) { if (!xmlStrEqual(equiv_node->name, EQUIV_NODE)) { - error_and_die(char_node, "Expected but found <%s>.", - (const char*)equiv_node->name); + i18n.error("ALT80030", {"node_doc_url", "line_number", "expected", "found"}, + {(char*)char_node->doc->URL, char_node->line, "equiv-char", (const char*)equiv_node->name}, true); } vec.insert(get_val(equiv_node)); } diff --git a/lttoolbox/att_compiler.cc b/lttoolbox/att_compiler.cc index db6e283d..5b4347b6 100644 --- a/lttoolbox/att_compiler.cc +++ b/lttoolbox/att_compiler.cc @@ -27,10 +27,12 @@ #include #include #include +#include +#include using namespace icu; -AttCompiler::AttCompiler() +AttCompiler::AttCompiler(): i18n(ALT_I18N_DATA, "lttoolbox") {} AttCompiler::~AttCompiler() @@ -148,7 +150,7 @@ AttCompiler::parse(std::string const &file_name, bool read_rl) UFILE* infile = u_fopen(file_name.c_str(), "r", NULL, NULL); if (infile == NULL) { - std::cerr << "Error: unable to open '" << file_name << "' for reading." << std::endl; + i18n.error("ALT80050", {"file_name"}, {file_name.c_str()}, false); } std::vector tokens; bool first_line_in_fst = true; // First line -- see below @@ -185,15 +187,14 @@ AttCompiler::parse(std::string const &file_name, bool read_rl) if (first_line_in_fst && tokens.size() == 1) { - std::cerr << "Error: invalid format in file '" << file_name << "' on line " << line_number << "." 
<< std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80060", {"file_name", "line_number"}, {file_name.c_str(), line_number}, true); } if (tokens[0].find('-') == 0) { if (state_id_offset == 1) { // this is the first split we've seen - std::cerr << "Warning: Multiple fsts in '" << file_name << "' will be disjuncted." << std::endl; + i18n.error("ALT60070", {"file_name"}, {file_name.c_str()}, false); multiple_transducers = true; } // Update the offset for the new FST @@ -421,8 +422,7 @@ TransducerType AttCompiler::classify_backwards(int state, std::set& path) { if(finals.find(state) != finals.end()) { - std::cerr << "ERROR: Transducer contains epsilon transition to a final state. Aborting." << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80080", true); } AttNode* node = get_node(state); TransducerType type = UNDECIDED; @@ -430,8 +430,7 @@ AttCompiler::classify_backwards(int state, std::set& path) if(t1.type != UNDECIDED) { type |= t1.type; } else if(path.find(t1.to) != path.end()) { - std::cerr << "ERROR: Transducer contains initial epsilon loop. Aborting." << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80090", true); } else { path.insert(t1.to); t1.type = classify_backwards(t1.to, path); diff --git a/lttoolbox/att_compiler.h b/lttoolbox/att_compiler.h index 4d4af152..6490b200 100644 --- a/lttoolbox/att_compiler.h +++ b/lttoolbox/att_compiler.h @@ -29,6 +29,7 @@ #include #include +#include #define UNDECIDED 0 #define WORD 1 @@ -137,6 +138,8 @@ class AttCompiler /** Stores the transducer graph. */ std::map graph; + I18n i18n; + /** Clears the data associated with the current transducer. 
*/ void clear(); diff --git a/lttoolbox/buffer.h b/lttoolbox/buffer.h index 6ea1f64d..8dfee400 100644 --- a/lttoolbox/buffer.h +++ b/lttoolbox/buffer.h @@ -20,7 +20,8 @@ #include #include #include - +#include +#include /** * Generic circular buffer class */ @@ -71,10 +72,10 @@ class Buffer */ Buffer(unsigned int const buf_size=2048) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; if(buf_size == 0) { - std::cerr << "Error: Cannot create empty buffer." << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80100", true); } buf = new T[buf_size]; size = buf_size; diff --git a/lttoolbox/cli.cc b/lttoolbox/cli.cc index ef079493..b3bc3f16 100644 --- a/lttoolbox/cli.cc +++ b/lttoolbox/cli.cc @@ -22,14 +22,17 @@ #include #include #include +#include +#include +#include -CLI::CLI(std::string desc, std::string ver) +CLI::CLI(icu::UnicodeString desc, std::string ver) { description = desc; version = ver; } -CLI::CLI(std::string desc) +CLI::CLI(icu::UnicodeString desc) { description = desc; } @@ -39,14 +42,14 @@ CLI::~CLI() } void CLI::add_str_arg(char short_flag, std::string long_flag, - std::string desc, std::string arg) + icu::UnicodeString desc, std::string arg) { options.push_back({.short_opt=short_flag, .long_opt=long_flag, .desc=desc, .is_bool=false, .var=arg}); } void CLI::add_bool_arg(char short_flag, std::string long_flag, - std::string desc) + icu::UnicodeString desc) { options.push_back({.short_opt=short_flag, .long_opt=long_flag, .desc=desc, .is_bool=true, .var=""}); @@ -58,20 +61,21 @@ void CLI::add_file_arg(std::string name, bool optional) if (!optional) min_file_args++; } -void CLI::set_epilog(std::string e) +void CLI::set_epilog(icu::UnicodeString e) { epilog = e; } void CLI::print_usage(std::ostream& out) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; if (!prog_name.empty()) { out << prog_name; if (!version.empty()) { out << " v" << version; } out << ": " << description << std::endl; - out << "USAGE: " << prog_name; + out << i18n.format("usage") << prog_name; 
std::string bargs; std::string sargs; for (auto& it : options) { @@ -112,7 +116,7 @@ void CLI::print_usage(std::ostream& out) #endif out << it.desc << std::endl; } - if (!epilog.empty()) { + if (!epilog.isEmpty()) { out << epilog << std::endl; } } @@ -121,6 +125,7 @@ void CLI::print_usage(std::ostream& out) void CLI::parse_args(int argc, char* argv[]) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; prog_name = basename(argv[0]); std::string arg_str; #if HAVE_GETOPT_LONG @@ -151,7 +156,7 @@ void CLI::parse_args(int argc, char* argv[]) if (it.short_opt == cnt) { found = true; if (it.short_opt == 'v' && it.long_opt == "version") { - std::cout << prog_name << " version " << version << std::endl; + std::cout << prog_name << i18n.format("version") << version << std::endl; exit(EXIT_SUCCESS); } if (it.is_bool) { diff --git a/lttoolbox/cli.h b/lttoolbox/cli.h index 4b1cdbf5..862627ec 100644 --- a/lttoolbox/cli.h +++ b/lttoolbox/cli.h @@ -19,20 +19,22 @@ #include #include #include +#include +#include class CLI { private: struct CLIOption { char short_opt; std::string long_opt; - std::string desc; + icu::UnicodeString desc; bool is_bool; std::string var; }; - std::string description; + icu::UnicodeString description; std::string version; - std::string epilog; + icu::UnicodeString epilog; std::vector options; std::vector> file_args; @@ -45,14 +47,14 @@ class CLI { std::string prog_name; public: - CLI(std::string desc, std::string version); - CLI(std::string desc); + CLI(icu::UnicodeString desc, std::string version); + CLI(icu::UnicodeString desc); ~CLI(); - void add_str_arg(char short_flag, std::string long_flag, std::string desc, + void add_str_arg(char short_flag, std::string long_flag, icu::UnicodeString desc, std::string arg); - void add_bool_arg(char short_flag, std::string long_flag, std::string desc); + void add_bool_arg(char short_flag, std::string long_flag, icu::UnicodeString desc); void add_file_arg(std::string name, bool optional = true); - void set_epilog(std::string 
e); + void set_epilog(icu::UnicodeString e); void print_usage(std::ostream& out = std::cerr); void parse_args(int argc, char* argv[]); std::map>& get_strs(); diff --git a/lttoolbox/compiler.cc b/lttoolbox/compiler.cc index 20f75c91..7ef248f7 100644 --- a/lttoolbox/compiler.cc +++ b/lttoolbox/compiler.cc @@ -23,8 +23,10 @@ #include #include +#include +#include -Compiler::Compiler() +Compiler::Compiler(): i18n(ALT_I18N_DATA, "lttoolbox") { } @@ -61,7 +63,7 @@ Compiler::parse(std::string const &file, UStringView dir) if(ret != 0) { - std::cerr << "Error: Parse error at the end of input." << std::endl; + i18n.error("ALT80110", true); } xmlFreeTextReader(reader); @@ -109,6 +111,7 @@ Compiler::parse(std::string const &file, UStringView dir) bool Compiler::valid(UStringView dir) const { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; const char* side = (dir == COMPILER_RESTRICTION_RL_VAL ? "right" : "left"); const std::set epsilonSymbols = alphabet.symbolsWhereLeftIs(0); const std::set spaceSymbols = alphabet.symbolsWhereLeftIs(' '); @@ -118,11 +121,17 @@ Compiler::valid(UStringView dir) const auto initial = fst.getInitial(); for(const auto i : fst.closure(initial, epsilonSymbols)) { if (finals.count(i)) { - std::cerr << "Error: Invalid dictionary (hint: the " << side << " side of an entry is empty)" << std::endl; + if (dir == COMPILER_RESTRICTION_RL_VAL) /* RL direction is the "right" side; compare, do not assign */ + i18n.error("ALT80120", false); + else + i18n.error("ALT80122", false); return false; } if(fst.closure(i, spaceSymbols).size() > 1) { // >1 since closure always includes self - std::cerr << "Error: Invalid dictionary (hint: entry on the " << side << " beginning with whitespace)" << std::endl; + if (dir == COMPILER_RESTRICTION_RL_VAL) /* same right/left selection as above */ + i18n.error("ALT80121", false); + else + i18n.error("ALT80123", false); return false; } } @@ -157,9 +166,8 @@ Compiler::procAlphabet() } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Missing alphabet symbols." 
<< std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80140", {"file_name", "line_number"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, xmlTextReaderGetParserLineNumber(reader)}, true); } } } @@ -273,7 +281,7 @@ Compiler::matchTransduction(std::vector const &pi, // rl compilation of a badly written rule // having an epsilon with wildcard output will produce // garbage output -- see https://github.com/apertium/apertium-separable/issues/8 - std::cerr << "Warning: Cannot insert from empty input. Ignoring. (You probably want to specify exact tags when deleting a word.)" << std::endl; + i18n.error("ALT60150", false); } else if (tag == alphabet(any_tag, any_tag) || tag == alphabet(any_char, any_char) || tag == alphabet(any_tag, 0) || @@ -302,9 +310,9 @@ Compiler::requireEmptyError(UStringView name) { if(!xmlTextReaderIsEmptyElement(reader)) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Non-empty element '<" << name << ">' should be empty." << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80160", {"file_name", "line_number", "name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), icu::UnicodeString(name.data())}, true); } } @@ -359,9 +367,9 @@ Compiler::readString(std::vector &result, UStringView name) if(!alphabet.isSymbolDefined(symbol)) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Undefined symbol '" << symbol << "'." 
<< std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80170", {"file_name", "line_number", "symbol"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), icu::UnicodeString(symbol.data())}, true); } result.push_back(alphabet(symbol)); @@ -387,10 +395,9 @@ Compiler::readString(std::vector &result, UStringView name) } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid specification of element '<" << name; - std::cerr << ">' in this context." << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80180", {"file_name", "line_number", "name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), icu::UnicodeString(name.data())}, true); } } @@ -403,9 +410,8 @@ Compiler::skipBlanks(UString &name) { if(!allBlanks()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid construction." << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80190", {"file_name", "line_number"}, {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader)}, true); } } @@ -432,9 +438,8 @@ Compiler::skip(UString &name, UStringView elem, bool open) { if(!allBlanks()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid construction." << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80190", {"file_name", "line_number"}, {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader)}, true); } } xmlTextReaderRead(reader); @@ -443,9 +448,10 @@ Compiler::skip(UString &name, UStringView elem, bool open) if(name != elem) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Expected '<" << slash << elem << ">'." 
<< std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80200", {"file_name", "line_number", "slash_element"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), icu::UnicodeString(slash.data()) + + icu::UnicodeString(elem.data())}, true); } } @@ -472,8 +478,8 @@ Compiler::procIdentity(double const entry_weight, bool ig) if(verbose && first_element && (both_sides.front() == (int)' ')) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Entry begins with space." << std::endl; + i18n.error("ALT60210", {"file_name", "line_number"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, xmlTextReaderGetParserLineNumber(reader)}, false); } first_element = false; EntryToken e; @@ -516,8 +522,8 @@ Compiler::procTransduction(double const entry_weight) if(verbose && first_element && (lhs.front() == (int)' ')) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Entry begins with space." << std::endl; + i18n.error("ALT60210", {"file_name", "line_number"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, xmlTextReaderGetParserLineNumber(reader)}, false); } first_element = false; @@ -560,16 +566,18 @@ Compiler::procPar() if(!current_paradigm.empty() && paradigm_name == current_paradigm) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Paradigm refers to itself '" << paradigm_name << "'." << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80220", {"file_name", "line_number", "paradigm_name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(paradigm_name.data())}, true); } if(paradigms.find(paradigm_name) == paradigms.end()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Undefined paradigm '" << paradigm_name << "'." 
<< std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80230", {"file_name", "line_number", "paradigm_name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(paradigm_name.data())}, true); } e.setParadigm(paradigm_name); return e; @@ -604,9 +612,8 @@ Compiler::insertEntryTokens(std::vector const &elements) } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid entry token." << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80240", {"file_name", "line_number"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, xmlTextReaderGetParserLineNumber(reader)}, true); } } t.setFinal(e, default_weight); @@ -679,11 +686,11 @@ Compiler::requireAttribute(UStringView value, UStringView attrname, UStringView { if(value.empty()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): '<" << elemname; - std::cerr << "' element must specify non-void '"; - std::cerr << attrname << "' attribute." << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80250", {"file_name", "line_number", "element_name", "attr_name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(elemname.data()), + icu::UnicodeString(attrname.data())}, true); } } @@ -869,9 +876,8 @@ Compiler::procEntry() int ret = xmlTextReaderRead(reader); if(ret != 1) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Parse error." 
<< std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80260", {"file_name", "line_number"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, xmlTextReaderGetParserLineNumber(reader)}, true); } UString name = XMLParseUtil::readName(reader); skipBlanks(name); @@ -909,9 +915,10 @@ Compiler::procEntry() auto it = paradigms.find(p); if(it == paradigms.end()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Undefined paradigm '" << p << "'." << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80230", {"file_name", "line_number", "paradigm_name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(p.data())}, true); } // discard entries with empty paradigms (by the directions, normally) if(it->second.isEmpty()) @@ -936,10 +943,11 @@ Compiler::procEntry() } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid inclusion of '<" << name << ">' into '<" << COMPILER_ENTRY_ELEM; - std::cerr << ">'." << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80270", {"file_name", "line_number", "element_name", "compiler_entry_element"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(name.data()), + icu::UnicodeString(COMPILER_ENTRY_ELEM.data())}, true); } } } @@ -1013,9 +1021,10 @@ Compiler::procNode() } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid node '<" << name << ">'." 
<< std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80280", {"file_name", "line_number", "element_name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(name.data())}, true); } } diff --git a/lttoolbox/compiler.h b/lttoolbox/compiler.h index 9b8d42f8..546d5546 100644 --- a/lttoolbox/compiler.h +++ b/lttoolbox/compiler.h @@ -27,6 +27,8 @@ #include #include +#include + /** * A compiler of dictionaries to letter transducers */ @@ -177,6 +179,8 @@ class Compiler int32_t word_boundary_s = 0; int32_t word_boundary_ns = 0; + I18n i18n; + /** * Method to parse an XML Node */ diff --git a/lttoolbox/compression.cc b/lttoolbox/compression.cc index 79970e60..13f1aa0d 100644 --- a/lttoolbox/compression.cc +++ b/lttoolbox/compression.cc @@ -22,14 +22,16 @@ #include #include #include +#include +#include +#include void Compression::writeByte(unsigned char byte, FILE *output) { if(fwrite_unlocked(&byte, 1, 1, output) != 1) { - std::cerr << "I/O Error writing" << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80290", true); } } @@ -40,8 +42,7 @@ Compression::readByte(FILE *input) if(fread_unlocked(&value, 1, 1, input) != 1) { // Not uncomment this code since -// std::cerr << "I/O Error reading" << std::endl; -// exit(EXIT_FAILURE); +// I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80290", true); } return value; @@ -87,8 +88,7 @@ Compression::multibyte_write(unsigned int value, FILE *output) } else { - std::cerr << "Out of range: " << value << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80300", {"value"}, {std::to_string(value).c_str()}, true); } } @@ -134,8 +134,7 @@ Compression::multibyte_write(unsigned int value, std::ostream &output) } else { - std::cerr << "Out of range: " << value << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80300", {"value"}, {std::to_string(value).c_str()}, true); } } 
diff --git a/lttoolbox/compression.h b/lttoolbox/compression.h index 4bcba583..f7eea338 100644 --- a/lttoolbox/compression.h +++ b/lttoolbox/compression.h @@ -23,6 +23,7 @@ #include #include #include +#include // Global lttoolbox features constexpr char HEADER_LTTOOLBOX[4]{'L', 'T', 'T', 'B'}; @@ -43,7 +44,7 @@ enum TD_FEATURES : uint64_t { inline auto write_u64(FILE *out, uint64_t value) { auto rv = fwrite_unlocked(reinterpret_cast(&value), 1, sizeof(value), out); if (rv != sizeof(value)) { - throw std::runtime_error("Failed to write uint64_t"); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80580", true); } return rv; } @@ -77,7 +78,7 @@ inline auto write_le(Stream& out, uint64_t value) { inline auto read_u64(FILE *in) { uint64_t value = 0; if (fread_unlocked(reinterpret_cast(&value), 1, sizeof(value), in) != sizeof(value)) { - throw std::runtime_error("Failed to read uint64_t"); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80580", true); } return value; } diff --git a/lttoolbox/deserialiser.h b/lttoolbox/deserialiser.h index 90629301..ab7f221c 100644 --- a/lttoolbox/deserialiser.h +++ b/lttoolbox/deserialiser.h @@ -34,6 +34,7 @@ #include #include +#include template class Deserialiser; @@ -129,7 +130,8 @@ integer_type int_deserialise(std::istream &Stream_) { unsigned char SerialisedTypeSize = Stream_.get(); if (!Stream_) - throw DeserialisationException("can't deserialise size"); + throw DeserialisationException(I18n(ALT_I18N_DATA, "lttoolbox").format("ALT80640", + {"size"}, {std::to_string(sizeof(integer_type)).c_str()})); for (; SerialisedTypeSize != 0;) { SerialisedType_ += @@ -137,14 +139,13 @@ integer_type int_deserialise(std::istream &Stream_) { << std::numeric_limits::digits * --SerialisedTypeSize; if (!Stream_) - throw DeserialisationException("can't deserialise byte"); + throw DeserialisationException(I18n(ALT_I18N_DATA, "lttoolbox").format("ALT80650", {"size"}, + {std::to_string(sizeof(integer_type)).c_str()})); } return SerialisedType_; } catch (const 
std::exception &exc) { - std::stringstream what_; - what_ << "can't deserialise " << sizeof(integer_type) << " byte integer type: " << exc.what(); - throw DeserialisationException(what_.str().c_str()); + throw DeserialisationException(exc.what()); } } diff --git a/lttoolbox/exception.h b/lttoolbox/exception.h index 17f0806c..9eb8154a 100644 --- a/lttoolbox/exception.h +++ b/lttoolbox/exception.h @@ -19,6 +19,8 @@ #include #include +#include +#include class Exception : public std::exception @@ -29,32 +31,42 @@ class Exception { } + Exception(const icu::UnicodeString &_msg) throw () + : std::exception(), msg(_msg) + { + } + virtual ~Exception() throw () { } const char* what() const throw () { - return msg.c_str(); + std::string res; + msg.toUTF8String(res); + return res.c_str(); } private: - std::string msg; + icu::UnicodeString msg; }; class IOException : public Exception { public: IOException(const char* _msg) throw () : Exception(_msg) {}; + IOException(const icu::UnicodeString &_msg) throw () : Exception(_msg) {}; }; class SerialisationException : public IOException { public: SerialisationException(const char* _msg) throw () : IOException(_msg) {}; + SerialisationException(const icu::UnicodeString &_msg) throw () : IOException(_msg) {}; }; class DeserialisationException : public IOException { public: DeserialisationException(const char* _msg) throw () : IOException(_msg) {}; + DeserialisationException(const icu::UnicodeString &_msg) throw () : IOException(_msg) {}; }; #endif diff --git a/lttoolbox/expander.cc b/lttoolbox/expander.cc index 3ead68c0..304eb8fa 100644 --- a/lttoolbox/expander.cc +++ b/lttoolbox/expander.cc @@ -22,6 +22,7 @@ #include #include #include +#include Expander::Expander() @@ -46,7 +47,7 @@ Expander::expand(std::string const &file, UFILE* output) if(ret != 0) { - std::cerr << "Error: Parse error at the end of input." 
<< std::endl; + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80110", false); } xmlFreeTextReader(reader); @@ -73,9 +74,9 @@ Expander::requireEmptyError(UStringView name) { if(!xmlTextReaderIsEmptyElement(reader)) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Non-empty element '<" << name << ">' should be empty." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80160", {"file_name", "line_number", "name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), icu::UnicodeString(name.data())}, true); } } @@ -149,10 +150,9 @@ Expander::readString(UString &result, UStringView name) } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid specification of element '<" << name; - std::cerr << ">' in this context." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80180", {"file_name", "line_number", "name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), icu::UnicodeString(name.data())}, true); } } @@ -163,9 +163,8 @@ Expander::skipBlanks(UString &name) { if(!allBlanks()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid construction." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80190", {"file_name", "line_number"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, xmlTextReaderGetParserLineNumber(reader)}, true); } xmlTextReaderRead(reader); name = XMLParseUtil::readName(reader); @@ -182,9 +181,8 @@ Expander::skip(UString &name, UStringView elem) { if(!allBlanks()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid construction." 
<< std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80190", {"file_name", "line_number"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, xmlTextReaderGetParserLineNumber(reader)}, true); } xmlTextReaderRead(reader); name = XMLParseUtil::readName(reader); @@ -192,9 +190,9 @@ Expander::skip(UString &name, UStringView elem) if(name != elem) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Expected '<" << elem << ">'." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80200", {"file_name", "line_number", "slash_element"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), icu::UnicodeString(elem.data())}, true); } } @@ -315,11 +313,11 @@ Expander::requireAttribute(UStringView value, UStringView attrname, UStringView { if(value.empty()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): '<" << elemname; - std::cerr << "' element must specify non-void '"; - std::cerr<< attrname << "' attribute." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80250", {"file_name", "line_number", "element_name", "attr_name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(elemname.data()), + icu::UnicodeString(attrname.data())}, true); } } @@ -347,9 +345,8 @@ Expander::procEntry(UFILE* output) int ret = xmlTextReaderRead(reader); if(ret != 1) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Parse error." 
<< std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80260", {"file_name", "line_number"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, xmlTextReaderGetParserLineNumber(reader)}, true); } myname = XMLParseUtil::readName(reader); } @@ -379,9 +376,8 @@ Expander::procEntry(UFILE* output) int ret = xmlTextReaderRead(reader); if(ret != 1) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Parse error." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80260", {"file_name", "line_number"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, xmlTextReaderGetParserLineNumber(reader)}, true); } UString name = XMLParseUtil::readName(reader); skipBlanks(name); @@ -424,9 +420,10 @@ Expander::procEntry(UFILE* output) paradigm_lr.find(p) == paradigm_lr.end() && paradigm_rl.find(p) == paradigm_rl.end()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Undefined paradigm '" << p << "'." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80230", {"file_name", "line_number", "paradigm_name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(p.data())}, true); } if(attribute == Compiler::COMPILER_RESTRICTION_LR_VAL) @@ -506,10 +503,12 @@ Expander::procEntry(UFILE* output) } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid inclusion of '<" << name << ">' into '<" << Compiler::COMPILER_ENTRY_ELEM; - std::cerr << ">'." 
<< std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80270", + {"file_name", "line_number", "element_name", "compiler_entry_element"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(name.data()), + icu::UnicodeString(Compiler::COMPILER_ENTRY_ELEM.data())}, true); } } } @@ -563,9 +562,10 @@ Expander::procNode(UFILE *output) } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid node '<" << name << ">'." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80280", {"file_name", "line_number", "element_name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(name.data())}, true); } } diff --git a/lttoolbox/file_utils.cc b/lttoolbox/file_utils.cc index 2a6907c8..aa2284ec 100644 --- a/lttoolbox/file_utils.cc +++ b/lttoolbox/file_utils.cc @@ -19,6 +19,8 @@ #include #include +#include +#include UFILE* openOutTextFile(const std::string& fname) @@ -28,8 +30,7 @@ openOutTextFile(const std::string& fname) } else { UFILE* ret = u_fopen(fname.c_str(), "wb", NULL, NULL); if (!ret) { - std::cerr << "Error: Cannot open file '" << fname << "' for writing." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80050", {"file_name"}, {fname.c_str()}, true); } return ret; } @@ -43,8 +44,7 @@ openOutBinFile(const std::string& fname) } else { FILE* ret = fopen(fname.c_str(), "wb"); if (!ret) { - std::cerr << "Error: Cannot open file '" << fname << "' for writing." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80050", {"file_name"}, {fname.c_str()}, true); } return ret; } @@ -58,8 +58,7 @@ openInBinFile(const std::string& fname) } else { FILE* ret = fopen(fname.c_str(), "rb"); if (!ret) { - std::cerr << "Error: Cannot open file '" << fname << "' for reading." 
<< std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80050", {"file_name"}, {fname.c_str()}, true); } return ret; } @@ -103,7 +102,7 @@ readShared(FILE* input, std::set& letters, Alphabet& alpha) if (strncmp(header, HEADER_LTTOOLBOX, 4) == 0) { auto features = read_le(input); if (features >= LTF_UNKNOWN) { - throw std::runtime_error("FST has features that are unknown to this version of lttoolbox - upgrade!"); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80620", true); } } else { // Old binary format diff --git a/lttoolbox/formatmsg.cc b/lttoolbox/formatmsg.cc new file mode 100644 index 00000000..8dc3460b --- /dev/null +++ b/lttoolbox/formatmsg.cc @@ -0,0 +1,29 @@ +#include +#include + +int main(int argc, char* argv[]) +{ + if (argc < 4 || argc % 2 != 0) { + std::cout << I18n(ALT_I18N_DATA, "lttoolbox").format("usage") + << "formatmsg \n"; + return 0; + } + + I18n i18n {argv[1], argv[2]}; + + std::vector arg_names; + std::vector arg_values; + if (argc > 4) { + int arg_values_start = (argc - 4) / 2 + 4; + + for (int i = 4; i < arg_values_start; i++) { + arg_names.push_back(argv[i]); + } + + for (int i = arg_values_start; i < argc; i++) { + arg_values.push_back(argv[i]); + } + } + std::cout << i18n.format(argv[3], arg_names, arg_values) << std::endl; + return 0; +} diff --git a/lttoolbox/fst_processor.cc b/lttoolbox/fst_processor.cc index 198d2be7..356d1a35 100644 --- a/lttoolbox/fst_processor.cc +++ b/lttoolbox/fst_processor.cc @@ -25,6 +25,7 @@ #include #include #include +#include FSTProcessor::FSTProcessor() @@ -51,7 +52,7 @@ FSTProcessor::FSTProcessor() void FSTProcessor::streamError() { - throw Exception("Error: Malformed input stream."); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80610", true); } void @@ -62,8 +63,7 @@ FSTProcessor::parseICX(std::string const &file) reader = xmlReaderForFile(file.c_str(), NULL, 0); if(reader == NULL) { - std::cerr << "Error: cannot open '" << file << "'." 
<< std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80050", {"file_name"}, {icu::UnicodeString(file.c_str())}, true); } int ret = xmlTextReaderRead(reader); while(ret == 1) @@ -87,8 +87,7 @@ FSTProcessor::parseRCX(std::string const &file) reader = xmlReaderForFile(file.c_str(), NULL, 0); if(reader == NULL) { - std::cerr << "Error: cannot open '" << file << "'." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80050", {"file_name"}, {icu::UnicodeString(file.c_str())}, true); } int ret = xmlTextReaderRead(reader); while(ret == 1) @@ -121,9 +120,10 @@ FSTProcessor::procNodeICX() } else { - std::cerr << "Error in ICX file (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid node '<" << name << ">'." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80280", {"file_name", "line_number", "element_name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(name.data())}, true); } } @@ -159,9 +159,10 @@ FSTProcessor::procNodeRCX() } else { - std::cerr << "Error in RCX file (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid node '<" << name << ">'." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80280", {"file_name", "line_number", "element_name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(name.data())}, true); } } @@ -667,9 +668,7 @@ FSTProcessor::classifyFinals() } else { - std::cerr << "Error: Unsupported transducer type for '"; - std::cerr << it.first << "'." 
<< std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80330", {"transducer_first"}, {icu::UnicodeString(it.first.data())}, true); } } } @@ -908,8 +907,9 @@ FSTProcessor::compoundAnalysis(UString input_word) if(current_state.size() > MAX_COMBINATIONS) { - std::cerr << "Warning: compoundAnalysis's MAX_COMBINATIONS exceeded for '" << input_word << "'" << std::endl; - std::cerr << " gave up at char " << i << " '" << val << "'." << std::endl; + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT60340", {"input_word", "index", "char"}, + {icu::UnicodeString(input_word.data()), + (int)i, val}, false); UString nullString; return nullString; @@ -942,7 +942,7 @@ FSTProcessor::initDecompositionSymbols() && (compoundOnlyLSymbol=alphabet(u"<@compound:only-L>")) == 0 && (compoundOnlyLSymbol=alphabet(u"")) == 0) { - std::cerr << "Warning: Decomposition symbol <:compound:only-L> not found" << std::endl; + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT60350", {"symbol"}, {"<:compound:only-L>"}, false); } else if(!showControlSymbols) { @@ -955,7 +955,7 @@ FSTProcessor::initDecompositionSymbols() && (compoundRSymbol=alphabet(u"<@compound:R>")) == 0 && (compoundRSymbol=alphabet(u"")) == 0) { - std::cerr << "Warning: Decomposition symbol <:compound:R> not found" << std::endl; + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT60350", {"symbol"}, {"<:compound:R>"}, false); } else if(!showControlSymbols) { @@ -2327,7 +2327,7 @@ FSTProcessor::valid() const { if(initial_state.isFinal(all_finals)) { - std::cerr << "Error: Invalid dictionary (hint: the left side of an entry is empty)" << std::endl; + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80122", false); return false; } else @@ -2336,7 +2336,7 @@ FSTProcessor::valid() const s.step(' '); if(s.size() != 0) { - std::cerr << "Error: Invalid dictionary (hint: entry beginning with whitespace)" << std::endl; + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80124", false); return false; } } diff --git a/lttoolbox/fst_processor.h 
b/lttoolbox/fst_processor.h index f9476c13..88616d73 100644 --- a/lttoolbox/fst_processor.h +++ b/lttoolbox/fst_processor.h @@ -34,6 +34,7 @@ #include #include #include +#include /** * Kind of output of the generator module @@ -460,8 +461,8 @@ class FSTProcessor else { if(!max_case_insensitive_state_size_warned) { max_case_insensitive_state_size_warned = true; // only warn once - UFILE* err_out = u_finit(stderr, NULL, NULL); - u_fprintf(err_out, "Warning: matching case-sensitively since processor state size >= %d\n", max_case_insensitive_state_size); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT60320", {"max_case_insensitive_state_size"}, + {std::to_string(max_case_insensitive_state_size).c_str()}, false); } return true; } diff --git a/lttoolbox/i18n.h b/lttoolbox/i18n.h new file mode 100644 index 00000000..14b3aff9 --- /dev/null +++ b/lttoolbox/i18n.h @@ -0,0 +1,163 @@ +#ifndef _I18N_ +#define _I18N_ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +class I18n +{ +private: + static std::unordered_map> locales_data; + static icu::Locale locale; + icu::ResourceBundle resource; + UErrorCode status; +public: + I18n(const char *locales_path, std::string package_name); + icu::UnicodeString format(const char* key, const std::vector arg_names, + const std::vector arg_values) const; + icu::UnicodeString format(const char* key) const; + void error(const char* key, const std::vector arg_names, + const std::vector arg_values, bool quit) const; + void error(const char* key, bool quit) const; +}; + +// Store .dat files and map it to package name of it. +inline std::unordered_map> I18n::locales_data; + +// Store default language of the system before +// being overriden by LtLocale::tryToSetLocale() function. +inline icu::Locale I18n::locale = icu::Locale().getName(); + +inline I18n::I18n(const char *locales_path, std::string package_name) : resource(status) +{ + // Initialize status. 
+ status = U_ZERO_ERROR; + + // Check if .dat file that is linked to this package_name has already been loaded or not. + if (locales_data.find(package_name) == locales_data.end()) { + // Open .dat file and loads it into pointer that is mapped in hashtable to string that represents + // package name for easy looking later in case of being used again to avoid reloading it. + + std::ifstream file; + file.open(locales_path); + + if (!file.is_open()) { + std::cerr << "Error in opening data file!" << std::endl; + std::cerr << "File: " << locales_path << std::endl; + std::cerr << "Package Name: " << package_name << std::endl; + exit(EXIT_FAILURE); + } + + std::streamsize file_size = std::filesystem::file_size(std::filesystem::path{locales_path}); + + locales_data[package_name] = std::make_unique(file_size); + + file.read(locales_data[package_name].get(), file_size); + + // Link pointer address that points to .dat file + // in memory to package_name that will be used later in Resource Bundles. + udata_setAppData(package_name.c_str(), locales_data[package_name].get(), &status); + + if (!U_SUCCESS(status)) { + std::cerr << "Error in loading data!" << std::endl; + std::cerr << "Package Name: " << package_name << std::endl; + std::cerr << u_errorName(status) << std::endl; + exit(EXIT_FAILURE); + } + } + // Initialize ResourceBundle with package_name that is linked by udata_setAppData function to .dat file + // that contains i18n messages and default locale of the system for use in internationaliztion later + // through format and error functions. 
+ resource = icu::ResourceBundle(package_name.c_str(), locale, status); + + if (!U_SUCCESS(status)) { + std::cerr << "Error in initializing resource bundle" << std::endl; + std::cerr << "Package Name: " << package_name << std::endl; + std::cerr << u_errorName(status) << std::endl; + exit(EXIT_FAILURE); + } +} + +inline icu::UnicodeString I18n::format(const char* key, const std::vector arg_names, + const std::vector arg_values) const +{ + UErrorCode status = U_ZERO_ERROR; + icu::UnicodeString pattern; + icu::UnicodeString output; + + icu::ResourceBundle resource_object = resource.get(key, status); + if (!U_SUCCESS(status)) { + std::cerr << "Error: key not found!" << std::endl; + std::cerr << "Key: " << key << std::endl; + std::cerr << u_errorName(status) << std::endl; + exit(EXIT_FAILURE); + } + + pattern = resource_object.getString(status); + if (!U_SUCCESS(status)) { + std::cerr << "Error in getting key text!" << std::endl; + std::cerr << "Key: " << key << std::endl; + std::cerr << u_errorName(status) << std::endl; + exit(EXIT_FAILURE); + } + + icu::MessageFormat formatter {pattern, status}; + if (!U_SUCCESS(status)) { + std::cerr << "Error in initializing MessageFormat class!" << std::endl; + std::cerr << "Key: " << key << std::endl; + std::cerr << "Pattern: " << pattern << std::endl; + std::cerr << u_errorName(status) << std::endl; + exit(EXIT_FAILURE); + } + + formatter.format(arg_names.data(), arg_values.data(), arg_values.size(), output, status); + + if (!U_SUCCESS(status)) { + std::cerr << "Error in formatting!" 
<< std::endl; + std::cerr << "Key: " << key << std::endl; + + std::cerr << "Argument names: "; + for (int i = 0; i < arg_names.size(); i++) + std::cerr << arg_names[i] << std::endl; + + std::cerr << "Argument values: "; + for (int i = 0; i < arg_values.size(); i++) + std::cerr << arg_values[i].getString() << std::endl; + + std::cerr << u_errorName(status) << std::endl; + exit(EXIT_FAILURE); + } + + return output; +} + +inline icu::UnicodeString I18n::format(const char* key) const +{ + return format(key, {}, {}); +} + +inline void I18n::error(const char* key, const std::vector arg_names, + const std::vector arg_values, bool quit) const +{ + std::cerr << format(key, arg_names, arg_values) << std::endl; + if (quit) { + exit(EXIT_FAILURE); + } +} +inline void I18n::error(const char* key, bool quit) const +{ + error(key, {}, {}, quit); +} + +#endif diff --git a/lttoolbox/input_file.cc b/lttoolbox/input_file.cc index cddb8101..aa8c9944 100644 --- a/lttoolbox/input_file.cc +++ b/lttoolbox/input_file.cc @@ -22,6 +22,7 @@ #include #include #include +#include InputFile::InputFile() : infile(stdin), buffer_size(0) @@ -48,8 +49,7 @@ void InputFile::open_or_exit(const char* fname) { if (!open(fname)) { - std::cerr << "Error: Unable to open '" << fname << "' for reading." 
<< std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80050", {"file_name"}, {fname}, true); } } @@ -96,17 +96,17 @@ InputFile::internal_read() if ((first & 0xF0) == 0xF0) { i += 3; if (fread_unlocked(cbuffer+1, 1, 3, infile) != 3) { - throw std::runtime_error("Could not read 3 expected bytes from stream"); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80630", {"number"}, {3}, true); } } else if ((first & 0xE0) == 0xE0) { i += 2; if (fread_unlocked(cbuffer+1, 1, 2, infile) != 2) { - throw std::runtime_error("Could not read 2 expected bytes from stream"); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80630", {"number"}, {2}, true); } } else if ((first & 0xC0) == 0xC0) { i += 1; if (fread_unlocked(cbuffer+1, 1, 1, infile) != 1) { - throw std::runtime_error("Could not read 1 expected byte from stream"); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80630", {"number"}, {1}, true); } } memset(ubuffer, 0, 3*sizeof(UChar)); @@ -150,8 +150,7 @@ InputFile::rewind() { if (infile != nullptr) { if (std::fseek(infile, 0, SEEK_SET) != 0) { - std::cerr << "Error: Unable to rewind file" << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80360", true); } } } @@ -226,8 +225,7 @@ InputFile::readBlank(bool readwblank) ret += c; if (c == '\\') { if (eof() || peek() == '\0') { - std::cerr << "Unexpected trailing backslash" << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80370", true); } ret += get(); } diff --git a/lttoolbox/lt_append.cc b/lttoolbox/lt_append.cc index a8358fbe..04edc42d 100644 --- a/lttoolbox/lt_append.cc +++ b/lttoolbox/lt_append.cc @@ -19,14 +19,16 @@ #include #include #include +#include int main(int argc, char *argv[]) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; LtLocale::tryToSetLocale(); - CLI cli("add sections to a compiled transducer", PACKAGE_VERSION); - cli.add_bool_arg('k', "keep", "in case of section name conflicts, keep the one from the first transducer"); - 
cli.add_bool_arg('s', "single", "treat input transducers as one-sided"); - cli.add_bool_arg('h', "help", "print this message and exit"); + CLI cli(i18n.format("lt_append_desc"), PACKAGE_VERSION); + cli.add_bool_arg('k', "keep", i18n.format("keep_desc")); + cli.add_bool_arg('s', "single", i18n.format("single_desc")); + cli.add_bool_arg('h', "help", i18n.format("help_desc")); cli.add_file_arg("bin_file1", false); cli.add_file_arg("bin_file2"); cli.add_file_arg("output_file"); @@ -56,7 +58,7 @@ int main(int argc, char *argv[]) if (keep) { continue; } else { - std::cerr << "WARNING: section '" << it.first << "' appears in both transducers and will be overwritten!" << std::endl; + i18n.error("ALT60380", {"section"}, {icu::UnicodeString(it.first.data())}, false); } } it.second.updateAlphabet(alpha2, alpha1, pairs); diff --git a/lttoolbox/lt_apply_acx.cc b/lttoolbox/lt_apply_acx.cc index fa5d079b..64adbc49 100644 --- a/lttoolbox/lt_apply_acx.cc +++ b/lttoolbox/lt_apply_acx.cc @@ -20,11 +20,12 @@ #include #include #include +#include int main(int argc, char* argv[]) { LtLocale::tryToSetLocale(); - CLI cli("apply an ACX file to a compiled transducer", PACKAGE_VERSION); + CLI cli(I18n(ALT_I18N_DATA, "lttoolbox").format("lt_apply_acx_desc"), PACKAGE_VERSION); cli.add_file_arg("input_file", false); cli.add_file_arg("acx_file"); cli.add_file_arg("output_file"); diff --git a/lttoolbox/lt_comp.cc b/lttoolbox/lt_comp.cc index b9b9ed0c..b54c872f 100644 --- a/lttoolbox/lt_comp.cc +++ b/lttoolbox/lt_comp.cc @@ -21,6 +21,7 @@ #include #include +#include /* * Error function that does nothing so that when we fallback from @@ -34,19 +35,20 @@ void errorFunc(void *ctx, const char *msg, ...) 
int main(int argc, char *argv[]) { + I18n i18n(ALT_I18N_DATA, "lttoolbox"); LtLocale::tryToSetLocale(); - CLI cli("build a letter transducer from a dictionary", PACKAGE_VERSION); - cli.add_bool_arg('d', "debug", "insert line numbers before each entry"); - cli.add_bool_arg('m', "keep-boundaries", "keep morpheme boundaries"); - cli.add_str_arg('v', "var", "set language variant", "VAR"); - cli.add_str_arg('a', "alt", "set alternative (monodix)", "ALT"); - cli.add_str_arg('l', "var-left", "set left language variant (bidix)", "VAR"); - cli.add_str_arg('r', "var-right", "set right language variant (bidix)", "VAR"); - cli.add_bool_arg('H', "hfst", "expect HFST symbols"); - cli.add_bool_arg('S', "no-split", "don't attempt to split into word and punctuation sections"); - cli.add_bool_arg('j', "jobs", "use one cpu core per section when minimising, new section after 50k entries"); - cli.add_bool_arg('V', "verbose", "compile verbosely"); - cli.add_bool_arg('h', "help", "print this message and exit"); + CLI cli(i18n.format("lt_comp_desc"), PACKAGE_VERSION); + cli.add_bool_arg('d', "debug", i18n.format("debug_desc")); + cli.add_bool_arg('m', "keep-boundaries", i18n.format("keep_boundaries_desc")); + cli.add_str_arg('v', "var", i18n.format("var_desc"), "VAR"); + cli.add_str_arg('a', "alt", i18n.format("alt_desc"), "ALT"); + cli.add_str_arg('l', "var-left", i18n.format("var_left_desc"), "VAR"); + cli.add_str_arg('r', "var-right", i18n.format("var_right_desc"), "VAR"); + cli.add_bool_arg('H', "hfst", i18n.format("expect_hfst_desc")); + cli.add_bool_arg('S', "no-split", i18n.format("no_split_desc")); + cli.add_bool_arg('j', "jobs", i18n.format("jobs_desc")); + cli.add_bool_arg('V', "verbose", i18n.format("verbose_desc")); + cli.add_bool_arg('h', "help", i18n.format("help_desc")); cli.add_file_arg("lr | rl | u", false); cli.add_file_arg("dictionary_file", false); cli.add_file_arg("output_file", false); @@ -116,8 +118,7 @@ int main(int argc, char *argv[]) } else { - std::cerr << 
"Error: Cannot not open file '" << infile << "'." << std::endl << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80050", {"file_name"}, {infile.c_str()}, true); } initGenericErrorDefaultFunc(NULL); @@ -125,7 +126,7 @@ int main(int argc, char *argv[]) if(opc == "lr") { if (have_vl) { - std::cerr << "Error: -l specified, but mode is lr" << std::endl; + i18n.error("ALT80390", false); cli.print_usage(); } if(ttype == 'a') @@ -144,7 +145,7 @@ int main(int argc, char *argv[]) else if(opc == "rl") { if (have_vr) { - std::cerr << "Error: -r specified, but mode is rl" << std::endl; + i18n.error("ALT80391", false); cli.print_usage(); } if(ttype == 'a') diff --git a/lttoolbox/lt_compose.cc b/lttoolbox/lt_compose.cc index 6ec365d2..c73e0807 100644 --- a/lttoolbox/lt_compose.cc +++ b/lttoolbox/lt_compose.cc @@ -21,10 +21,12 @@ #include #include #include +#include void compose(FILE* file_f, FILE* file_g, FILE* file_gf, bool f_inverted, bool g_anywhere, bool jobs) { + I18n i18n(ALT_I18N_DATA, "lttoolbox"); Alphabet alph_f; std::set letters_f; std::map trans_f; @@ -49,7 +51,7 @@ compose(FILE* file_f, FILE* file_g, FILE* file_gf, bool f_inverted, bool g_anywh std::vector>> compositions; for (auto& it : trans_f) { if (it.second.numberOfTransitions() == 0) { - std::cerr << "Warning: section " << it.first << " is empty! Skipping it..." << std::endl; + i18n.error("ALT60410", {"section_name"}, {icu::UnicodeString(it.first.data())}, false); continue; } if(jobs) { @@ -58,10 +60,7 @@ compose(FILE* file_f, FILE* file_g, FILE* file_gf, bool f_inverted, bool g_anywh bool f_inverted, bool g_anywhere, UString name) { Transducer gf = f.compose(g, alph_f, alph_g, f_inverted, g_anywhere); if (gf.hasNoFinals()) { - std::cerr << "Warning: section " << name - << " had no final state after composing! Skipping it..." 
- << std::endl; - ; + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT60420", {"section_name"}, {icu::UnicodeString(name.data())}, false); } else { gf.minimize(); } @@ -72,9 +71,7 @@ compose(FILE* file_f, FILE* file_g, FILE* file_gf, bool f_inverted, bool g_anywh } else { Transducer gf = it.second.compose(union_g, alph_f, alph_g, f_inverted, g_anywhere); if (gf.hasNoFinals()) { - std::cerr << "Warning: section " << it.first - << " had no final state after composing! Skipping it..." - << std::endl; + i18n.error("ALT60420", {"section_name"}, {icu::UnicodeString(it.first.data())}, false); continue; } gf.minimize(); @@ -89,8 +86,7 @@ compose(FILE* file_f, FILE* file_g, FILE* file_gf, bool f_inverted, bool g_anywh } if (trans_gf.empty()) { - std::cerr << "Error: Composition gave empty transducer!" << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80430", true); } writeTransducerSet(file_gf, letters_f, alph_f, trans_gf); @@ -99,10 +95,11 @@ compose(FILE* file_f, FILE* file_g, FILE* file_gf, bool f_inverted, bool g_anywh int main(int argc, char *argv[]) { + I18n i18n(ALT_I18N_DATA, "lttoolbox"); LtLocale::tryToSetLocale(); - CLI cli("compose transducer1 with transducer2", PACKAGE_VERSION); - cli.add_bool_arg('i', "inverted", "run composition right-to-left on transducer1"); - cli.add_bool_arg('a', "anywhere", "don't require anchored matches, let transducer2 optionally compose at any sub-path"); + CLI cli(i18n.format("lt_compose_desc"), PACKAGE_VERSION); + cli.add_bool_arg('i', "inverted", i18n.format("inverted_desc")); + cli.add_bool_arg('a', "anywhere", i18n.format("anywhere_desc")); cli.add_file_arg("transducer1_bin_file", false); cli.add_file_arg("transducer2_bin_file"); cli.add_file_arg("trimmed_bin_file"); diff --git a/lttoolbox/lt_expand.cc b/lttoolbox/lt_expand.cc index c472b77c..00d181c0 100644 --- a/lttoolbox/lt_expand.cc +++ b/lttoolbox/lt_expand.cc @@ -19,16 +19,18 @@ #include #include #include +#include int main(int argc, char *argv[]) { + I18n i18n 
{ALT_I18N_DATA, "lttoolbox"}; LtLocale::tryToSetLocale(); - CLI cli("expand the contents of a dictionary file", PACKAGE_VERSION); - cli.add_bool_arg('m', "keep-boundaries", "keep morpheme boundaries"); - cli.add_str_arg('v', "var", "set language variant", "VAR"); - cli.add_str_arg('a', "alt", "set alternative (monodix)", "ALT"); - cli.add_str_arg('l', "var-left", "set left language variant (bidix)", "VAR"); - cli.add_str_arg('r', "var-right", "set right language variant (bidix)", "VAR"); + CLI cli(i18n.format("lt_expand_desc"), PACKAGE_VERSION); + cli.add_bool_arg('m', "keep-boundaries", i18n.format("keep_boundaries_desc")); + cli.add_str_arg('v', "var", i18n.format("var_desc"), "VAR"); + cli.add_str_arg('a', "alt", i18n.format("alt_desc"), "ALT"); + cli.add_str_arg('l', "var-left", i18n.format("var_left_desc"), "VAR"); + cli.add_str_arg('r', "var-right", i18n.format("var_right_desc"), "VAR"); cli.add_file_arg("dictionary_file", false); cli.add_file_arg("output_file"); cli.parse_args(argc, argv); diff --git a/lttoolbox/lt_invert.cc b/lttoolbox/lt_invert.cc index bbe55848..deab3469 100644 --- a/lttoolbox/lt_invert.cc +++ b/lttoolbox/lt_invert.cc @@ -18,13 +18,15 @@ #include #include #include +#include int main(int argc, char* argv[]) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; LtLocale::tryToSetLocale(); - CLI cli("reverse the direction of a compiled transducer", PACKAGE_VERSION); - cli.add_bool_arg('h', "help", "print this message and exit"); + CLI cli(i18n.format("lt_invert_desc"), PACKAGE_VERSION); + cli.add_bool_arg('h', "help", i18n.format("help_desc")); cli.add_file_arg("in_bin"); cli.add_file_arg("out_bin"); cli.parse_args(argc, argv); diff --git a/lttoolbox/lt_locale.cc b/lttoolbox/lt_locale.cc index 9ee16add..0371c7a3 100644 --- a/lttoolbox/lt_locale.cc +++ b/lttoolbox/lt_locale.cc @@ -24,6 +24,7 @@ #include #endif +#include void LtLocale::tryToSetLocale() @@ -45,8 +46,7 @@ LtLocale::tryToSetLocale() return; } - std::cerr << "Warning: unsupported locale, 
fallback to \"C\"" << std::endl; - + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT60440", false); setlocale(LC_ALL, "C"); #endif #ifdef __CYGWIN__ diff --git a/lttoolbox/lt_paradigm.cc b/lttoolbox/lt_paradigm.cc index 3639c29e..c677eb3e 100644 --- a/lttoolbox/lt_paradigm.cc +++ b/lttoolbox/lt_paradigm.cc @@ -26,6 +26,8 @@ #include +#include + void expand(Transducer& inter, int state, const std::set& past_states, const std::vector& syms, const Alphabet& alpha, UFILE* out, std::set>& outset) @@ -142,13 +144,14 @@ void process(UStringView pattern, std::map& trans, int main(int argc, char* argv[]) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; LtLocale::tryToSetLocale(); - CLI cli("generate listings from a compiled transducer", PACKAGE_VERSION); - cli.add_bool_arg('a', "analyser", "FST is an analyser (tags on the right)"); - cli.add_str_arg('e', "exclude", "disregard paths containing TAG", "TAG"); - cli.add_bool_arg('s', "sort", "alphabetize the paths for each pattern"); - cli.add_bool_arg('z', "null-flush", "flush output on \\0"); - cli.add_bool_arg('h', "help", "show this help and exit"); + CLI cli(i18n.format("lt_paradigm_desc"), PACKAGE_VERSION); + cli.add_bool_arg('a', "analyser", i18n.format("analyser_desc")); + cli.add_str_arg('e', "exclude", i18n.format("exclude_desc"), "TAG"); + cli.add_bool_arg('s', "sort", i18n.format("sort_desc")); + cli.add_bool_arg('z', "null-flush", i18n.format("null_flush_desc")); + cli.add_bool_arg('h', "help", i18n.format("help_desc")); cli.add_file_arg("FST", false); cli.add_file_arg("input"); cli.add_file_arg("output"); diff --git a/lttoolbox/lt_print.cc b/lttoolbox/lt_print.cc index 6619530b..d2be884f 100644 --- a/lttoolbox/lt_print.cc +++ b/lttoolbox/lt_print.cc @@ -18,14 +18,16 @@ #include #include #include +#include int main(int argc, char *argv[]) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; LtLocale::tryToSetLocale(); - CLI cli("dump a transducer to text in ATT format", PACKAGE_VERSION); - cli.add_bool_arg('a', "alpha", "print 
transducer alphabet"); - cli.add_bool_arg('H', "hfst", "use HFST-compatible character escapes"); - cli.add_bool_arg('h', "help", "print this message and exit"); + CLI cli(i18n.format("lt_print_desc"), PACKAGE_VERSION); + cli.add_bool_arg('a', "alpha", i18n.format("alpha_desc")); + cli.add_bool_arg('H', "hfst", i18n.format("use_hfst_desc")); + cli.add_bool_arg('h', "help", i18n.format("help_desc")); cli.add_file_arg("bin_file"); cli.add_file_arg("output_file"); cli.parse_args(argc, argv); diff --git a/lttoolbox/lt_proc.cc b/lttoolbox/lt_proc.cc index a7f188f3..d9e86b6b 100644 --- a/lttoolbox/lt_proc.cc +++ b/lttoolbox/lt_proc.cc @@ -18,6 +18,7 @@ #include #include #include +#include void checkValidity(FSTProcessor const &fstp) { @@ -30,37 +31,38 @@ void checkValidity(FSTProcessor const &fstp) int main(int argc, char *argv[]) { LtLocale::tryToSetLocale(); + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; - CLI cli("process a stream with a letter transducer", PACKAGE_VERSION); + CLI cli(i18n.format("lt_proc_desc"), PACKAGE_VERSION); cli.add_file_arg("fst_file", false); cli.add_file_arg("input_file"); cli.add_file_arg("output_file"); - cli.add_bool_arg('a', "analysis", "morphological analysis (default behavior)"); - cli.add_bool_arg('b', "bilingual", "lexical transfer"); - cli.add_bool_arg('c', "case-sensitive", "use the literal case of the incoming characters"); - cli.add_bool_arg('d', "debugged-gen", "morph. 
generation with all the stuff"); - cli.add_bool_arg('e', "decompose-nouns", "Try to decompound unknown words"); - cli.add_bool_arg('g', "generation", "morphological generation"); - cli.add_str_arg('i', "ignored-chars", "specify file with characters to ignore", "icx_file"); - cli.add_str_arg('r', "restore-chars", "specify file with characters to diacritic restoration", "rcx_file"); - cli.add_bool_arg('l', "tagged-gen", "morphological generation keeping lexical forms"); - cli.add_bool_arg('m', "tagged-nm-gen", "same as -l but without unknown word marks"); - cli.add_bool_arg('n', "non-marked-gen", "morph. generation without unknown word marks"); - cli.add_bool_arg('o', "surf-bilingual", "lexical transfer with surface forms"); - cli.add_bool_arg('p', "post-generation", "post-generation"); - cli.add_bool_arg('x', "inter-generation", "inter-generation"); - cli.add_bool_arg('s', "sao", "SAO annotation system input processing"); - cli.add_bool_arg('t', "transliteration", "apply transliteration dictionary"); - cli.add_bool_arg('v', "version", "version"); - cli.add_bool_arg('z', "null-flush", "flush output on the null character"); - cli.add_bool_arg('w', "dictionary-case", "use dictionary case instead of surface"); - cli.add_bool_arg('C', "careful-case", "use dictionary case if present, else surface"); - cli.add_bool_arg('I', "no-default-ignore", "skips loading the default ignore characters"); - cli.add_bool_arg('W', "show-weights", "Print final analysis weights (if any)"); - cli.add_str_arg('N', "analyses", "Output no more than N analyses (if the transducer is weighted, the N best analyses)", "N"); - cli.add_str_arg('L', "weight-classes", "Output no more than N best weight classes (where analyses with equal weight constitute a class)", "N"); - cli.add_str_arg('M', "compound-max-elements", "Set compound max elements", "N"); - cli.add_bool_arg('h', "help", "show this help"); + cli.add_bool_arg('a', "analysis", i18n.format("analysis_desc")); + cli.add_bool_arg('b', 
"bilingual", i18n.format("bilingual_desc")); + cli.add_bool_arg('c', "case-sensitive", i18n.format("case_sensitive_desc")); + cli.add_bool_arg('d', "debugged-gen", i18n.format("debugged_gen_desc")); + cli.add_bool_arg('e', "decompose-nouns", i18n.format("decompose_nouns_desc")); + cli.add_bool_arg('g', "generation", i18n.format("generation_desc")); + cli.add_str_arg('i', "ignored-chars", i18n.format("ignored_chars_desc"), "icx_file"); + cli.add_str_arg('r', "restore-chars", i18n.format("restore_chars_desc"), "rcx_file"); + cli.add_bool_arg('l', "tagged-gen", i18n.format("tagged_gen_desc")); + cli.add_bool_arg('m', "tagged-nm-gen", i18n.format("tagged_nm_gen_desc")); + cli.add_bool_arg('n', "non-marked-gen", i18n.format("non_marked_gen_desc")); + cli.add_bool_arg('o', "surf-bilingual", i18n.format("surf_bilingual_desc")); + cli.add_bool_arg('p', "post-generation", i18n.format("post_generation_desc")); + cli.add_bool_arg('x', "inter-generation", i18n.format("inter_generation_desc")); + cli.add_bool_arg('s', "sao", i18n.format("sao_desc")); + cli.add_bool_arg('t', "transliteration", i18n.format("transliteration_desc")); + cli.add_bool_arg('v', "version", i18n.format("version_desc")); + cli.add_bool_arg('z', "null-flush", i18n.format("null_flush_desc")); + cli.add_bool_arg('w', "dictionary-case", i18n.format("dictionary_case_desc")); + cli.add_bool_arg('C', "careful-case", i18n.format("careful_case_desc")); + cli.add_bool_arg('I', "no-default-ignore", i18n.format("no_default_ignore_desc")); + cli.add_bool_arg('W', "show-weights", i18n.format("show_weights_desc")); + cli.add_str_arg('N', "analyses", i18n.format("analyses_desc"), "N"); + cli.add_str_arg('L', "weight-classes", i18n.format("weight_classes_desc"), "N"); + cli.add_str_arg('M', "compound-max-elements", i18n.format("compound_max_elements_desc"), "N"); + cli.add_bool_arg('h', "help", i18n.format("help_desc")); cli.parse_args(argc, argv); FSTProcessor fstp; @@ -145,24 +147,21 @@ int main(int argc, char *argv[]) 
if (strs.find("analyses") != strs.end()) { int n = atoi(strs["analyses"].back().c_str()); if (n < 1) { - std::cerr << "Invalid or no argument for analyses count" << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80000", {"option"}, {"analyses"}, true); } fstp.setMaxAnalysesValue(n); } if (strs.find("weight-classes") != strs.end()) { int n = atoi(strs["weight-classes"].back().c_str()); if (n < 1) { - std::cerr << "Invalid or no argument for weight class count" << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80000", {"option"}, {"weight-classes"}, true); } fstp.setMaxWeightClassesValue(n); } if (strs.find("compound-max-elements") != strs.end()) { // Test int n = atoi(strs["compound-max-elements"].back().c_str()); if (n < 1) { - std::cerr << "Invalid or no argument for compound max elements" << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80000", {"option"}, {"compound-max-elements"}, true); } fstp.setCompoundMaxElements(n); } diff --git a/lttoolbox/lt_restrict.cc b/lttoolbox/lt_restrict.cc index 0aef3b4c..09260a91 100644 --- a/lttoolbox/lt_restrict.cc +++ b/lttoolbox/lt_restrict.cc @@ -19,6 +19,7 @@ #include #include #include +#include void get_symbol(const std::string& s, Alphabet& alpha, const char* prefix, sorted_vector& vec) @@ -36,13 +37,15 @@ void get_symbol(const std::string& s, Alphabet& alpha, const char* prefix, int main(int argc, char* argv[]) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; + LtLocale::tryToSetLocale(); - CLI cli("remove paths from a transducer", PACKAGE_VERSION); - cli.add_bool_arg('m', "minimise", "minimise transducers after deleting paths"); - cli.add_str_arg('v', "var", "set language variant", "VAR"); - cli.add_str_arg('a', "alt", "set alternative (monodix)", "ALT"); - cli.add_str_arg('l', "var-left", "set left language variant (bidix)", "VAR"); - cli.add_str_arg('r', "var-right", "set right language variant (bidix)", "VAR"); + CLI cli(i18n.format("lt_restrict_desc"), PACKAGE_VERSION); + cli.add_bool_arg('m', "minimise", 
i18n.format("minimise_desc")); + cli.add_str_arg('v', "var", i18n.format("var_desc"), "VAR"); + cli.add_str_arg('a', "alt", i18n.format("alt_desc"), "ALT"); + cli.add_str_arg('l', "var-left", i18n.format("var_left_desc"), "VAR"); + cli.add_str_arg('r', "var-right", i18n.format("var_right_desc"), "VAR"); cli.add_file_arg("lr | rl", false); cli.add_file_arg("input_file"); cli.add_file_arg("output_file"); diff --git a/lttoolbox/lt_tmxcomp.cc b/lttoolbox/lt_tmxcomp.cc index 914f30ab..9c620314 100644 --- a/lttoolbox/lt_tmxcomp.cc +++ b/lttoolbox/lt_tmxcomp.cc @@ -21,24 +21,27 @@ #include #include #include +#include +#include void endProgram(char *name) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; if(name != NULL) { - std::cout << basename(name) << " v" << PACKAGE_VERSION <<": build a letter transducer from a TMX translation memory" << std::endl; - std::cout << "USAGE: " << basename(name) << " [OPTIONS] lang1-lang2 tmx_file output_file" << std::endl; - std::cout << "Modes:" << std::endl; - std::cout << " lang1: input language" << std::endl; - std::cout << " lang2: output language" << std::endl; - std::cout << "Options:" << std::endl; + std::cout << basename(name) << " v" << PACKAGE_VERSION << ": " << i18n.format("lt_tmxcomp_desc") << std::endl; + std::cout << i18n.format("usage") << basename(name) << " [OPTIONS] lang1-lang2 tmx_file output_file" << std::endl; + std::cout << i18n.format("modes") << std::endl; + std::cout << " lang1: " << i18n.format("input_language") << std::endl; + std::cout << " lang2: " << i18n.format("output_language") << std::endl; + std::cout << i18n.format("options") << std::endl; #if HAVE_GETOPT_LONG - std::cout << " -o, --origin-code code the language code to be taken as lang1" << std::endl; - std::cout << " -m, --meta-code code the language code to be taken as lang2" << std::endl; + std::cout << " -o, --origin-code code " << i18n.format("origin_code_desc") << std::endl; + std::cout << " -m, --meta-code code " << i18n.format("meta_code_desc") << 
std::endl; #else - std::cout << " -o code the language code to be taken as lang1" << std::endl; - std::cout << " -m code the language code to be taken as lang2" << std::endl; + std::cout << " -o code " << i18n.format("origin_code_desc") << std::endl; + std::cout << " -m code " << i18n.format("meta_code_desc") << std::endl; #endif } exit(EXIT_FAILURE); @@ -107,8 +110,7 @@ int main(int argc, char *argv[]) FILE *output = fopen(argv[argc-1], "wb"); if(!output) { - std::cerr << "Error: Cannot open file '" << argv[2] << "'." << std::endl; - exit(EXIT_FAILURE); + /* report the path that was actually passed to fopen() above, not argv[2] */ I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80050", {"file_name"}, {argv[argc-1]}, true); } c.write(output); fclose(output); diff --git a/lttoolbox/lt_tmxproc.cc b/lttoolbox/lt_tmxproc.cc index 6347e3ba..9c249747 100644 --- a/lttoolbox/lt_tmxproc.cc +++ b/lttoolbox/lt_tmxproc.cc @@ -18,11 +18,13 @@ #include #include #include +#include int main(int argc, char *argv[]) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; LtLocale::tryToSetLocale(); - CLI cli("process a stream with a letter transducer"); + CLI cli(i18n.format("lt_tmxproc_desc")); cli.add_file_arg("fst_file", false); cli.add_file_arg("input_file"); cli.add_file_arg("output_file"); diff --git a/lttoolbox/lt_trim.cc b/lttoolbox/lt_trim.cc index 2a18584d..04c3a42f 100644 --- a/lttoolbox/lt_trim.cc +++ b/lttoolbox/lt_trim.cc @@ -19,10 +19,13 @@ #include #include #include +#include +#include void trim(FILE* file_mono, FILE* file_bi, FILE* file_out, std::set match_sections) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; Alphabet alph_mono; std::set letters_mono; std::map trans_mono; @@ -66,7 +69,7 @@ trim(FILE* file_mono, FILE* file_bi, FILE* file_out, std::set match_sec for (auto& it : trans_mono) { if (it.second.numberOfTransitions() == 0) { - std::cerr << "Warning: section " << it.first << " is empty! Skipping it..." 
<< std::endl; + i18n.error("ALT60410", {"section_name"}, {icu::UnicodeString(it.first.data())}, false); continue; } if (moved_bi_transducers.count(it.first)) { @@ -79,21 +82,18 @@ trim(FILE* file_mono, FILE* file_bi, FILE* file_out, std::set match_sec alph_mono, alph_prefix); if (trimmed.hasNoFinals()) { - std::cerr << "Warning: section " << it.first << " had no final state after trimming! Skipping it..." << std::endl; + i18n.error("ALT60420", {"section_name"}, {icu::UnicodeString(it.first.data())}, false); continue; } trimmed.minimize(); trans_trim[it.first] = trimmed; } for (const auto &name : sections_unmatched) { - std::cerr << "Warning: section " << name << " was not found in both transducers! Skipping if in just one..." << std::endl; + i18n.error("ALT60450", {"section_name"}, {icu::UnicodeString(name.data())}, false); } if (trans_trim.empty()) { - std::cerr << "Error: Trimming gave empty transducer!" << std::endl; - std::cerr << "Hint: There are no words in bilingual dictionary that match " - "words in both monolingual dictionaries?" << std::endl; - exit(EXIT_FAILURE); + i18n.error("ALT80460", true); } writeTransducerSet(file_out, letters_mono, alph_mono, trans_trim); @@ -102,12 +102,13 @@ trim(FILE* file_mono, FILE* file_bi, FILE* file_out, std::set match_sec int main(int argc, char *argv[]) { + I18n i18n {ALT_I18N_DATA, "lttoolbox"}; LtLocale::tryToSetLocale(); - CLI cli("trim a transducer to another transducer", PACKAGE_VERSION); + CLI cli(i18n.format("lt_trim_desc"), PACKAGE_VERSION); cli.add_file_arg("analyser_bin_file", false); cli.add_file_arg("bidix_bin_file"); cli.add_file_arg("trimmed_bin_file"); - cli.add_str_arg('s', "match-section", "A section with this name (id@type) will only be trimmed against a section with the same name. 
This argument may be used multiple times.", "section_name"); + cli.add_str_arg('s', "match-section", i18n.format("match_section_desc"), "section_name"); cli.parse_args(argc, argv); auto strs = cli.get_strs(); diff --git a/lttoolbox/pattern_list.cc b/lttoolbox/pattern_list.cc index b476de30..4e5e8f8b 100644 --- a/lttoolbox/pattern_list.cc +++ b/lttoolbox/pattern_list.cc @@ -21,6 +21,7 @@ #include #include +#include void PatternList::copy(PatternList const &o) @@ -76,8 +77,7 @@ PatternList::beginSequence() { if(sequence) { - std::cerr << "Error: opening an unended sequence" << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80470", true); } sequence = true; sequence_data.clear(); @@ -88,8 +88,7 @@ PatternList::endSequence() { if(!sequence) { - std::cerr << "Error: ending an unopened sequence" << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80471", true); } sequence = false; @@ -191,8 +190,7 @@ PatternList::insert(int id, int otherid) { if(!sequence) { - std::cerr << "Error: using labels outside of a sequence" << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80490", true); } sequence_id = id; diff --git a/lttoolbox/regexp_compiler.cc b/lttoolbox/regexp_compiler.cc index 18c7beec..6e3c23d4 100644 --- a/lttoolbox/regexp_compiler.cc +++ b/lttoolbox/regexp_compiler.cc @@ -19,6 +19,7 @@ #include #include #include +#include #define FIN_FICHERO INT_MAX @@ -87,15 +88,13 @@ RegexpCompiler::isReserved(int const t) void RegexpCompiler::error() { - std::cerr << "Error parsing regexp" << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80500", true); } void RegexpCompiler::errorConsuming(int const t) { - std::cerr << "Error parsing regexp" << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80500", true); } void diff --git a/lttoolbox/serialiser.h b/lttoolbox/serialiser.h index a155d915..06f39f7c 100644 --- 
a/lttoolbox/serialiser.h +++ b/lttoolbox/serialiser.h @@ -30,6 +30,7 @@ #include #include #include +#include namespace { template @@ -154,35 +155,29 @@ void Serialiser >::serialise( template void int_serialise(const integer_type &SerialisedType_, std::ostream &Output) { - try { - Output.put(compressedSize(SerialisedType_)); + Output.put(compressedSize(SerialisedType_)); + if (!Output) { + std::stringstream what_; + what_ << std::hex << /* [1] */ +compressedSize(SerialisedType_) << std::dec; + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80660", {"size_a", "size_b"}, + {std::to_string(sizeof(integer_type)).c_str(), what_.str().c_str()}, true); + } + + for (unsigned char CompressedSize = compressedSize(SerialisedType_); + CompressedSize != 0; Output.put(static_cast( + SerialisedType_ >> + std::numeric_limits::digits * --CompressedSize))) { if (!Output) { std::stringstream what_; - what_ << "can't serialise size " << std::hex - << /* [1] */ +compressedSize(SerialisedType_) << std::dec; - throw SerialisationException(what_.str().c_str()); + what_ << std::hex << /* [1] */ +static_cast( + SerialisedType_ >> + std::numeric_limits::digits * + CompressedSize) << std::dec; + + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80670", {"size", "byte"}, + {std::to_string(sizeof(integer_type)).c_str(), what_.str().c_str()}, true); } - - for (unsigned char CompressedSize = compressedSize(SerialisedType_); - CompressedSize != 0; Output.put(static_cast( - SerialisedType_ >> - std::numeric_limits::digits * --CompressedSize))) { - if (!Output) { - std::stringstream what_; - what_ << "can't serialise byte " << std::hex - << /* [1] */ +static_cast( - SerialisedType_ >> - std::numeric_limits::digits * - CompressedSize) << std::dec; - throw SerialisationException(what_.str().c_str()); - } - } - } catch (const SerialisationException &exc) { - std::stringstream what_; - what_ << "can't serialise const " << sizeof(integer_type) << " byte integer type: " - << exc.what(); - throw 
SerialisationException(what_.str().c_str()); } } diff --git a/lttoolbox/string_utils.cc b/lttoolbox/string_utils.cc index 26356743..499c0626 100644 --- a/lttoolbox/string_utils.cc +++ b/lttoolbox/string_utils.cc @@ -5,6 +5,8 @@ #include #include #include +#include +#include UStringView StringUtils::trim(UStringView str) @@ -146,7 +148,7 @@ StringUtils::stoi(const UString& str) int ret; int c = u_sscanf(str.c_str(), "%d", &ret); if (c != 1) { - throw std::invalid_argument("unable to parse int"); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80600", {"type"}, {"int"}, true); } return ret; } @@ -165,7 +167,7 @@ StringUtils::stod(const UString& str) c = 1; } if (c != 1) { - throw std::invalid_argument("unable to parse float"); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80600", {"type"}, {"float"}, true); } return ret; } @@ -177,9 +179,8 @@ StringUtils::tolower(UStringView str) UErrorCode err = U_ZERO_ERROR; u_strToLower(buf, str.size()*2, str.data(), str.size(), NULL, &err); if (U_FAILURE(err)) { - std::cerr << "Error: unable to lowercase string '" << str << "'.\n"; - std::cerr << "error code: " << u_errorName(err) << std::endl; - exit(EXIT_FAILURE); + /* str is a view and need not be NUL-terminated, so pass its length */ I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80510", {"string", "errer_name"}, + {icu::UnicodeString(str.data(), static_cast<int32_t>(str.size())), u_errorName(err)}, true); } return buf; } @@ -191,9 +192,8 @@ StringUtils::toupper(UStringView str) UErrorCode err = U_ZERO_ERROR; u_strToUpper(buf, str.size()*2, str.data(), str.size(), NULL, &err); if (U_FAILURE(err)) { - std::cerr << "Error: unable to uppercase string '" << str << "'.\n"; - std::cerr << "error code: " << u_errorName(err) << std::endl; - exit(EXIT_FAILURE); + /* str is a view and need not be NUL-terminated, so pass its length */ I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80511", {"string", "errer_name"}, + {icu::UnicodeString(str.data(), static_cast<int32_t>(str.size())), u_errorName(err)}, true); } return buf; } @@ -205,9 +205,8 @@ StringUtils::totitle(UStringView str) UErrorCode err = U_ZERO_ERROR; u_strToTitle(buf, str.size()*2, str.data(), str.size(), NULL, NULL, &err); if (U_FAILURE(err)) 
{ - std::cerr << "Error: unable to titlecase string '" << str << "'.\n"; - std::cerr << "error code: " << u_errorName(err) << std::endl; - exit(EXIT_FAILURE); + /* str is a view and need not be NUL-terminated, so pass its length */ I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80512", {"string", "errer_name"}, + {icu::UnicodeString(str.data(), static_cast<int32_t>(str.size())), u_errorName(err)}, true); } return buf; } @@ -272,10 +271,9 @@ StringUtils::caseequal(UStringView a, UStringView b) UErrorCode err = U_ZERO_ERROR; int cmp = u_strCaseCompare(a.data(), a.size(), b.data(), b.size(), 0, &err); if (U_FAILURE(err)) { - std::cerr << "Error: caseless string comparison failed on '"; - std::cerr << a << "' and '" << b << "'" << std::endl; - std::cerr << "error code: " << u_errorName(err) << std::endl; - exit(EXIT_FAILURE); + /* a and b are views and need not be NUL-terminated, so pass their lengths */ I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80513", {"string_a", "string_b", "errer_name"}, + {icu::UnicodeString(a.data(), static_cast<int32_t>(a.size())), + icu::UnicodeString(b.data(), static_cast<int32_t>(b.size())), u_errorName(err)}, true); } return (cmp == 0); } diff --git a/lttoolbox/tmx_compiler.cc b/lttoolbox/tmx_compiler.cc index dee2fe1e..e78b53d4 100644 --- a/lttoolbox/tmx_compiler.cc +++ b/lttoolbox/tmx_compiler.cc @@ -23,6 +23,7 @@ #include #include #include +#include TMXCompiler::TMXCompiler() : @@ -48,8 +49,7 @@ TMXCompiler::parse(std::string const &file, UStringView lo, UStringView lm) reader = xmlReaderForFile(file.c_str(), NULL, 0); if(reader == NULL) { - std::cerr << "Error: Cannot open '" << file << "'." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80050", {"file_name"}, {file.c_str()}, true); } int ret = xmlTextReaderRead(reader); @@ -61,7 +61,7 @@ TMXCompiler::parse(std::string const &file, UStringView lo, UStringView lm) if(ret != 0) { - std::cerr << "Error: Parse error at the end of input." 
<< std::endl; + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80110", false); } xmlFreeTextReader(reader); @@ -76,9 +76,9 @@ TMXCompiler::requireEmptyError(UStringView name) { if(!xmlTextReaderIsEmptyElement(reader)) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Non-empty element '<" << name << ">' should be empty." << std::endl; - exit(EXIT_FAILURE); + /* name is a view and need not be NUL-terminated, so pass its length */ I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80160", {"file_name", "line_number", "name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), icu::UnicodeString(name.data(), static_cast<int32_t>(name.size()))}, true); } } @@ -105,9 +105,8 @@ TMXCompiler::skipBlanks(UString &name) { if(!allBlanks()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid construction." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80190", {"file_name", "line_number"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, xmlTextReaderGetParserLineNumber(reader)}, true); } } @@ -128,9 +127,8 @@ TMXCompiler::skip(UString &name, UStringView elem) { if(!allBlanks()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid construction." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80190", {"file_name", "line_number"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, xmlTextReaderGetParserLineNumber(reader)}, true); } } xmlTextReaderRead(reader); @@ -139,9 +137,9 @@ TMXCompiler::skip(UString &name, UStringView elem) if(name != elem) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Expected '<" << elem << ">'." 
<< std::endl; - exit(EXIT_FAILURE); + /* elem is a view and need not be NUL-terminated, so pass its length */ I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80200", {"file_name", "line_number", "slash_element"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), icu::UnicodeString(elem.data(), static_cast<int32_t>(elem.size()))}, true); } } @@ -156,11 +154,11 @@ TMXCompiler::requireAttribute(UStringView value, UStringView attrname, UStringVi { if(value.empty()) { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): '<" << elemname; - std::cerr << "' element must specify non-void '"; - std::cerr << attrname << "' attribute." << std::endl; - exit(EXIT_FAILURE); + /* elemname/attrname are views and need not be NUL-terminated, so pass their lengths */ I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80250", {"file_name", "line_number", "element_name", "attr_name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(elemname.data(), static_cast<int32_t>(elemname.size())), + icu::UnicodeString(attrname.data(), static_cast<int32_t>(attrname.size()))}, true); } } @@ -406,9 +404,10 @@ TMXCompiler::procNode() } else { - std::cerr << "Error (" << xmlTextReaderGetParserLineNumber(reader); - std::cerr << "): Invalid node '<" << name << ">'." 
<< std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80280", {"file_name", "line_number", "element_name"}, + {(char*)xmlTextReaderCurrentDoc(reader)->URL, + xmlTextReaderGetParserLineNumber(reader), + icu::UnicodeString(name.data())}, true); } } diff --git a/lttoolbox/trans_exe.cc b/lttoolbox/trans_exe.cc index a1b2c29e..ce4e17d6 100644 --- a/lttoolbox/trans_exe.cc +++ b/lttoolbox/trans_exe.cc @@ -75,7 +75,7 @@ TransExe::read(FILE *input, Alphabet const &alphabet) if (strncmp(header, HEADER_TRANSDUCER, 4) == 0) { auto features = read_le(input); if (features >= TDF_UNKNOWN) { - throw std::runtime_error("Transducer has features that are unknown to this version of lttoolbox - upgrade!"); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80590", true); } read_weights = (features & TDF_WEIGHTS); } diff --git a/lttoolbox/transducer.cc b/lttoolbox/transducer.cc index a7fe0cf0..8e443060 100644 --- a/lttoolbox/transducer.cc +++ b/lttoolbox/transducer.cc @@ -25,6 +25,7 @@ #include #include #include +#include int @@ -169,9 +170,7 @@ Transducer::linkStates(int const source, int const target, } else { - std::cerr << "Error: Trying to link nonexistent states (" << source; - std::cerr << ", " << target << ", " << tag << ")" << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80550", {"source", "target", "tag"}, {source, target, tag}, true); } } @@ -294,8 +293,7 @@ Transducer::joinFinals(int const epsilon_tag) } else if(finals.size() == 0) { - std::cerr << "Error: empty set of final states" << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80560",{}, {}, true); } } @@ -591,7 +589,7 @@ Transducer::read(FILE *input, int const decalage) if (strncmp(header, HEADER_TRANSDUCER, 4) == 0) { auto features = read_le(input); if (features >= TDF_UNKNOWN) { - throw std::runtime_error("Transducer has features that are unknown to this version of lttoolbox - upgrade!"); + I18n(ALT_I18N_DATA, 
"lttoolbox").error("ALT80590", true); } read_weights = (features & TDF_WEIGHTS); } @@ -1094,8 +1092,7 @@ Transducer::trim(Transducer &trimmer, trimmer_preplus_next = trimmer_preplus; if(states_this_trimmed.find(current) == states_this_trimmed.end()) { - std::cerr <<"Error: couldn't find "< #include #include +#include xmlTextReaderPtr XMLParseUtil::open_or_exit(const char* fname) { xmlTextReaderPtr reader = xmlReaderForFile(fname, NULL, 0); if (reader == NULL) { - std::cerr << "Error: cannot open '" << fname << "' for reading." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80050", {"file_name"}, {fname}, true); } return reader; } diff --git a/lttoolbox/xml_walk_util.cc b/lttoolbox/xml_walk_util.cc index e4afe730..a3722660 100644 --- a/lttoolbox/xml_walk_util.cc +++ b/lttoolbox/xml_walk_util.cc @@ -1,5 +1,6 @@ #include #include +#include children::children(xmlNode* node_) : node(node_), cur(node->children) @@ -59,8 +60,7 @@ load_xml(const char* fname) { xmlDoc* doc = xmlReadFile(fname, NULL, 0); if (doc == nullptr) { - std::cerr << "Error: Could not parse file '" << fname << "'." << std::endl; - exit(EXIT_FAILURE); + I18n(ALT_I18N_DATA, "lttoolbox").error("ALT80050", {"file_name"}, {fname}, true); } return xmlDocGetRootElement(doc); } diff --git a/python/setup.py.in b/python/setup.py.in index d3bbfd21..22ff5e8f 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -6,7 +6,8 @@ Setup for SWIG Python bindings for lttoolbox from distutils.core import Extension, setup from sys import platform -compile_args = '@CXXFLAGS@'.split() + '@ICU_CFLAGS@'.split() +ALT_I18N_DATA = '-DALT_I18N_DATA=\"@prefix@/share/@PACKAGE_NAME@/lttoolbox.dat\"' +compile_args = '@CXXFLAGS@'.split() + '@ICU_CFLAGS@'.split() + ALT_I18N_DATA.split() link_args = [] if platform == 'darwin': compile_args += ['-stdlib=libc++', '-mmacosx-version-min=10.7']