diff --git a/LICENSE.txt b/LICENSE.txt
new file mode 100644
index 0000000..3754756
--- /dev/null
+++ b/LICENSE.txt
@@ -0,0 +1,17 @@
+MIT License
+Copyright (c) 2018 YOUR NAME
+Permission is hereby granted, free of charge, to any person obtaining a copy
+of this software and associated documentation files (the "Software"), to deal
+in the Software without restriction, including without limitation the rights
+to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the Software is
+furnished to do so, subject to the following conditions:
+The above copyright notice and this permission notice shall be included in all
+copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..c22dcac
--- /dev/null
+++ b/README.md
@@ -0,0 +1,72 @@
+# Dependencies
+
+```bash
+unidecode
+emoji
+*kenlm
+fuzzy
+scikit-learn
+pyxdameraulevenshtein
+pygtrie
+numpy
+```
+
+*Install the kenlm wrapper from GitHub:
+
+```bash
+pip install https://github.com/kpu/kenlm/archive/master.zip
+```
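+
+The remaining dependencies are expected to be on PyPI (assuming the names above match their PyPI package names) and can be installed in one go:
+
+```bash
+pip install unidecode emoji fuzzy scikit-learn pyxdameraulevenshtein pygtrie numpy
+```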
+
+# Usage
+
+```python
+
+    import pickle
+    from functools import partial
+
+    import fuzzy
+    import kenlm
+
+    # The helpers used below (load_english_vocab, load_lexicon_norm, romanize,
+    # extract_oov, translate_oov, the *_similarity functions, ngram_train) and
+    # the `debug` logger are assumed to come from this package.
+
+    # Load the English vocabulary
+ english_vocab = load_english_vocab(...)
+ english_vocab.update(load_english_vocab(...))
+
+    # Load the bilingual lexicon
+ foreign_dict = load_lexicon_norm(...)
+
+    # Load the target language model (KenLM)
+ lm = kenlm.Model(...)
+
+    # Train an n-gram model if needed
+ # ngram_train(foreign_dict, 'hin-tfidf-ngram_algo')
+
+ # Ulf's romanizer
+ romanizer = partial(romanize,
+ romanization_path=...,
+ language_code="hin")
+
+    # Phonetic (Double Metaphone) encoding of the English vocabulary
+    soundex_inst = fuzzy.DMetaphone()
+
+    def soundex_algo(word):
+        # DMetaphone returns a pair of byte encodings; keep the first one and
+        # fall back to the original word when no encoding is produced.
+        code = soundex_inst(word)[0]
+        return code.decode('utf-8') if code is not None else word
+
+    english_encoded_vocab = {e: soundex_algo(e) for e in english_vocab if e}
+
+    # Load the pickled n-gram model
+ ngram_algo = pickle.loads(open(..., "rb").read())
+
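+    # Build the matching cascade: exact match, then n-gram similarity, then
+    # Levenshtein distance, then Double Metaphone, each backing off to the next.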
+ soundex_model = partial(soundex_similarity,
+ encoded_english_vocab=english_encoded_vocab,
+ romanizer=romanizer,
+ soundex=soundex_algo)
+
+ lev_model = partial(lev_similarity, backup=soundex_model)
+ ngram_model = partial(ngram_similarity, model=ngram_algo, backup=lev_model)
+ final_model = partial(exact_similarity, backup=ngram_model)
+
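+    # Each input line is a tab-separated source/target pair: extract the OOV
+    # tokens and translate them with the cascade, scoring candidates with the LM.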
+ for line in open(...):
+ source, target = line.strip('\n').split('\t')
+ oovs = extract_oov(target, source, english_vocab=english_vocab, romanization=True)
+ best, mods = translate_oov(target, oovs, foreign_dict, final_model, lm.score)
+
+ if best != target:
+
+ for oov in oovs:
+ alt = list(mods[oov].keys())[0]
+ trans = mods[oov][alt]
+ debug.debug(f"{romanizer(oov)} -> {romanizer(alt)} : {list(trans)}")
+
+ debug.debug(best)
+ debug.debug("*"*100)
+```
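+
+# Command line
+
+`elisa_dnt` can also be run as a module. A sketch of a pre-processing invocation,
+using the flags defined in `elisa_dnt/__main__.py` (the scheme value and file paths
+are placeholders, and `--p_scheme` is assumed to be the option that selects the
+extraction scheme):
+
+```bash
+python -m elisa_dnt --p_scheme <scheme> \
+    --fb_src source.txt \
+    --fb_src_output source.dnt.txt \
+    --fb_ini_output source.dnt.ini \
+    --fb_visual source.dnt.html
+```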
\ No newline at end of file
diff --git a/elisa_dnt/__main__.py b/elisa_dnt/__main__.py
index 591694e..ed07e1a 100644
--- a/elisa_dnt/__main__.py
+++ b/elisa_dnt/__main__.py
@@ -20,20 +20,21 @@
parser.add_argument('--fa_output', type=str,
help="[Post]File path to the output file")
- parser.add_argument('--fb_src', nargs=1,
+ parser.add_argument('--fb_src', type=str,
help='[Pre]File path to the source file')
- parser.add_argument('--fb_src_output', nargs=1,
+ parser.add_argument('--fb_src_output', type=str,
help='[Pre]File path to the source output file')
- parser.add_argument('--fb_ini_output', nargs=1,
+ parser.add_argument('--fb_ini_output', type=str,
help='[Pre]File path to the source ini file')
- parser.add_argument('--fb_tgt', nargs=1, required=False,
+ parser.add_argument('--fb_tgt', type=str, required=False,
help='[Pre]File path to the target file')
parser.add_argument('--pb_cross', dest='pb_cross', default=False, action='store_true',
help='[Pre]Parameter for whether use reference target file for regex extraction')
- parser.add_argument('--fb_visual', nargs=1,
+ parser.add_argument('--fb_visual', type=str,
help="[Pre]File path to visualization html file")
args = parser.parse_args()
+ print(args)
scheme = args.p_scheme
@@ -41,11 +42,11 @@
restore(args.fa_dnt_src, args.fa_dnt_ini, args.fa_output, args.p_scheme)
exit(0)
- RULES = {key: re.compile(value) for key, value in rules[args.scheme].items()}
- RULES["comb"] = re.compile("(" + "|".join(rules[args.scheme].values()) + ")+")
+ RULES = {key: re.compile(value) for key, value in rules[args.p_scheme].items()}
+ RULES["comb"] = re.compile("(" + "|".join(rules[args.p_scheme].values()) + ")+")
if args.fb_visual:
- with open(args.fb_visual[0], "w") as o:
+ with open(args.fb_visual, "w") as o:
o.write("""