diff --git a/dl_translate/_pairs.py b/dl_translate/_pairs.py index d593278..6e14a66 100644 --- a/dl_translate/_pairs.py +++ b/dl_translate/_pairs.py @@ -49,6 +49,7 @@ ("Javanese", "jv"), ("Georgian", "ka"), ("Kazakh", "kk"), + ("Khmer", "km"), ("Central Khmer", "km"), ("Kannada", "kn"), ("Korean", "ko"), diff --git a/dl_translate/lang/__init__.py b/dl_translate/lang/__init__.py index f390301..1254611 100644 --- a/dl_translate/lang/__init__.py +++ b/dl_translate/lang/__init__.py @@ -1,2 +1,2 @@ -from .mbart50 import * -from . import m2m100 +from .m2m100 import * +from . import m2m100, mbart50 diff --git a/dl_translate/lang/m2m100.py b/dl_translate/lang/m2m100.py index 94d7f49..b85a1cd 100644 --- a/dl_translate/lang/m2m100.py +++ b/dl_translate/lang/m2m100.py @@ -48,6 +48,7 @@ JAVANESE = "Javanese" GEORGIAN = "Georgian" KAZAKH = "Kazakh" +KHMER = "Khmer" CENTRAL_KHMER = "Central Khmer" KANNADA = "Kannada" KOREAN = "Korean" diff --git a/docs/available_languages.md b/docs/available_languages.md index 7f81bac..3490157 100644 --- a/docs/available_languages.md +++ b/docs/available_languages.md @@ -109,6 +109,7 @@ This page gives all the languages available for each model family. - Javanese (jv) - Georgian (ka) - Kazakh (kk) +- Khmer (km) - Central Khmer (km) - Kannada (kn) - Korean (ko) diff --git a/scripts/langs_coverage/m2m100.json b/scripts/langs_coverage/m2m100.json index ab5a2ed..411fd6e 100644 --- a/scripts/langs_coverage/m2m100.json +++ b/scripts/langs_coverage/m2m100.json @@ -48,6 +48,7 @@ "Javanese": "jv", "Georgian": "ka", "Kazakh": "kk", + "Khmer": "km", "Central Khmer": "km", "Kannada": "kn", "Korean": "ko", diff --git a/setup.py b/setup.py index f44bdce..05f99ed 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="dl-translate", - version="0.2.1", + version="0.2.2", author="Xing Han Lu", author_email="github@xinghanlu.com", description="A deep learning-based translation library built on Huggingface transformers", diff --git a/tests/quick/test_lang.py b/tests/quick/test_lang.py index 3394d9a..be91236 100644 --- a/tests/quick/test_lang.py +++ b/tests/quick/test_lang.py @@ -1,12 +1,17 @@ import dl_translate as dlt -from dl_translate._pairs import _PAIRS_MBART50 +from dl_translate._pairs import _PAIRS_MBART50, _PAIRS_M2M100 def test_lang(): - for l, _ in _PAIRS_MBART50: - assert getattr(dlt.lang, l.upper()) == l + for l, _ in _PAIRS_M2M100: + assert getattr(dlt.lang, l.upper().replace(" ", "_")) == l + + +def test_lang_m2m100(): + for l, _ in _PAIRS_M2M100: + assert getattr(dlt.lang.m2m100, l.upper().replace(" ", "_")) == l def test_lang_mbart50(): for l, _ in _PAIRS_MBART50: - assert getattr(dlt.lang.mbart50, l.upper()) == l + assert getattr(dlt.lang.mbart50, l.upper().replace(" ", "_")) == l