From 8316b88361abd077fbeed9cfdcc4c6b305bd52ca Mon Sep 17 00:00:00 2001 From: Hareesh Bahuleyan Date: Tue, 27 May 2025 18:18:18 +0200 Subject: [PATCH 1/3] Translation notebook includes LLM as judge eval --- notebooks/translation_demo_wmt24pp.ipynb | 361 ++++++----------------- 1 file changed, 96 insertions(+), 265 deletions(-) diff --git a/notebooks/translation_demo_wmt24pp.ipynb b/notebooks/translation_demo_wmt24pp.ipynb index 0672fd51d..8383c38c4 100644 --- a/notebooks/translation_demo_wmt24pp.ipynb +++ b/notebooks/translation_demo_wmt24pp.ipynb @@ -152,34 +152,20 @@ " Tierra del Sol is pleased to present \"Vicente Siso: Memories of the Land and Water\" at the new gallery location in West Hollywood. Siso has been an artist in the Studio Arts Program since 2012, this marks his debut solo exhibition. Siso was born 1962 in Madrid and raised between Venezuela, Trinidad and Miami; he moved with his family to Southern California in his early 20s.\n", " „Vicente Siso: Memories of the Land and Water“ – das ist der Titel der Ausstellung im neuen Standort der Galerie „Tierra del Sol“ in West Hollywood. Siso ist seit 2012 als Künstler im Studio Arts Progam dabei und zeigt jetzt seine erste Solo-Ausstellung. Siso wurde 1962 in Madrid geboren und ist in Venezuela, Trinidad und Miami aufgewachsen. Anfang zwanzig zog er mit seiner Familie nach Südkalifornien.\n", " \n", - " \n", - " 3\n", - " Masterfully working across subject matter, Siso has generated a prolific series of landscapes, portraits, and still-life works rendered in either acrylic, pastel, pencil or watercolor. Drawing from family portraits, his own reference photographs, and recollection, his colorful compositions demonstrate his range of interests and skill across media. Siso's tropical landscapes and seascapes reflect the geographies of his past, employing rich patterns and incorporating people to make meaningful connections between culture, memory and the environment. Siso titles his artworks in a mix of Spanish and English, signifying the celebrated and integral complexities of his life in Los Angeles County. \"Vicente Siso: Memories of the Land and Water\" opens on Saturday, Jan. 13, with a reception from 6-8 p.m. The exhibition is on view through Sunday, March 3.\n", - " In zahlreichen Serien von Landschaften, Porträts und Stillleben in Acryl, Pastell, Bleistift oder Aquarell zeigt Siso sein meisterhaftes Können. Inspiriert von Familienporträts, seinen eigenen Referenzfotografien und durch Erinnerungen, beweisen seine farbenfreudigen Kompositionen die ganze Bandbreite seiner Interessen und seines Könnens in unterschiedlichen Medien. Seine tropischen Landschaften und Seestücke zeigen die Orte seiner Vergangenheit in detaillierten Mustern. Auch Personen werden einbezogen, um bedeutungsvolle Verbindungen zwischen Kultur, Erinnerung und Natur zu kreieren. Siso betitelt seine Kunstwerke mit einer Mischung aus Spanisch und Englisch – ein Zeichen für die gefeierten und so wichtigen Komplexitäten seines Lebens im Los Angeles County. „Vicente Siso: Memories of the Land and Water“ wird am Samstag, dem 13. Januar, mit einem Empfang von 18 bis 20 Uhr eröffnet. Die Ausstellung wird bis Sonntag, den 3. März, gezeigt.\n", - " \n", - " \n", - " 4\n", - " The Tierra del Sol Gallery is located at 7414 Santa Monica Blvd. For information, visit tierradelsolgallery.org.\n", - " Galerie „Tierra del Sol“, 7414 Santa Monica Blvd. Weitere Informationen finden Sie unter tierradelsolgallery.org.\n", - " \n", " \n", "\n", "" ], "text/plain": [ - " source \\\n", - "0 Siso's depictions of land, water center new gallery exhibition \n", - "1 \"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso) \n", - "2 Tierra del Sol is pleased to present \"Vicente Siso: Memories of the Land and Water\" at the new gallery location in West Hollywood. Siso has been an artist in the Studio Arts Program since 2012, this marks his debut solo exhibition. Siso was born 1962 in Madrid and raised between Venezuela, Trinidad and Miami; he moved with his family to Southern California in his early 20s. \n", - "3 Masterfully working across subject matter, Siso has generated a prolific series of landscapes, portraits, and still-life works rendered in either acrylic, pastel, pencil or watercolor. Drawing from family portraits, his own reference photographs, and recollection, his colorful compositions demonstrate his range of interests and skill across media. Siso's tropical landscapes and seascapes reflect the geographies of his past, employing rich patterns and incorporating people to make meaningful connections between culture, memory and the environment. Siso titles his artworks in a mix of Spanish and English, signifying the celebrated and integral complexities of his life in Los Angeles County. \"Vicente Siso: Memories of the Land and Water\" opens on Saturday, Jan. 13, with a reception from 6-8 p.m. The exhibition is on view through Sunday, March 3. \n", - "4 The Tierra del Sol Gallery is located at 7414 Santa Monica Blvd. For information, visit tierradelsolgallery.org. \n", + " source \\\n", + "0 Siso's depictions of land, water center new gallery exhibition \n", + "1 \"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso) \n", + "2 Tierra del Sol is pleased to present \"Vicente Siso: Memories of the Land and Water\" at the new gallery location in West Hollywood. Siso has been an artist in the Studio Arts Program since 2012, this marks his debut solo exhibition. Siso was born 1962 in Madrid and raised between Venezuela, Trinidad and Miami; he moved with his family to Southern California in his early 20s. \n", "\n", - " target \n", - "0 Sisos Darstellungen von Land und Wasser in neuer Ausstellung \n", - "1 „People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso) \n", - "2 „Vicente Siso: Memories of the Land and Water“ – das ist der Titel der Ausstellung im neuen Standort der Galerie „Tierra del Sol“ in West Hollywood. Siso ist seit 2012 als Künstler im Studio Arts Progam dabei und zeigt jetzt seine erste Solo-Ausstellung. Siso wurde 1962 in Madrid geboren und ist in Venezuela, Trinidad und Miami aufgewachsen. Anfang zwanzig zog er mit seiner Familie nach Südkalifornien. \n", - "3 In zahlreichen Serien von Landschaften, Porträts und Stillleben in Acryl, Pastell, Bleistift oder Aquarell zeigt Siso sein meisterhaftes Können. Inspiriert von Familienporträts, seinen eigenen Referenzfotografien und durch Erinnerungen, beweisen seine farbenfreudigen Kompositionen die ganze Bandbreite seiner Interessen und seines Könnens in unterschiedlichen Medien. Seine tropischen Landschaften und Seestücke zeigen die Orte seiner Vergangenheit in detaillierten Mustern. Auch Personen werden einbezogen, um bedeutungsvolle Verbindungen zwischen Kultur, Erinnerung und Natur zu kreieren. Siso betitelt seine Kunstwerke mit einer Mischung aus Spanisch und Englisch – ein Zeichen für die gefeierten und so wichtigen Komplexitäten seines Lebens im Los Angeles County. „Vicente Siso: Memories of the Land and Water“ wird am Samstag, dem 13. Januar, mit einem Empfang von 18 bis 20 Uhr eröffnet. Die Ausstellung wird bis Sonntag, den 3. März, gezeigt. \n", - "4 Galerie „Tierra del Sol“, 7414 Santa Monica Blvd. Weitere Informationen finden Sie unter tierradelsolgallery.org. " + " target \n", + "0 Sisos Darstellungen von Land und Wasser in neuer Ausstellung \n", + "1 „People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso) \n", + "2 „Vicente Siso: Memories of the Land and Water“ – das ist der Titel der Ausstellung im neuen Standort der Galerie „Tierra del Sol“ in West Hollywood. Siso ist seit 2012 als Künstler im Studio Arts Progam dabei und zeigt jetzt seine erste Solo-Ausstellung. Siso wurde 1962 in Madrid geboren und ist in Venezuela, Trinidad und Miami aufgewachsen. Anfang zwanzig zog er mit seiner Familie nach Südkalifornien. " ] }, "execution_count": 5, @@ -190,7 +176,7 @@ "source": [ "df_translation = pd.DataFrame(dataset[1:])[[\"source\", \"target\"]]\n", "print(f\"Loaded {len(df_translation)} sentence pairs\")\n", - "df_translation.head()" + "df_translation.head(3)" ] }, { @@ -225,7 +211,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Dataset uploaded and has ID: 6c82b36a-64b2-42ff-9e7f-cf1e6768bc44\n" + "Dataset uploaded and has ID: d9e23676-8477-4b8c-bb2d-615d9d7e3414\n" ] } ], @@ -260,7 +246,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Experiment created and has ID: 34\n" + "Experiment created and has ID: 12\n" ] } ], @@ -273,7 +259,7 @@ " \"source_language\": source_language,\n", " \"target_language\": target_language,\n", "}\n", - "max_samples = 5\n", + "max_samples = 3\n", "\n", "request = ExperimentCreate(\n", " name=f\"WMT24++ Demo Experiment {source_language} to {target_language}\",\n", @@ -310,10 +296,10 @@ "name": "stdout", "output_type": "stream", "text": [ - "Created workflow Translation with gpt-4o-mini with ID 6b7fb6995e93451b89d63a6af6b5c1f8 for model gpt-4o-mini\n", - "Created workflow Translation with ollama qwen2.5 with ID b08d8b4b45e34742bfab155a6b5a0eb6 for model qwen2.5\n", - "Created workflow Translation with HF m2m100 with ID d999309f328c4169b108fcb5c8cc6a5a for model facebook/m2m100_418M\n", - "Created workflow Translation with Helsinki-NLP/opus-mt with ID 92ee08d58b9e4a4eb69ae56727101a0d for model Helsinki-NLP/opus-mt-en-de\n" + "Created workflow Translation with gpt-4o-mini with ID b8227c2493d845bd8b60b75ac76fbfe6 for model gpt-4o-mini\n", + "Created workflow Translation with ollama qwen2.5 with ID 374a11707d164933bf8c5e60349c509a for model qwen2.5\n", + "Created workflow Translation with HF m2m100 with ID 98a3a72fa8ad4418bf0d80c601ea0cfa for model facebook/m2m100_418M\n", + "Created workflow Translation with Helsinki-NLP/opus-mt with ID e6caaa3a0eb04c7dad68807b3bc7b0e6 for model Helsinki-NLP/opus-mt-en-de\n" ] } ], @@ -326,7 +312,16 @@ "Only generate the translated text. No additional text or explanation needed.\n", "\"\"\"\n", "batch_size = 1\n", - "metrics = [\"bleu\", \"meteor\", \"comet\"]\n", + "metrics = [\n", + " \"bleu\",\n", + " \"meteor\",\n", + " \"comet\",\n", + " # LLM-as-a-judge metrics:\n", + " # Configure \"llm_as_judge\" argument if a judge is a different model\n", + " # See: https://github.com/mozilla-ai/lumigator/pull/1321\n", + " \"g_eval_translation\",\n", + " \"g_eval_translation_noref\"\n", + "]\n", "configurations = [\n", " # OpenAI GPt-4o-mini no explicit system prompt - uses default prompt set by Lumigator under the hood\n", " {\n", @@ -387,7 +382,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 10, "id": "614e0298-5d23-4a7d-97e9-e97b0db915c3", "metadata": {}, "outputs": [ @@ -395,14 +390,6 @@ "name": "stdout", "output_type": "stream", "text": [ - "Workflows still running ...\n", - "Workflows still running ...\n", - "Workflows still running ...\n", - "Workflows still running ...\n", - "Workflows still running ...\n", - "Workflows still running ...\n", - "Workflows still running ...\n", - "Workflows still running ...\n", "Workflows still running ...\n", "Workflows still running ...\n", "Workflows still running ...\n" @@ -432,32 +419,52 @@ " meteor_meteor_mean\n", " bleu_bleu_mean\n", " comet_mean_score\n", + " g_eval_translation_consistency_mean\n", + " g_eval_translation_fluency_mean\n", + " g_eval_translation_noref_consistency_mean\n", + " g_eval_translation_noref_fluency_mean\n", " \n", " \n", " \n", " \n", " gpt-4o-mini\n", - " 0.644\n", - " 0.370\n", " 0.647\n", + " 0.417\n", + " 0.589\n", + " 0.842\n", + " 0.843\n", + " 1.000\n", + " 0.957\n", " \n", " \n", " qwen2.5\n", - " 0.567\n", - " 0.309\n", - " 0.607\n", + " 0.582\n", + " 0.348\n", + " 0.492\n", + " 0.803\n", + " 0.640\n", + " 0.905\n", + " 0.856\n", " \n", " \n", " facebook/m2m100_418M\n", - " 0.532\n", - " 0.259\n", - " 0.455\n", + " 0.542\n", + " 0.271\n", + " 0.469\n", + " 0.690\n", + " 0.578\n", + " 0.805\n", + " 0.678\n", " \n", " \n", " Helsinki-NLP/opus-mt-en-de\n", - " 0.602\n", - " 0.223\n", - " 0.585\n", + " 0.610\n", + " 0.198\n", + " 0.532\n", + " 0.737\n", + " 0.571\n", + " 0.833\n", + " 0.646\n", " \n", " \n", "\n", @@ -465,19 +472,43 @@ ], "text/plain": [ " meteor_meteor_mean bleu_bleu_mean \\\n", - "gpt-4o-mini 0.644 0.370 \n", - "qwen2.5 0.567 0.309 \n", - "facebook/m2m100_418M 0.532 0.259 \n", - "Helsinki-NLP/opus-mt-en-de 0.602 0.223 \n", + "gpt-4o-mini 0.647 0.417 \n", + "qwen2.5 0.582 0.348 \n", + "facebook/m2m100_418M 0.542 0.271 \n", + "Helsinki-NLP/opus-mt-en-de 0.610 0.198 \n", + "\n", + " comet_mean_score \\\n", + "gpt-4o-mini 0.589 \n", + "qwen2.5 0.492 \n", + "facebook/m2m100_418M 0.469 \n", + "Helsinki-NLP/opus-mt-en-de 0.532 \n", + "\n", + " g_eval_translation_consistency_mean \\\n", + "gpt-4o-mini 0.842 \n", + "qwen2.5 0.803 \n", + "facebook/m2m100_418M 0.690 \n", + "Helsinki-NLP/opus-mt-en-de 0.737 \n", "\n", - " comet_mean_score \n", - "gpt-4o-mini 0.647 \n", - "qwen2.5 0.607 \n", - "facebook/m2m100_418M 0.455 \n", - "Helsinki-NLP/opus-mt-en-de 0.585 " + " g_eval_translation_fluency_mean \\\n", + "gpt-4o-mini 0.843 \n", + "qwen2.5 0.640 \n", + "facebook/m2m100_418M 0.578 \n", + "Helsinki-NLP/opus-mt-en-de 0.571 \n", + "\n", + " g_eval_translation_noref_consistency_mean \\\n", + "gpt-4o-mini 1.000 \n", + "qwen2.5 0.905 \n", + "facebook/m2m100_418M 0.805 \n", + "Helsinki-NLP/opus-mt-en-de 0.833 \n", + "\n", + " g_eval_translation_noref_fluency_mean \n", + "gpt-4o-mini 0.957 \n", + "qwen2.5 0.856 \n", + "facebook/m2m100_418M 0.678 \n", + "Helsinki-NLP/opus-mt-en-de 0.646 " ] }, - "execution_count": 11, + "execution_count": 10, "metadata": {}, "output_type": "execute_result" } @@ -502,12 +533,13 @@ "\n", "workflow_results = [client.workflows.get_workflow(workflow_id) for workflow_id in workflow_ids]\n", "df_metrics = pd.concat([pd.DataFrame.from_dict({wr.model: wr.metrics}).T for wr in workflow_results])\n", - "df_metrics[[\"meteor_meteor_mean\", \"bleu_bleu_mean\", \"comet_mean_score\"]]" + "df_metrics[[\"meteor_meteor_mean\", \"bleu_bleu_mean\", \"comet_mean_score\",\n", + " \"g_eval_translation_consistency_mean\", \"g_eval_translation_fluency_mean\", \"g_eval_translation_noref_consistency_mean\", \"g_eval_translation_noref_fluency_mean\"]]" ] }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 17, "id": "e5761bc8-87f1-467a-9d4a-2c7fa68b5af0", "metadata": {}, "outputs": [ @@ -577,207 +609,6 @@ }, "metadata": {}, "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: qwen2.5\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
examplesground_truthpredictions
0Siso's depictions of land, water center new gallery exhibitionSisos Darstellungen von Land und Wasser in neuer AusstellungSisos Darstellungen von Land und Wasser bilden den Mittelpunkt der neuen Galerieausstellung
1\"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso)„People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso)\"Leute, die im Schwimmbad schwimmen,\" von 2022 ist ein Werk des Künstlers Vicente Siso, das ab dem 13. Januar im Tierra del Sol Gallery ausgestellt sein wird. (Foto courtoisie von Vicente Siso)
\n", - "
" - ], - "text/plain": [ - " examples \\\n", - "0 Siso's depictions of land, water center new gallery exhibition \n", - "1 \"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso) \n", - "\n", - " ground_truth \\\n", - "0 Sisos Darstellungen von Land und Wasser in neuer Ausstellung \n", - "1 „People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso) \n", - "\n", - " predictions \n", - "0 Sisos Darstellungen von Land und Wasser bilden den Mittelpunkt der neuen Galerieausstellung \n", - "1 \"Leute, die im Schwimmbad schwimmen,\" von 2022 ist ein Werk des Künstlers Vicente Siso, das ab dem 13. Januar im Tierra del Sol Gallery ausgestellt sein wird. (Foto courtoisie von Vicente Siso) " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: facebook/m2m100_418M\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
examplesground_truthpredictions
0Siso's depictions of land, water center new gallery exhibitionSisos Darstellungen von Land und Wasser in neuer AusstellungSiso's Abbildungen von Land, Wasserzentrum neue Galerie Ausstellung
1\"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso)„People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso)\"People Swimming in the Swimming Pool\" von 2022 ist ein Kunstwerk von Vicente Siso, das am 13. Januar in der Tierra del Sol Gallery ausgestellt wird (Foto von Vicente Siso)
\n", - "
" - ], - "text/plain": [ - " examples \\\n", - "0 Siso's depictions of land, water center new gallery exhibition \n", - "1 \"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso) \n", - "\n", - " ground_truth \\\n", - "0 Sisos Darstellungen von Land und Wasser in neuer Ausstellung \n", - "1 „People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso) \n", - "\n", - " predictions \n", - "0 Siso's Abbildungen von Land, Wasserzentrum neue Galerie Ausstellung \n", - "1 \"People Swimming in the Swimming Pool\" von 2022 ist ein Kunstwerk von Vicente Siso, das am 13. Januar in der Tierra del Sol Gallery ausgestellt wird (Foto von Vicente Siso) " - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Model: Helsinki-NLP/opus-mt-en-de\n" - ] - }, - { - "data": { - "text/html": [ - "
\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
examplesground_truthpredictions
0Siso's depictions of land, water center new gallery exhibitionSisos Darstellungen von Land und Wasser in neuer AusstellungSiso's Darstellungen von Land, Wasserzentrum neue Galerie Ausstellung
1\"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso)„People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso)\"People Swimming in the Swimming Pool\" von 2022 ist ein Vicente Siso Kunstwerk, das in der Tierra del Sol Gallery ab dem 13. Januar ausgestellt wird. (Foto mit freundlicher Genehmigung von Vicente Siso)
\n", - "
" - ], - "text/plain": [ - " examples \\\n", - "0 Siso's depictions of land, water center new gallery exhibition \n", - "1 \"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso) \n", - "\n", - " ground_truth \\\n", - "0 Sisos Darstellungen von Land und Wasser in neuer Ausstellung \n", - "1 „People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso) \n", - "\n", - " predictions \n", - "0 Siso's Darstellungen von Land, Wasserzentrum neue Galerie Ausstellung \n", - "1 \"People Swimming in the Swimming Pool\" von 2022 ist ein Vicente Siso Kunstwerk, das in der Tierra del Sol Gallery ab dem 13. Januar ausgestellt wird. (Foto mit freundlicher Genehmigung von Vicente Siso) " - ] - }, - "metadata": {}, - "output_type": "display_data" } ], "source": [ @@ -811,7 +642,7 @@ }, { "cell_type": "code", - "execution_count": 14, + "execution_count": 18, "id": "3e0d0847-2a33-4dfb-bdc8-af7b1940a64c", "metadata": {}, "outputs": [ From e350cbc4709eedd4e5cdc8a0faf995cf5791ebc4 Mon Sep 17 00:00:00 2001 From: Hareesh Bahuleyan Date: Tue, 27 May 2025 18:18:31 +0200 Subject: [PATCH 2/3] Translation notebook includes LLM as judge eval --- notebooks/translation_demo_wmt24pp.ipynb | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/notebooks/translation_demo_wmt24pp.ipynb b/notebooks/translation_demo_wmt24pp.ipynb index 8383c38c4..c95f425ce 100644 --- a/notebooks/translation_demo_wmt24pp.ipynb +++ b/notebooks/translation_demo_wmt24pp.ipynb @@ -320,7 +320,7 @@ " # Configure \"llm_as_judge\" argument if a judge is a different model\n", " # See: https://github.com/mozilla-ai/lumigator/pull/1321\n", " \"g_eval_translation\",\n", - " \"g_eval_translation_noref\"\n", + " \"g_eval_translation_noref\",\n", "]\n", "configurations = [\n", " # OpenAI GPt-4o-mini no explicit system prompt - uses default prompt set by Lumigator under the hood\n", @@ -533,8 +533,17 @@ "\n", "workflow_results = [client.workflows.get_workflow(workflow_id) for workflow_id in workflow_ids]\n", "df_metrics = pd.concat([pd.DataFrame.from_dict({wr.model: wr.metrics}).T for wr in workflow_results])\n", - "df_metrics[[\"meteor_meteor_mean\", \"bleu_bleu_mean\", \"comet_mean_score\",\n", - " \"g_eval_translation_consistency_mean\", \"g_eval_translation_fluency_mean\", \"g_eval_translation_noref_consistency_mean\", \"g_eval_translation_noref_fluency_mean\"]]" + "df_metrics[\n", + " [\n", + " \"meteor_meteor_mean\",\n", + " \"bleu_bleu_mean\",\n", + " \"comet_mean_score\",\n", + " \"g_eval_translation_consistency_mean\",\n", + " \"g_eval_translation_fluency_mean\",\n", + " \"g_eval_translation_noref_consistency_mean\",\n", + " \"g_eval_translation_noref_fluency_mean\",\n", + " ]\n", + "]" ] }, { From 5ca81e5714a98679b28d17d08f3439eb3a3e6638 Mon Sep 17 00:00:00 2001 From: Hareesh Bahuleyan Date: Mon, 2 Jun 2025 16:53:52 +0200 Subject: [PATCH 3/3] Display LLM-as-judge metrics in table --- notebooks/translation_demo_wmt24pp.ipynb | 372 +++++++++++++++++------ 1 file changed, 281 insertions(+), 91 deletions(-) diff --git a/notebooks/translation_demo_wmt24pp.ipynb b/notebooks/translation_demo_wmt24pp.ipynb index c95f425ce..367e932e8 100644 --- a/notebooks/translation_demo_wmt24pp.ipynb +++ b/notebooks/translation_demo_wmt24pp.ipynb @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "id": "67dc9899-abbe-4ae7-8352-fdfd77acbf5b", "metadata": {}, "outputs": [], @@ -45,7 +45,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 4, "id": "86bbf498-70e1-4e6f-9a3d-5c201fb7fea6", "metadata": {}, "outputs": [ @@ -74,7 +74,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 5, "id": "2d48e754-41ed-4fb4-9650-af1b8538e16a", "metadata": {}, "outputs": [], @@ -87,7 +87,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 6, "id": "a26aa546-48d9-437c-a338-aaaea5e1584e", "metadata": {}, "outputs": [], @@ -100,7 +100,7 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 7, "id": "412a53c6-ed38-4d6a-a26d-2e0e6c5b8987", "metadata": {}, "outputs": [ @@ -168,7 +168,7 @@ "2 „Vicente Siso: Memories of the Land and Water“ – das ist der Titel der Ausstellung im neuen Standort der Galerie „Tierra del Sol“ in West Hollywood. Siso ist seit 2012 als Künstler im Studio Arts Progam dabei und zeigt jetzt seine erste Solo-Ausstellung. Siso wurde 1962 in Madrid geboren und ist in Venezuela, Trinidad und Miami aufgewachsen. Anfang zwanzig zog er mit seiner Familie nach Südkalifornien. " ] }, - "execution_count": 5, + "execution_count": 7, "metadata": {}, "output_type": "execute_result" } @@ -181,7 +181,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 8, "id": "6f67ab2e-4be3-4430-9ceb-46e25db12fc1", "metadata": {}, "outputs": [], @@ -203,7 +203,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "id": "e26ae289-3cca-4805-ab42-3518235c21c3", "metadata": {}, "outputs": [ @@ -211,7 +211,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Dataset uploaded and has ID: d9e23676-8477-4b8c-bb2d-615d9d7e3414\n" + "Dataset uploaded and has ID: 4a5d9fa0-5ce7-440d-8b9d-0faf65c18616\n" ] } ], @@ -238,7 +238,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 10, "id": "80bdca29-2436-48a8-bdb0-cbd19631bafe", "metadata": {}, "outputs": [ @@ -246,7 +246,7 @@ "name": "stdout", "output_type": "stream", "text": [ - "Experiment created and has ID: 12\n" + "Experiment created and has ID: 8\n" ] } ], @@ -259,10 +259,10 @@ " \"source_language\": source_language,\n", " \"target_language\": target_language,\n", "}\n", - "max_samples = 3\n", + "max_samples = 2\n", "\n", "request = ExperimentCreate(\n", - " name=f\"WMT24++ Demo Experiment {source_language} to {target_language}\",\n", + " name=f\"GenAI in Localization Event Demo {source_language} to {target_language}\",\n", " description=\"Experiment for demo\",\n", " dataset=dataset_id,\n", " task_definition=task_definition,\n", @@ -282,13 +282,21 @@ "### Create and Run Workflows\n", "- One with API-based Open AI GPT-4o-mini\n", "- One with locally hosted Ollama model (Pre-requisite: Execute `ollama run qwen2.5` in a terminal window)\n", - "- One with a multilingual model on HuggingFace\n", "- One with a bilingual Opus-MT model on HuggingFace" ] }, + { + "cell_type": "markdown", + "id": "c03ec91d-b871-41ea-9c43-25f3b6884354", + "metadata": {}, + "source": [ + "G-Eval: NLG Evaluation using GPT-4 with Better Human Alignment\n", + "https://arxiv.org/abs/2303.16634" + ] + }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 11, "id": "2286c5e1-71e5-45c5-8554-811dcdc65e95", "metadata": {}, "outputs": [ @@ -296,10 +304,9 @@ "name": "stdout", "output_type": "stream", "text": [ - "Created workflow Translation with gpt-4o-mini with ID b8227c2493d845bd8b60b75ac76fbfe6 for model gpt-4o-mini\n", - "Created workflow Translation with ollama qwen2.5 with ID 374a11707d164933bf8c5e60349c509a for model qwen2.5\n", - "Created workflow Translation with HF m2m100 with ID 98a3a72fa8ad4418bf0d80c601ea0cfa for model facebook/m2m100_418M\n", - "Created workflow Translation with Helsinki-NLP/opus-mt with ID e6caaa3a0eb04c7dad68807b3bc7b0e6 for model Helsinki-NLP/opus-mt-en-de\n" + "Created workflow Translation with gpt-4o-mini with ID 43bf2310542842baa33cde0e6b7c948c for model gpt-4o-mini\n", + "Created workflow Translation with ollama qwen2.5 with ID d1c170dfc33d415c8e77c0bd7a6dcd5d for model qwen2.5\n", + "Created workflow Translation with Helsinki-NLP/opus-mt with ID ffd9de4984ea4968aa55fb46f8d9f4b6 for model Helsinki-NLP/opus-mt-en-de\n" ] } ], @@ -311,7 +318,9 @@ "Please provide a high-quality translation of the following text from {source_language} to {target_language}.\n", "Only generate the translated text. No additional text or explanation needed.\n", "\"\"\"\n", - "batch_size = 1\n", + "\n", + "batch_size = 2\n", + "\n", "metrics = [\n", " \"bleu\",\n", " \"meteor\",\n", @@ -322,6 +331,7 @@ " \"g_eval_translation\",\n", " \"g_eval_translation_noref\",\n", "]\n", + "\n", "configurations = [\n", " # OpenAI GPt-4o-mini no explicit system prompt - uses default prompt set by Lumigator under the hood\n", " {\n", @@ -339,12 +349,6 @@ " \"system_prompt\": custom_system_prompt,\n", " \"secret_key_name\": \"openai_api_key\",\n", " },\n", - " # HuggingFace multi-lingual model\n", - " {\n", - " \"name\": \"Translation with HF m2m100\",\n", - " \"model\": \"facebook/m2m100_418M\",\n", - " \"provider\": \"hf\",\n", - " },\n", " # HuggingFace Opus-MT bi-lingual model\n", " {\n", " \"name\": \"Translation with Helsinki-NLP/opus-mt\",\n", @@ -382,7 +386,7 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 12, "id": "614e0298-5d23-4a7d-97e9-e97b0db915c3", "metadata": {}, "outputs": [ @@ -428,43 +432,33 @@ " \n", " \n", " gpt-4o-mini\n", - " 0.647\n", - " 0.417\n", - " 0.589\n", - " 0.842\n", - " 0.843\n", - " 1.000\n", - " 0.957\n", + " 0.672\n", + " 0.448\n", + " 0.599\n", + " 0.783\n", + " 0.832\n", + " 0.998\n", + " 0.944\n", " \n", " \n", " qwen2.5\n", - " 0.582\n", - " 0.348\n", - " 0.492\n", - " 0.803\n", - " 0.640\n", - " 0.905\n", - " 0.856\n", - " \n", - " \n", - " facebook/m2m100_418M\n", - " 0.542\n", - " 0.271\n", - " 0.469\n", - " 0.690\n", - " 0.578\n", - " 0.805\n", - " 0.678\n", + " 0.598\n", + " 0.362\n", + " 0.466\n", + " 0.774\n", + " 0.659\n", + " 0.983\n", + " 0.872\n", " \n", " \n", " Helsinki-NLP/opus-mt-en-de\n", - " 0.610\n", - " 0.198\n", - " 0.532\n", - " 0.737\n", - " 0.571\n", - " 0.833\n", - " 0.646\n", + " 0.625\n", + " 0.146\n", + " 0.516\n", + " 0.652\n", + " 0.558\n", + " 0.818\n", + " 0.680\n", " \n", " \n", "\n", @@ -472,43 +466,37 @@ ], "text/plain": [ " meteor_meteor_mean bleu_bleu_mean \\\n", - "gpt-4o-mini 0.647 0.417 \n", - "qwen2.5 0.582 0.348 \n", - "facebook/m2m100_418M 0.542 0.271 \n", - "Helsinki-NLP/opus-mt-en-de 0.610 0.198 \n", + "gpt-4o-mini 0.672 0.448 \n", + "qwen2.5 0.598 0.362 \n", + "Helsinki-NLP/opus-mt-en-de 0.625 0.146 \n", "\n", " comet_mean_score \\\n", - "gpt-4o-mini 0.589 \n", - "qwen2.5 0.492 \n", - "facebook/m2m100_418M 0.469 \n", - "Helsinki-NLP/opus-mt-en-de 0.532 \n", + "gpt-4o-mini 0.599 \n", + "qwen2.5 0.466 \n", + "Helsinki-NLP/opus-mt-en-de 0.516 \n", "\n", " g_eval_translation_consistency_mean \\\n", - "gpt-4o-mini 0.842 \n", - "qwen2.5 0.803 \n", - "facebook/m2m100_418M 0.690 \n", - "Helsinki-NLP/opus-mt-en-de 0.737 \n", + "gpt-4o-mini 0.783 \n", + "qwen2.5 0.774 \n", + "Helsinki-NLP/opus-mt-en-de 0.652 \n", "\n", " g_eval_translation_fluency_mean \\\n", - "gpt-4o-mini 0.843 \n", - "qwen2.5 0.640 \n", - "facebook/m2m100_418M 0.578 \n", - "Helsinki-NLP/opus-mt-en-de 0.571 \n", + "gpt-4o-mini 0.832 \n", + "qwen2.5 0.659 \n", + "Helsinki-NLP/opus-mt-en-de 0.558 \n", "\n", " g_eval_translation_noref_consistency_mean \\\n", - "gpt-4o-mini 1.000 \n", - "qwen2.5 0.905 \n", - "facebook/m2m100_418M 0.805 \n", - "Helsinki-NLP/opus-mt-en-de 0.833 \n", + "gpt-4o-mini 0.998 \n", + "qwen2.5 0.983 \n", + "Helsinki-NLP/opus-mt-en-de 0.818 \n", "\n", " g_eval_translation_noref_fluency_mean \n", - "gpt-4o-mini 0.957 \n", - "qwen2.5 0.856 \n", - "facebook/m2m100_418M 0.678 \n", - "Helsinki-NLP/opus-mt-en-de 0.646 " + "gpt-4o-mini 0.944 \n", + "qwen2.5 0.872 \n", + "Helsinki-NLP/opus-mt-en-de 0.680 " ] }, - "execution_count": 10, + "execution_count": 12, "metadata": {}, "output_type": "execute_result" } @@ -548,7 +536,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 14, "id": "e5761bc8-87f1-467a-9d4a-2c7fa68b5af0", "metadata": {}, "outputs": [ @@ -583,6 +571,8 @@ " examples\n", " ground_truth\n", " predictions\n", + " g_eval_consistency\n", + " g_eval_fluency\n", " \n", " \n", " \n", @@ -591,12 +581,16 @@ " Siso's depictions of land, water center new gallery exhibition\n", " Sisos Darstellungen von Land und Wasser in neuer Ausstellung\n", " Sisos Darstellungen von Land und Wasser stehen im Mittelpunkt der neuen Galerieausstellung.\n", + " {'score': 0.783501935092053, 'reason': 'The Actual Output closely matches the Expected Output but includes 'stehen im Mittelpunkt', adding emphasis on the centrality of the depictions in the exhibition not mentioned in the Expected Output.'}\n", + " {'score': 0.8507981545435841, 'reason': 'The translation is mostly accurate and follows the structure closely, with no grammatical or spelling errors. However, the phrase 'stehen im Mittelpunkt der neuen Galerieausstellung' adds 'stehen im Mittelpunkt der' which is not present in the expected output, slightly affecting fluency.'}\n", " \n", " \n", " 1\n", " \"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso)\n", " „People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso)\n", " \"Menschen, die im Schwimmbad schwimmen\" aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Tierra del Sol Gallery ausgestellt wird. (Foto mit freundlicher Genehmigung von Vicente Siso)\n", + " {'score': 0.809247416736514, 'reason': 'The translation is mostly accurate but deviates by translating the title into German, which doesn't remain consistent with the Expected Output. The gallery name is translated incorrectly and the photo credit phrasing is also slightly off.'}\n", + " {'score': 0.7965320893502961, 'reason': 'The translation is fluent with minor deviations in word choice; 'Menschen, die im Schwimmbad schwimmen' deviates from 'People Swimming in the Swimming Pool' by translating rather than keeping the title as is. Additionally, 'Tierra del Sol Gallery' was not translated to 'Galerie', and 'Foto mit freundlicher Genehmigung' is slightly different from 'Foto bereitgestellt von'. Overall, the translation is mostly clear and easy to understand.'}\n", " \n", " \n", "\n", @@ -611,9 +605,179 @@ "0 Sisos Darstellungen von Land und Wasser in neuer Ausstellung \n", "1 „People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso) \n", "\n", - " predictions \n", - "0 Sisos Darstellungen von Land und Wasser stehen im Mittelpunkt der neuen Galerieausstellung. \n", - "1 \"Menschen, die im Schwimmbad schwimmen\" aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Tierra del Sol Gallery ausgestellt wird. (Foto mit freundlicher Genehmigung von Vicente Siso) " + " predictions \\\n", + "0 Sisos Darstellungen von Land und Wasser stehen im Mittelpunkt der neuen Galerieausstellung. \n", + "1 \"Menschen, die im Schwimmbad schwimmen\" aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Tierra del Sol Gallery ausgestellt wird. (Foto mit freundlicher Genehmigung von Vicente Siso) \n", + "\n", + " g_eval_consistency \\\n", + "0 {'score': 0.783501935092053, 'reason': 'The Actual Output closely matches the Expected Output but includes 'stehen im Mittelpunkt', adding emphasis on the centrality of the depictions in the exhibition not mentioned in the Expected Output.'} \n", + "1 {'score': 0.809247416736514, 'reason': 'The translation is mostly accurate but deviates by translating the title into German, which doesn't remain consistent with the Expected Output. The gallery name is translated incorrectly and the photo credit phrasing is also slightly off.'} \n", + "\n", + " g_eval_fluency \n", + "0 {'score': 0.8507981545435841, 'reason': 'The translation is mostly accurate and follows the structure closely, with no grammatical or spelling errors. However, the phrase 'stehen im Mittelpunkt der neuen Galerieausstellung' adds 'stehen im Mittelpunkt der' which is not present in the expected output, slightly affecting fluency.'} \n", + "1 {'score': 0.7965320893502961, 'reason': 'The translation is fluent with minor deviations in word choice; 'Menschen, die im Schwimmbad schwimmen' deviates from 'People Swimming in the Swimming Pool' by translating rather than keeping the title as is. Additionally, 'Tierra del Sol Gallery' was not translated to 'Galerie', and 'Foto mit freundlicher Genehmigung' is slightly different from 'Foto bereitgestellt von'. Overall, the translation is mostly clear and easy to understand.'} " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: qwen2.5\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
examplesground_truthpredictionsg_eval_consistencyg_eval_fluency
0Siso's depictions of land, water center new gallery exhibitionSisos Darstellungen von Land und Wasser in neuer AusstellungSisos Darstellungen von Land und Wasser bilden den Mittelpunkt der neuen Galerieausstellung{'score': 0.7584223499454361, 'reason': 'The actual output correctly identifies 'Sisos Darstellungen von Land und Wasser' as the main subject, similar to the expected output. However, it adds 'bilden den Mittelpunkt der neuen Galerieausstellung', which is not explicitly stated in the expected output, thus introducing additional information.'}{'score': 0.77087828225066, 'reason': 'The actual output uses 'bilden den Mittelpunkt der neuen Galerieausstellung' instead of 'in neuer Ausstellung,' which adds more detail than needed but remains grammatically correct and clear. The translation is natural and easy to understand, with minor differences that do not significantly affect the fluency.'}
1\"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso)„People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso)\"Leute, die im Schwimmbad schwimmen,\" von 2022 ist ein Werk des Künstlers Vicente Siso, das ab dem 13. Januar im Tierra del Sol Gallery ausgestellt sein wird. (Foto courtoisie von Vicente Siso){'score': 0.7911382512918791, 'reason': 'The translation closely follows the expected output with minor deviations: 1) The phrase 'People Swimming in the Swimming Pool' was correctly retained in English, 2) 'photo courtoisie von Vicente Siso' introduces a slight inconsistency with 'Foto bereitgestellt von Vicente Siso,' and 3) 'Tierra del Sol Gallery' instead of 'Galerie 'Tierra del Sol'' shows a slight variation in noun phrase structure. Otherwise, factual details are accurately translated.'}{'score': 0.5433600169040791, 'reason': 'The translation is generally understandable but has a few issues: 'Leute, die im Schwimmbad schwimmen' does not maintain the original English title's non-translated style; 'im Tierra del Sol Gallery' should align with 'in der Galerie „Tierra del Sol“'; and 'courtoisie' is a non-standard term in this context compared to 'bereitgestellt'.'}
\n", + "
" + ], + "text/plain": [ + " examples \\\n", + "0 Siso's depictions of land, water center new gallery exhibition \n", + "1 \"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso) \n", + "\n", + " ground_truth \\\n", + "0 Sisos Darstellungen von Land und Wasser in neuer Ausstellung \n", + "1 „People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso) \n", + "\n", + " predictions \\\n", + "0 Sisos Darstellungen von Land und Wasser bilden den Mittelpunkt der neuen Galerieausstellung \n", + "1 \"Leute, die im Schwimmbad schwimmen,\" von 2022 ist ein Werk des Künstlers Vicente Siso, das ab dem 13. Januar im Tierra del Sol Gallery ausgestellt sein wird. (Foto courtoisie von Vicente Siso) \n", + "\n", + " g_eval_consistency \\\n", + "0 {'score': 0.7584223499454361, 'reason': 'The actual output correctly identifies 'Sisos Darstellungen von Land und Wasser' as the main subject, similar to the expected output. However, it adds 'bilden den Mittelpunkt der neuen Galerieausstellung', which is not explicitly stated in the expected output, thus introducing additional information.'} \n", + "1 {'score': 0.7911382512918791, 'reason': 'The translation closely follows the expected output with minor deviations: 1) The phrase 'People Swimming in the Swimming Pool' was correctly retained in English, 2) 'photo courtoisie von Vicente Siso' introduces a slight inconsistency with 'Foto bereitgestellt von Vicente Siso,' and 3) 'Tierra del Sol Gallery' instead of 'Galerie 'Tierra del Sol'' shows a slight variation in noun phrase structure. Otherwise, factual details are accurately translated.'} \n", + "\n", + " g_eval_fluency \n", + "0 {'score': 0.77087828225066, 'reason': 'The actual output uses 'bilden den Mittelpunkt der neuen Galerieausstellung' instead of 'in neuer Ausstellung,' which adds more detail than needed but remains grammatically correct and clear. The translation is natural and easy to understand, with minor differences that do not significantly affect the fluency.'} \n", + "1 {'score': 0.5433600169040791, 'reason': 'The translation is generally understandable but has a few issues: 'Leute, die im Schwimmbad schwimmen' does not maintain the original English title's non-translated style; 'im Tierra del Sol Gallery' should align with 'in der Galerie „Tierra del Sol“'; and 'courtoisie' is a non-standard term in this context compared to 'bereitgestellt'.'} " + ] + }, + "metadata": {}, + "output_type": "display_data" + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Model: Helsinki-NLP/opus-mt-en-de\n" + ] + }, + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
examplesground_truthpredictionsg_eval_consistencyg_eval_fluency
0Siso's depictions of land, water center new gallery exhibitionSisos Darstellungen von Land und Wasser in neuer AusstellungSiso's Darstellungen von Land, Wasserzentrum neue Galerie Ausstellung{'score': 0.472292078211453, 'reason': 'The Actual Output contains factual deviations such as 'Wasserzentrum' instead of 'und Wasser' and omits 'in neuer'.'}{'score': 0.404512953793764, 'reason': 'Output has grammatical issues: 'Siso's' should be 'Sisos' and 'Wasserzentrum neue Galerie Ausstellung' is incorrect structure and word choice compared to expected 'und Wasser in neuer Ausstellung'.'}
1\"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso)„People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso)\"People Swimming in the Swimming Pool\" von 2022 ist ein Vicente Siso Kunstwerk, das in der Tierra del Sol Gallery ab dem 13. Januar ausgestellt wird. (Foto mit freundlicher Genehmigung von Vicente Siso){'score': 0.8679178692681611, 'reason': 'The actual output closely matches the expected output with minor differences: the use of 'von' instead of 'aus dem Jahr' for 'from' and 'mit freundlicher Genehmigung von' instead of 'bereitgestellt von' for 'photo courtesy of'. These do not affect the factual content significantly.'}{'score': 0.726302293135876, 'reason': 'The translation is mostly clear but has some issues such as 'ein Vicente Siso Kunstwerk' instead of 'ein Kunstwerk von Vicente Siso' and lack of quotes around 'Tierra del Sol'. These impact clarity slightly but the main content remains comprehensible.'}
\n", + "
" + ], + "text/plain": [ + " examples \\\n", + "0 Siso's depictions of land, water center new gallery exhibition \n", + "1 \"People Swimming in the Swimming Pool\" from 2022 is one Vicente Siso artwork that will display at Tierra del Sol Gallery beginning Jan. 13. (photo courtesy of Vicente Siso) \n", + "\n", + " ground_truth \\\n", + "0 Sisos Darstellungen von Land und Wasser in neuer Ausstellung \n", + "1 „People Swimming in the Swimming Pool“ aus dem Jahr 2022 ist ein Kunstwerk von Vicente Siso, das ab dem 13. Januar in der Galerie „Tierra del Sol“ ausgestellt wird (Foto bereitgestellt von Vicente Siso) \n", + "\n", + " predictions \\\n", + "0 Siso's Darstellungen von Land, Wasserzentrum neue Galerie Ausstellung \n", + "1 \"People Swimming in the Swimming Pool\" von 2022 ist ein Vicente Siso Kunstwerk, das in der Tierra del Sol Gallery ab dem 13. Januar ausgestellt wird. (Foto mit freundlicher Genehmigung von Vicente Siso) \n", + "\n", + " g_eval_consistency \\\n", + "0 {'score': 0.472292078211453, 'reason': 'The Actual Output contains factual deviations such as 'Wasserzentrum' instead of 'und Wasser' and omits 'in neuer'.'} \n", + "1 {'score': 0.8679178692681611, 'reason': 'The actual output closely matches the expected output with minor differences: the use of 'von' instead of 'aus dem Jahr' for 'from' and 'mit freundlicher Genehmigung von' instead of 'bereitgestellt von' for 'photo courtesy of'. These do not affect the factual content significantly.'} \n", + "\n", + " g_eval_fluency \n", + "0 {'score': 0.404512953793764, 'reason': 'Output has grammatical issues: 'Siso's' should be 'Sisos' and 'Wasserzentrum neue Galerie Ausstellung' is incorrect structure and word choice compared to expected 'und Wasser in neuer Ausstellung'.'} \n", + "1 {'score': 0.726302293135876, 'reason': 'The translation is mostly clear but has some issues such as 'ein Vicente Siso Kunstwerk' instead of 'ein Kunstwerk von Vicente Siso' and lack of quotes around 'Tierra del Sol'. These impact clarity slightly but the main content remains comprehensible.'} " ] }, "metadata": {}, @@ -621,12 +785,22 @@ } ], "source": [ - "output_texts_per_model = {\n", - " wr.model: pd.read_json(wr.artifacts_download_url)[\"artifacts\"][\n", + "output_texts_per_model = {}\n", + "for wr in workflow_results:\n", + " # Input and predictions\n", + " base_dict = pd.read_json(wr.artifacts_download_url)[\"artifacts\"][\n", " [\"examples\", \"ground_truth\", \"predictions\"]\n", " ].to_dict()\n", - " for wr in workflow_results\n", - "}\n", + "\n", + " # LLM as a Judge Metrics\n", + " base_dict[\"g_eval_consistency\"] = pd.read_json(wr.artifacts_download_url)[\"metrics\"][\n", + " [\"g_eval_translation\"]\n", + " ].to_dict()[\"g_eval_translation\"][\"consistency\"]\n", + " base_dict[\"g_eval_fluency\"] = pd.read_json(wr.artifacts_download_url)[\"metrics\"][[\"g_eval_translation\"]].to_dict()[\n", + " \"g_eval_translation\"\n", + " ][\"fluency\"]\n", + " output_texts_per_model[wr.model] = base_dict\n", + "\n", "for model_name, df_texts in output_texts_per_model.items():\n", " print(f\"Model: {model_name}\")\n", " display(pd.DataFrame(df_texts).head(2))" @@ -640,6 +814,22 @@ "outputs": [], "source": [] }, + { + "cell_type": "code", + "execution_count": null, + "id": "ae844092-d0c3-4865-9de6-8a6ccd32afb9", + "metadata": {}, + "outputs": [], + "source": [] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6b1a79a9-8ae3-43b6-abe1-2be30d7d30df", + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "markdown", "id": "30648699",