Merged
Changes from all commits
Commits
24 commits
afa49ca
Remove genai support after service was sunset
yoavkatz Jan 13, 2025
fa2ed9a
Merge branch 'main' into remove_genai_support
yoavkatz Jan 13, 2025
b4be9c1
Updated catalog
yoavkatz Jan 13, 2025
10d4789
Moved to use CrossProviderInferenceEngine instead of WML or BAM
yoavkatz Jan 14, 2025
a2adf08
Merge remote-tracking branch 'origin/main' into remove_genai_support
yoavkatz Jan 14, 2025
114c519
Removed GA
yoavkatz Jan 14, 2025
51329cc
Moved llamaguard from genai to cross provider inference engine
yoavkatz Jan 14, 2025
12642c7
Made catalog consistent
yoavkatz Jan 15, 2025
7244bdd
changed table2text rating metric to follow others' conventions
yoavkatz Jan 15, 2025
a9f6436
Merge remote-tracking branch 'origin/main' into remove_genai_support
yoavkatz Jan 15, 2025
c8ef496
Merge remote-tracking branch 'origin/main' into remove_genai_support
yoavkatz Jan 26, 2025
7f03e8c
Merge remote-tracking branch 'origin/main' into remove_genai_support
yoavkatz Jun 10, 2025
46101ec
Updated json.
yoavkatz Jun 10, 2025
a89afb0
Merge remote-tracking branch 'origin/main' into remove_genai_support
yoavkatz Jun 29, 2025
a3d498d
Reverted branch changes which were addressed in main
yoavkatz Jun 29, 2025
d8f1f9b
Removed unused json from catalog
yoavkatz Jun 29, 2025
0721527
Shorten revision commit id to avoid detect secret error in json files
yoavkatz Jun 29, 2025
b20e972
Updated secrets
yoavkatz Jun 30, 2025
d06716f
Merge remote-tracking branch 'origin/main' into remove_genai_support
yoavkatz Jul 20, 2025
2a2e40a
Reverted unneeded changes
yoavkatz Jul 20, 2025
6dd2389
Updated catalog
yoavkatz Jul 20, 2025
d8bb9d9
Updated secret
yoavkatz Jul 21, 2025
f0c50d1
Merge branch 'main' into remove_genai_support
elronbandel Jul 22, 2025
a6e6bb3
Merge branch 'main' into remove_genai_support
elronbandel Aug 6, 2025
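
The thread running through these commits is the retirement of the IBM GenAI (BAM) backend: judge metrics that previously constructed an IbmGenAiInferenceEngine now build a CrossProviderInferenceEngine, as the diffs below show. A minimal before/after sketch of that substitution (parameter names taken from the mt_bench diff further down; the model id is only an example):

from unitxt.inference import CrossProviderInferenceEngine
from unitxt.random_utils import get_seed

# Before this PR (engine class now removed): the sunset IBM GenAI / BAM service
# engine = IbmGenAiInferenceEngine(
#     model_name="meta-llama/llama-3-70b-instruct", max_new_tokens=252, random_seed=get_seed()
# )

# After this PR: the provider-agnostic engine, with the shorter model id and
# renamed parameters used throughout the updated catalog entries
engine = CrossProviderInferenceEngine(
    model="llama-3-70b-instruct", max_tokens=252, seed=get_seed()
)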
4 changes: 2 additions & 2 deletions examples/evaluate_llm_as_judge_from_template.py
@@ -22,8 +22,8 @@

# List of metrics to evaluate
metrics_to_check = [
"metrics.llm_as_judge.rating.llama_3_8b_instruct_ibm_genai_template_mt_bench_single_turn",
"metrics.llm_as_judge.rating.llama_3_70b_instruct_ibm_genai_template_generic_single_turn",
"metrics.llm_as_judge.rating.llama_3_8b_instruct.mt_bench_single_turn",
"metrics.llm_as_judge.rating.llama_3_70b_instruct.generic_single_turn",
]

for metric_to_check in metrics_to_check:
2 changes: 1 addition & 1 deletion prepare/cards/tablebench.py
@@ -13,7 +13,7 @@
card = TaskCard(
loader=LoadHF(
path="Multilingual-Multimodal-NLP/TableBench",
revision="90593ad8af90f027f6f478b8c4c1981d9f073a83", # pragma: allowlist secret
revision="90593ad", # pragma: allowlist secret
data_classification_policy=["public"],
splits=["test"],
),
2 changes: 1 addition & 1 deletion prepare/cards/tablebench_data_analysis.py
@@ -14,7 +14,7 @@
card = TaskCard(
loader=LoadHF(
path="Multilingual-Multimodal-NLP/TableBench",
revision="90593ad8af90f027f6f478b8c4c1981d9f073a83", # pragma: allowlist secret
revision="90593ad", # pragma: allowlist secret
data_classification_policy=["public"],
splits=["test"],
),
2 changes: 1 addition & 1 deletion prepare/cards/tablebench_fact_checking.py
@@ -14,7 +14,7 @@
card = TaskCard(
loader=LoadHF(
path="Multilingual-Multimodal-NLP/TableBench",
revision="90593ad8af90f027f6f478b8c4c1981d9f073a83", # pragma: allowlist secret
revision="90593ad", # pragma: allowlist secret
data_classification_policy=["public"],
splits=["test"],
),
2 changes: 1 addition & 1 deletion prepare/cards/tablebench_numerical_reasoning.py
@@ -14,7 +14,7 @@
card = TaskCard(
loader=LoadHF(
path="Multilingual-Multimodal-NLP/TableBench",
revision="90593ad8af90f027f6f478b8c4c1981d9f073a83", # pragma: allowlist secret
revision="90593ad", # pragma: allowlist secret
data_classification_policy=["public"],
splits=["test"],
),
Empty file.
11 changes: 0 additions & 11 deletions prepare/engines/ibm_genai/llama3.py

This file was deleted.

@@ -2,7 +2,6 @@
from unitxt.inference import (
CrossProviderInferenceEngine,
GenericInferenceEngine,
IbmGenAiInferenceEngine,
WMLInferenceEngine,
)
from unitxt.llm_as_judge import LLMAsJudge
@@ -16,7 +15,6 @@

inference_engines = [
("ibm_wml", WMLInferenceEngine),
("ibm_genai", IbmGenAiInferenceEngine),
("generic_engine", GenericInferenceEngine),
]

37 changes: 37 additions & 0 deletions prepare/metrics/llm_as_judge/rating/llama_3_generic_template.py
@@ -0,0 +1,37 @@
from unitxt import add_to_catalog
from unitxt.inference import CrossProviderInferenceEngine
from unitxt.llm_as_judge_from_template import LLMAsJudge

inference_model = CrossProviderInferenceEngine(
model="llama-3-70b-instruct", max_tokens=252
)

metric = LLMAsJudge(
inference_model=inference_model,
template="templates.response_assessment.rating.generic_single_turn",
task="rating.single_turn",
format="formats.chat_api",
main_score="llama_3_70b_instruct_template_generic_single_turn",
prediction_type=str,
)

add_to_catalog(
metric,
"metrics.llm_as_judge.rating.llama_3_70b_instruct.generic_single_turn",
overwrite=True,
)

metric = LLMAsJudge(
inference_model=inference_model,
template="templates.response_assessment.rating.generic_single_turn_with_reference",
task="rating.single_turn_with_reference",
format="formats.chat_api",
single_reference_per_prediction=True,
main_score="llama_3_70b_instruct_template_generic_single_turn_with_reference",
)

add_to_catalog(
metric,
"metrics.llm_as_judge.rating.llama_3_70b_instruct.generic_single_turn_with_reference",
overwrite=True,
)
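
The two catalog names registered above are the same ones referenced in examples/evaluate_llm_as_judge_from_template.py at the top of this diff. As a rough usage sketch only (the card, template, and loader_limit below are illustrative assumptions, and load_dataset/evaluate call signatures vary slightly between unitxt versions):

from unitxt import evaluate, load_dataset

# Attach the newly named judge metric to an arbitrary QA card (both card and
# template here are just examples from the unitxt catalog).
dataset = load_dataset(
    card="cards.squad",
    template="templates.qa.with_context.simple",
    metrics=["metrics.llm_as_judge.rating.llama_3_70b_instruct.generic_single_turn"],
    loader_limit=5,
    split="test",
)

predictions = ["a placeholder answer" for _ in dataset]  # stand-in model outputs
# Running the judge requires credentials for whichever provider
# CrossProviderInferenceEngine is configured to use.
results = evaluate(predictions=predictions, data=dataset)
print(results)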

This file was deleted.

@@ -1,32 +1,32 @@
from unitxt import add_to_catalog
from unitxt.inference import IbmGenAiInferenceEngine
from unitxt.inference import CrossProviderInferenceEngine
from unitxt.llm_as_judge_from_template import LLMAsJudge
from unitxt.random_utils import get_seed

model_list = ["meta-llama/llama-3-8b-instruct", "meta-llama/llama-3-70b-instruct"]
format = "formats.llama3_instruct"
model_list = ["llama-3-70b-instruct", "llama-3-8b-instruct"]
format = "formats.chat_api"
template = "templates.response_assessment.rating.mt_bench_single_turn"
task = "rating.single_turn"


for model_id in model_list:
inference_model = IbmGenAiInferenceEngine(
model_name=model_id, max_new_tokens=252, random_seed=get_seed()
inference_model = CrossProviderInferenceEngine(
model=model_id, max_tokens=252, seed=get_seed()
)
model_label = model_id.split("/")[1].replace("-", "_").replace(".", ",").lower()
model_label = f"{model_label}_ibm_genai"
model_label = model_id.replace("-", "_").replace(".", ",").lower()
template_label = template.split(".")[-1]
metric_label = f"{model_label}_template_{template_label}"
metric = LLMAsJudge(
inference_model=inference_model,
template=template,
task=task,
format=format,
format="formats.chat_api",
main_score=metric_label,
prediction_type=str,
)

add_to_catalog(
metric,
f"metrics.llm_as_judge.rating.{model_label}_template_{template_label}",
f"metrics.llm_as_judge.rating.{model_label}.{template_label}",
overwrite=True,
)
@@ -13,7 +13,6 @@
model=model_id, max_tokens=252, seed=get_seed()
)
model_label = model_id.replace("-", "_").replace(".", ",").lower()
model_label = f"{model_label}"
template_label = template.split(".")[-1]
metric_label = f"{model_label}_template_{template_label}"
metric = LLMAsJudge(
2 changes: 1 addition & 1 deletion src/unitxt/catalog/cards/tablebench.json
@@ -3,7 +3,7 @@
"loader": {
"__type__": "load_hf",
"path": "Multilingual-Multimodal-NLP/TableBench",
"revision": "90593ad8af90f027f6f478b8c4c1981d9f073a83",
"revision": "90593ad",
"data_classification_policy": [
"public"
],
2 changes: 1 addition & 1 deletion src/unitxt/catalog/cards/tablebench_data_analysis.json
@@ -3,7 +3,7 @@
"loader": {
"__type__": "load_hf",
"path": "Multilingual-Multimodal-NLP/TableBench",
"revision": "90593ad8af90f027f6f478b8c4c1981d9f073a83",
"revision": "90593ad",
"data_classification_policy": [
"public"
],
2 changes: 1 addition & 1 deletion src/unitxt/catalog/cards/tablebench_fact_checking.json
@@ -3,7 +3,7 @@
"loader": {
"__type__": "load_hf",
"path": "Multilingual-Multimodal-NLP/TableBench",
"revision": "90593ad8af90f027f6f478b8c4c1981d9f073a83",
"revision": "90593ad",
"data_classification_policy": [
"public"
],
2 changes: 1 addition & 1 deletion src/unitxt/catalog/cards/tablebench_numerical_reasoning.json
@@ -3,7 +3,7 @@
"loader": {
"__type__": "load_hf",
"path": "Multilingual-Multimodal-NLP/TableBench",
"revision": "90593ad8af90f027f6f478b8c4c1981d9f073a83",
"revision": "90593ad",
"data_classification_policy": [
"public"
],

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

This file was deleted.

@@ -0,0 +1,14 @@
{
"__type__": "llm_as_judge",
"inference_model": {
"__type__": "cross_provider_inference_engine",
"model": "llama-3-70b-instruct",
"max_tokens": 252,
"seed": 42
},
"template": "templates.response_assessment.rating.mt_bench_single_turn",
"task": "rating.single_turn",
"format": "formats.chat_api",
"main_score": "llama_3_70b_instruct_template_mt_bench_single_turn",
"prediction_type": "str"
}
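
This JSON is the serialized catalog form of the LLMAsJudge registration produced by the mt_bench prepare script above. A small hedged sketch of loading it back (fetch_artifact is unitxt's generic catalog lookup; instantiating the judge may need provider credentials to be configured):

from unitxt.artifact import fetch_artifact

# Resolve the catalog name written by the prepare script; fetch_artifact returns
# the instantiated artifact together with the catalog it came from.
metric, _ = fetch_artifact(
    "metrics.llm_as_judge.rating.llama_3_70b_instruct.mt_bench_single_turn"
)
print(type(metric).__name__)  # expected: LLMAsJudge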

This file was deleted.

This file was deleted.
