def evaluate(
    dataset: Dataset,
    metrics: list[Metric] | None = None,
+   column_map: dict[str, str] = {
+       "question": "question",
+       "contexts": "contexts",
+       "answer": "answer",
+       "ground_truths": "ground_truths",
+   },
) -> Result:
    """
    Run the evaluation on the dataset with different metrics
@@ -26,6 +32,10 @@ def evaluate(
    metrics : list[Metric], optional
        List of metrics to use for evaluation. If not provided then ragas will run the
        evaluation on the best set of metrics to give a complete view.
+   column_map : dict[str, str], optional
+       The column names of the dataset to use for evaluation. If the column names of
+       the dataset are different from the default ones then you can provide the
+       mapping as a dictionary here.

    Returns
    -------
@@ -66,6 +76,9 @@ def evaluate(

        metrics = [answer_relevancy, context_relevancy, faithfulness]

+   # select columns from the dataset
+   dataset = dataset.from_dict({k: dataset[v] for k, v in column_map.items()})
+
    # validation
    validate_evaluation_modes(dataset, metrics)
    validate_column_dtypes(dataset)
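Illustrative usage of the new column_map parameter, as a rough sketch rather than part of the commit: the keys of column_map are the default names ragas expects ("question", "contexts", "answer", "ground_truths"), while the values name the columns actually present in the dataset. The dataset column names used below ("query", "retrieved_contexts", "response", "references") are made-up examples.

# Sketch only: the column names "query", "retrieved_contexts", "response" and
# "references" are hypothetical; only the column_map keys are fixed by ragas.
from datasets import Dataset

from ragas import evaluate

ds = Dataset.from_dict(
    {
        "query": ["What is the capital of France?"],
        "retrieved_contexts": [["Paris is the capital of France."]],
        "response": ["Paris."],
        "references": [["Paris is the capital of France."]],
    }
)

# Keys are the default names evaluate() expects; values are the dataset's own
# columns. Internally the columns are remapped with
# dataset.from_dict({k: dataset[v] for k, v in column_map.items()}).
result = evaluate(
    ds,
    column_map={
        "question": "query",
        "contexts": "retrieved_contexts",
        "answer": "response",
        "ground_truths": "references",
    },
)
print(result)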