def evaluate(
    dataset: Dataset,
    metrics: list[Metric] | None = None,
+   column_map: dict[str, str] = {
+       "question": "question",
+       "contexts": "contexts",
+       "answer": "answer",
+       "ground_truths": "ground_truths",
+   },
) -> Result:
    """
    Run the evaluation on the dataset with different metrics
@@ -26,6 +32,10 @@ def evaluate(
    metrics : list[Metric], optional
        List of metrics to use for evaluation. If not provided then ragas will run the
        evaluation on the best set of metrics to give a complete view.
+   column_map : dict[str, str], optional
+       The column names of the dataset to use for evaluation. If the column names of
+       the dataset are different from the default ones then you can provide the
+       mapping as a dictionary here.

    Returns
    -------
@@ -66,6 +76,9 @@ def evaluate(

        metrics = [answer_relevancy, context_relevancy, faithfulness]

+   # select columns from the dataset
+   dataset = dataset.from_dict({k: dataset[v] for k, v in column_map.items()})
+
    # validation
    validate_evaluation_modes(dataset, metrics)
    validate_column_dtypes(dataset)
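Illustrative usage of the new column_map parameter, as a rough sketch rather than part of the commit: the keys of column_map are the default names ragas expects ("question", "contexts", "answer", "ground_truths"), while the values name the columns actually present in the dataset. The dataset column names used below ("query", "retrieved_contexts", "response", "references") are made-up examples.

# Sketch only: the column names "query", "retrieved_contexts", "response" and
# "references" are hypothetical; only the column_map keys are fixed by ragas.
from datasets import Dataset

from ragas import evaluate

ds = Dataset.from_dict(
    {
        "query": ["What is the capital of France?"],
        "retrieved_contexts": [["Paris is the capital of France."]],
        "response": ["Paris."],
        "references": [["Paris is the capital of France."]],
    }
)

# Keys are the default names evaluate() expects; values are the dataset's own
# columns. Internally the columns are remapped with
# dataset.from_dict({k: dataset[v] for k, v in column_map.items()}).
result = evaluate(
    ds,
    column_map={
        "question": "query",
        "contexts": "retrieved_contexts",
        "answer": "response",
        "ground_truths": "references",
    },
)
print(result)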