@@ -35,7 +35,6 @@
     MetricWithLLM,
     MultiTurnMetric,
     SingleTurnMetric,
-    is_reproducable,
 )
 from ragas.run_config import RunConfig
 from ragas.utils import convert_v1_to_v2_dataset
@@ -60,7 +59,6 @@ def evaluate(
     llm: t.Optional[BaseRagasLLM | LangchainLLM] = None,
     embeddings: t.Optional[BaseRagasEmbeddings | LangchainEmbeddings] = None,
     callbacks: Callbacks = None,
-    in_ci: bool = False,
     run_config: t.Optional[RunConfig] = None,
     token_usage_parser: t.Optional[TokenUsageParser] = None,
     raise_exceptions: bool = False,
@@ -93,10 +91,6 @@ def evaluate(
         Lifecycle Langchain Callbacks to run during evaluation. Check the
         [langchain documentation](https://python.langchain.com/docs/modules/callbacks/)
         for more information.
-    in_ci: bool
-        Whether the evaluation is running in CI or not. If set to True then some
-        metrics will be run to increase the reproducability of the evaluations. This
-        will increase the runtime and cost of evaluations. Default is False.
     run_config: RunConfig, optional
         Configuration for runtime settings like timeout and retries. If not provided,
         default values are used.
@@ -193,7 +187,6 @@ def evaluate(
     binary_metrics = []
     llm_changed: t.List[int] = []
     embeddings_changed: t.List[int] = []
-    reproducable_metrics: t.List[int] = []
     answer_correctness_is_set = -1

     # loop through the metrics and perform initializations
@@ -214,12 +207,6 @@ def evaluate(
         if isinstance(metric, AnswerCorrectness):
             if metric.answer_similarity is None:
                 answer_correctness_is_set = i
-        # set reproducibility for metrics if in CI
-        if in_ci and is_reproducable(metric):
-            if metric.reproducibility == 1:  # type: ignore
-                # only set a value if not already set
-                metric.reproducibility = 3  # type: ignore
-            reproducable_metrics.append(i)

         # init all the models
         metric.init(run_config)
@@ -354,9 +341,6 @@ def evaluate(
             AnswerCorrectness, metrics[answer_correctness_is_set]
         ).answer_similarity = None

-    for i in reproducable_metrics:
-        metrics[i].reproducibility = 1  # type: ignore
-
     # flush the analytics batcher
     from ragas._analytics import _analytics_batcher
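For callers who relied on in_ci=True, the removed block is straightforward to reproduce at the call site: per the deleted docstring, the flag only bumped metric.reproducibility from its default of 1 to 3 on metrics that support it, trading extra runtime and cost for more stable scores. A minimal migration sketch, assuming the stock faithfulness metric exposes a reproducibility attribute and that my_dataset is an evaluation dataset you have already built (both names are illustrative, not part of this commit):

from ragas import evaluate
from ragas.metrics import faithfulness

metrics = [faithfulness]
for metric in metrics:
    # Mirror the deleted in_ci behaviour: only bump the default value,
    # never overwrite a reproducibility the caller has already chosen.
    if getattr(metric, "reproducibility", None) == 1:
        metric.reproducibility = 3

# my_dataset is hypothetical; construct it however you normally would.
result = evaluate(dataset=my_dataset, metrics=metrics)

One difference from the old flag: the removed code reset reproducibility back to 1 after evaluation, while this manual version does not, so reusing the same metric objects keeps the higher setting.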