Skip to content

Commit f2ec778

Browse files
authored
fix: remove reproducibility arg (#1790)
Remove the reproducibility arg in light of the alignment feature, which better solves the same problem. fixes: #1711
1 parent 9403320 commit f2ec778

File tree

7 files changed

+0
-99
lines changed

7 files changed

+0
-99
lines changed

src/ragas/evaluation.py

-16
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,6 @@
3535
MetricWithLLM,
3636
MultiTurnMetric,
3737
SingleTurnMetric,
38-
is_reproducable,
3938
)
4039
from ragas.run_config import RunConfig
4140
from ragas.utils import convert_v1_to_v2_dataset
@@ -60,7 +59,6 @@ def evaluate(
6059
llm: t.Optional[BaseRagasLLM | LangchainLLM] = None,
6160
embeddings: t.Optional[BaseRagasEmbeddings | LangchainEmbeddings] = None,
6261
callbacks: Callbacks = None,
63-
in_ci: bool = False,
6462
run_config: t.Optional[RunConfig] = None,
6563
token_usage_parser: t.Optional[TokenUsageParser] = None,
6664
raise_exceptions: bool = False,
@@ -93,10 +91,6 @@ def evaluate(
9391
Lifecycle Langchain Callbacks to run during evaluation. Check the
9492
[langchain documentation](https://python.langchain.com/docs/modules/callbacks/)
9593
for more information.
96-
in_ci: bool
97-
Whether the evaluation is running in CI or not. If set to True then some
98-
metrics will be run to increase the reproducability of the evaluations. This
99-
will increase the runtime and cost of evaluations. Default is False.
10094
run_config: RunConfig, optional
10195
Configuration for runtime settings like timeout and retries. If not provided,
10296
default values are used.
@@ -193,7 +187,6 @@ def evaluate(
193187
binary_metrics = []
194188
llm_changed: t.List[int] = []
195189
embeddings_changed: t.List[int] = []
196-
reproducable_metrics: t.List[int] = []
197190
answer_correctness_is_set = -1
198191

199192
# loop through the metrics and perform initializations
@@ -214,12 +207,6 @@ def evaluate(
214207
if isinstance(metric, AnswerCorrectness):
215208
if metric.answer_similarity is None:
216209
answer_correctness_is_set = i
217-
# set reproducibility for metrics if in CI
218-
if in_ci and is_reproducable(metric):
219-
if metric.reproducibility == 1: # type: ignore
220-
# only set a value if not already set
221-
metric.reproducibility = 3 # type: ignore
222-
reproducable_metrics.append(i)
223210

224211
# init all the models
225212
metric.init(run_config)
@@ -354,9 +341,6 @@ def evaluate(
354341
AnswerCorrectness, metrics[answer_correctness_is_set]
355342
).answer_similarity = None
356343

357-
for i in reproducable_metrics:
358-
metrics[i].reproducibility = 1 # type: ignore
359-
360344
# flush the analytics batcher
361345
from ragas._analytics import _analytics_batcher
362346

src/ragas/integrations/llama_index.py

-1
Original file line numberDiff line numberDiff line change
@@ -97,7 +97,6 @@ def evaluate(
9797
callbacks=callbacks,
9898
show_progress=show_progress,
9999
run_config=run_config or RunConfig(),
100-
in_ci=in_ci,
101100
token_usage_parser=token_usage_parser,
102101
)
103102

src/ragas/metrics/_context_precision.py

-18
Original file line numberDiff line numberDiff line change
@@ -109,27 +109,10 @@ class LLMContextPrecisionWithReference(MetricWithLLM, SingleTurnMetric):
109109
default_factory=ContextPrecisionPrompt
110110
)
111111
max_retries: int = 1
112-
_reproducibility: int = 1
113112

114113
def _get_row_attributes(self, row: t.Dict) -> t.Tuple[str, t.List[str], t.Any]:
115114
return row["user_input"], row["retrieved_contexts"], row["reference"]
116115

117-
@property
118-
def reproducibility(self):
119-
return self._reproducibility
120-
121-
@reproducibility.setter
122-
def reproducibility(self, value):
123-
if value < 1:
124-
logger.warning("reproducibility cannot be less than 1, setting to 1")
125-
value = 1
126-
elif value % 2 == 0:
127-
logger.warning(
128-
"reproducibility level cannot be set to even number, setting to odd"
129-
)
130-
value += 1
131-
self._reproducibility = value
132-
133116
def _calculate_average_precision(
134117
self, verifications: t.List[Verification]
135118
) -> float:
@@ -173,7 +156,6 @@ async def _ascore(
173156
context=context,
174157
answer=reference,
175158
),
176-
n=self.reproducibility,
177159
llm=self.llm,
178160
callbacks=callbacks,
179161
)

src/ragas/metrics/_context_recall.py

-23
Original file line numberDiff line numberDiff line change
@@ -113,28 +113,6 @@ class LLMContextRecall(MetricWithLLM, SingleTurnMetric):
113113
default_factory=ContextRecallClassificationPrompt
114114
)
115115
max_retries: int = 1
116-
_reproducibility: int = 1
117-
118-
@property
119-
def reproducibility(self):
120-
return self._reproducibility
121-
122-
@reproducibility.setter
123-
def reproducibility(self, value):
124-
if value < 1:
125-
logger.warning("reproducibility cannot be less than 1, setting to 1")
126-
value = 1
127-
elif value % 2 == 0:
128-
logger.warning(
129-
"reproducibility level cannot be set to even number, setting to odd"
130-
)
131-
value += 1
132-
self._reproducibility = value
133-
134-
def __post_init__(self) -> None:
135-
if self.reproducibility < 1:
136-
logger.warning("reproducibility cannot be less than 1, setting to 1")
137-
self.reproducibility = 1
138116

139117
def _compute_score(self, responses: t.List[ContextRecallClassification]) -> float:
140118
response = [1 if item.attributed else 0 for item in responses]
@@ -166,7 +144,6 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
166144
),
167145
llm=self.llm,
168146
callbacks=callbacks,
169-
n=self.reproducibility,
170147
)
171148
)
172149
classification_dicts = []

src/ragas/metrics/_faithfulness.py

-17
Original file line numberDiff line numberDiff line change
@@ -178,23 +178,6 @@ class Faithfulness(MetricWithLLM, SingleTurnMetric):
178178
statement_prompt: PydanticPrompt = field(default_factory=LongFormAnswerPrompt)
179179
sentence_segmenter: t.Optional[HasSegmentMethod] = None
180180
max_retries: int = 1
181-
_reproducibility: int = 1
182-
183-
@property
184-
def reproducibility(self):
185-
return self._reproducibility
186-
187-
@reproducibility.setter
188-
def reproducibility(self, value):
189-
if value < 1:
190-
logger.warning("reproducibility cannot be less than 1, setting to 1")
191-
value = 1
192-
elif value % 2 == 0:
193-
logger.warning(
194-
"reproducibility level cannot be set to even number, setting to odd"
195-
)
196-
value += 1
197-
self._reproducibility = value
198181

199182
def __post_init__(self):
200183
if self.sentence_segmenter is None:

src/ragas/metrics/_noise_sensitivity.py

-17
Original file line numberDiff line numberDiff line change
@@ -49,23 +49,6 @@ class NoiseSensitivity(MetricWithLLM, SingleTurnMetric):
4949
statement_prompt: PydanticPrompt = field(default_factory=LongFormAnswerPrompt)
5050
sentence_segmenter: t.Optional[HasSegmentMethod] = None
5151
max_retries: int = 1
52-
_reproducibility: int = 1
53-
54-
@property
55-
def reproducibility(self):
56-
return self._reproducibility
57-
58-
@reproducibility.setter
59-
def reproducibility(self, value):
60-
if value < 1:
61-
logger.warning("reproducibility cannot be less than 1, setting to 1")
62-
value = 1
63-
elif value % 2 == 0:
64-
logger.warning(
65-
"reproducibility level cannot be set to even number, setting to odd"
66-
)
67-
value += 1
68-
self._reproducibility = value
6952

7053
def __post_init__(self):
7154
if self.sentence_segmenter is None:

src/ragas/metrics/base.py

-7
Original file line numberDiff line numberDiff line change
@@ -712,11 +712,4 @@ def get_segmenter(
712712
)
713713

714714

715-
def is_reproducable(metric: Metric) -> bool:
716-
"""
717-
Check if a metric is reproducible by checking if it has a `_reproducibility` attribute.
718-
"""
719-
return hasattr(metric, "_reproducibility")
720-
721-
722715
ensembler = Ensember()

0 commit comments

Comments
 (0)