Commit fe379a1

feat (experimental) added new prompt and metric into ragas.experimental (#1240)
You can use it like:

```py
from ragas.experimental.metrics import FaithfulnessExperimental
from ragas.metrics import faithfulness
from ragas import evaluate

f = FaithfulnessExperimental(llm=LangchainLLMWrapper(gpt4o))
faithfulness.llm = LangchainLLMWrapper(gpt4o)

# row = amnesty_qa["eval"][0]
# await f.ascore(row)
# await faithfulness.ascore(row)

r = evaluate(
    amnesty_qa["eval"].select(range(10)),
    metrics=[f, faithfulness],
    raise_exceptions=True,
    callbacks=[],
)
```
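The snippet above leaves its setup implicit: `LangchainLLMWrapper`, `gpt4o`, and `amnesty_qa` are never defined. A minimal sketch of the assumed wiring (the model and dataset here are illustrative stand-ins, not part of this commit):

```py
from datasets import load_dataset
from langchain_openai import ChatOpenAI

from ragas.llms import LangchainLLMWrapper

# Stand-in evaluator LLM; any LangChain chat model can be wrapped the same way.
gpt4o = ChatOpenAI(model="gpt-4o")

# Stand-in dataset with question/answer/contexts columns, as used in the ragas docs.
amnesty_qa = load_dataset("explodinggradients/amnesty_qa", "english_v2")
```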
1 parent 68d52b9 commit fe379a1

File tree

6 files changed (+48, -41 lines)


src/experimental/tests/test_prompt.py (+9, -13)
```diff
@@ -1,11 +1,11 @@
-from ragas_experimental.llms.prompt import StringPrompt, StringIO
+import pytest
+from langchain_core.outputs import Generation, LLMResult
+from ragas_experimental.llms.prompt import StringIO, StringPrompt
+
 from ragas.llms.base import BaseRagasLLM
-from langchain_core.outputs import LLMResult, Generation
 from ragas.llms.prompt import PromptValue
 from ragas.run_config import RunConfig

-import pytest
-

 class EchoLLM(BaseRagasLLM):
     def generate_text(  # type: ignore
@@ -37,10 +37,11 @@ async def test_string_prompt():


 def test_process_fields():
-    from ragas_experimental.llms.prompt import PydanticPrompt, StringIO
-    from pydantic import BaseModel
     from enum import Enum

+    from pydantic import BaseModel
+    from ragas_experimental.llms.prompt import PydanticPrompt, StringIO
+
     class Categories(str, Enum):
         science = "science"
         commerce = "commerce"
@@ -63,10 +64,7 @@ class JokeGenerator(PydanticPrompt[InputModel, StringIO]):

 @pytest.mark.asyncio
 async def test_pydantic_prompt_io():
-    from ragas_experimental.llms.prompt import (
-        PydanticPrompt,
-        StringIO,
-    )
+    from ragas_experimental.llms.prompt import PydanticPrompt, StringIO

     class Prompt(PydanticPrompt[StringIO, StringIO]):
         instruction = ""
@@ -82,9 +80,7 @@ class Prompt(PydanticPrompt[StringIO, StringIO]):


 def test_pydantic_prompt_examples():
-    from ragas_experimental.llms.prompt import (
-        PydanticPrompt,
-    )
+    from ragas_experimental.llms.prompt import PydanticPrompt

     class Prompt(PydanticPrompt[StringIO, StringIO]):
         instruction = ""
```

src/ragas/experimental/llms/__init__.py

Whitespace-only changes.

src/experimental/ragas_experimental/llms/prompt.py renamed to src/ragas/experimental/llms/prompt.py (+24, -17)
```diff
@@ -1,21 +1,21 @@
 from __future__ import annotations

-from abc import ABC, abstractmethod
-from dataclasses import dataclass
-import json
 import typing as t
+from abc import ABC, abstractmethod

-from ragas.llms.output_parser import RagasoutputParser
-from ragas.llms.prompt import PromptValue
+import pydantic

 # Check Pydantic version
 from pydantic import BaseModel
-import pydantic
+
+from ragas.llms.output_parser import RagasoutputParser
+from ragas.llms.prompt import PromptValue

 if t.TYPE_CHECKING:
-    from ragas.llms.base import BaseRagasLLM
     from langchain_core.callbacks import Callbacks

+    from ragas.llms.base import BaseRagasLLM
+
 PYDANTIC_V2 = pydantic.VERSION.startswith("2.")


@@ -24,7 +24,7 @@ def __init__(self, llm):
         self.llm: BaseRagasLLM = llm

     @abstractmethod
-    async def generate(self, data: t.Any) -> t.Any:
+    async def generate(self, data: t.Any, callbacks: Callbacks = None) -> t.Any:
         pass


@@ -57,12 +57,14 @@ def to_json(model: t.Any, indent: int = 4) -> str:
         return model.json(indent=indent)


-def model_to_json_schema(model: t.Type[BaseModel]) -> dict:
+def model_to_json_schema(model: t.Type[BaseModel]) -> str:
     if PYDANTIC_V2:
-        return model.model_json_schema()
+        # NOTE: this is not the same as model.schema_json()
+        return model.model_json_schema()  # type: ignore
     else:
         return model.schema_json()

+
 InputModel = t.TypeVar("InputModel", bound=BaseModel)
 OutputModel = t.TypeVar("OutputModel", bound=BaseModel)

@@ -96,9 +98,11 @@ def generate_examples(self):
             example_strings.append(
                 self.instruction
                 + "\n"
-                + "input: " + to_json(input_data, indent=4)
+                + "input: "
+                + to_json(input_data, indent=4)
                 + "\n"
-                + "output: " + to_json(output_data, indent=4)
+                + "output: "
+                + to_json(output_data, indent=4)
             )

         return (
@@ -118,12 +122,15 @@ def to_string(self, data: InputModel) -> str:
             + "\n"
             + self.generate_examples()
             + "\nNow perform the above instruction with the following input\n"
-            + "input: " + to_json(data, indent=4)
+            + "input: "
+            + to_json(data, indent=4)
             + "\n"
             + "output: "
         )

-    async def generate(self, data: InputModel, callbacks: Callbacks) -> OutputModel:
+    async def generate(
+        self, data: InputModel, callbacks: Callbacks = None
+    ) -> OutputModel:
         prompt_value = PromptValue(prompt_str=self.to_string(data))
         resp = await self.llm.generate(prompt_value, callbacks=callbacks)
         resp_text = resp.generations[0][0].text
@@ -135,7 +142,7 @@ async def generate(self, data: InputModel, callbacks: Callbacks) -> OutputModel:


 class StringPrompt(BasePrompt):
-    async def generate(self, data: str) -> str:
+    async def generate(self, data: str, callbacks: Callbacks = None) -> str:
         prompt_value = PromptValue(prompt_str=data)
-        llm_result = await self.llm.agenerate_text(prompt_value)
-        return llm_result.generations[0][0].text
+        llm_result = await self.llm.agenerate_text(prompt_value, callbacks=callbacks)
+        return llm_result.generations[0][0].text
```
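For orientation, a minimal sketch of how a `PydanticPrompt` subclass from the renamed module is defined and invoked, following the shapes visible in the diff and tests above. The `input_model`/`output_model` class attributes and the `text` field on `StringIO` are assumptions, not shown in this diff:

```py
from pydantic import BaseModel

from ragas.experimental.llms.prompt import PydanticPrompt, StringIO


class JokeInput(BaseModel):
    # hypothetical input schema, for illustration only
    topic: str


class JokeGenerator(PydanticPrompt[JokeInput, StringIO]):
    instruction = "Generate a short joke about the given topic."
    input_model = JokeInput  # assumed attribute, mirrors the generic parameters
    output_model = StringIO  # assumed attribute
    examples = [
        (JokeInput(topic="chickens"), StringIO(text="Why did the chicken cross the road?")),
    ]


# With `llm` any BaseRagasLLM (e.g. a LangchainLLMWrapper), inside async code:
# joke = await JokeGenerator(llm).generate(JokeInput(topic="python"))
# print(joke.text)
```

Since `generate` now defaults `callbacks` to `None`, callers that don't thread LangChain callbacks through can omit the argument entirely.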
src/experimental/ragas_experimental/metrics/__init__.py renamed to src/ragas/experimental/metrics/__init__.py (+1, -1)

```diff
@@ -1,3 +1,3 @@
 from ._faithfulness import FaithfulnessExperimental

-__all__ = ["FaithfulnessExperimental"]
+__all__ = ["FaithfulnessExperimental"]
```

(The visible text is unchanged; the edit only adds the missing trailing newline at end of file.)

src/experimental/ragas_experimental/metrics/_faithfulness.py renamed to src/ragas/experimental/metrics/_faithfulness.py (+10, -8)
```diff
@@ -1,17 +1,18 @@
 from __future__ import annotations

-import typing as t
 import logging
+import typing as t
 from dataclasses import dataclass

-from pydantic import BaseModel, Field
 import numpy as np
+from pydantic import BaseModel, Field

+from ragas.experimental.llms.prompt import PydanticPrompt
 from ragas.metrics.base import EvaluationMode, MetricWithLLM, get_segmenter
-from ragas_experimental.llms.prompt import PydanticPrompt

 if t.TYPE_CHECKING:
     from langchain_core.callbacks import Callbacks
+
     from ragas.metrics._faithfulness import HasSegmentMethod


@@ -187,6 +188,8 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
         answer, question, contexts = row["answer"], row["question"], row["contexts"]

         # get the sentences from the answer
+        if self.sentence_segmenter is None:
+            raise ValueError("Sentence segmenter is not set")
         sentences = self.sentence_segmenter.segment(answer)
         # TODO: why do we do this?
         sentences = [
@@ -198,9 +201,9 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
                 answer=answer,
                 sentences={i: sentence for i, sentence in enumerate(sentences)},
             ),
-            callbacks=callbacks
+            callbacks=callbacks,
         )
-
+
         statements = [
             statement
             for component in sentence_components.sentences
@@ -211,9 +214,9 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
                 context="\n".join(contexts),
                 statements=statements,
             ),
-            callbacks=callbacks
+            callbacks=callbacks,
         )
-
+
         # compute the score
         num_faithful_statements = sum(
             verdict.verdict for verdict in verdicts.statements
@@ -223,4 +226,3 @@ async def _ascore(self, row: t.Dict, callbacks: Callbacks) -> float:
         else:
             score = np.nan
         return score
-
```
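Given the segmenter guard added above, `FaithfulnessExperimental._ascore` consumes a row with `question`, `answer`, and `contexts` keys. A minimal sketch of scoring one row directly, with `evaluator_llm` standing in for a wrapped LLM such as `LangchainLLMWrapper(gpt4o)` from the commit message:

```py
from ragas.experimental.metrics import FaithfulnessExperimental

metric = FaithfulnessExperimental(llm=evaluator_llm)

row = {
    "question": "Where is the Eiffel Tower?",
    "answer": "The Eiffel Tower is in Paris.",
    "contexts": ["The Eiffel Tower is located in Paris, France."],
}

# Inside async code; returns a float, or np.nan when no statements were generated.
# score = await metric.ascore(row)
```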

src/ragas/llms/base.py (+4, -2)
```diff
@@ -63,7 +63,8 @@ def generate_text(
         temperature: float = 1e-8,
         stop: t.Optional[t.List[str]] = None,
         callbacks: Callbacks = None,
-    ) -> LLMResult: ...
+    ) -> LLMResult:
+        ...

     @abstractmethod
     async def agenerate_text(
@@ -73,7 +74,8 @@ async def agenerate_text(
         temperature: t.Optional[float] = None,
         stop: t.Optional[t.List[str]] = None,
         callbacks: Callbacks = None,
-    ) -> LLMResult: ...
+    ) -> LLMResult:
+        ...

     async def generate(
         self,
```
