-
Notifications
You must be signed in to change notification settings - Fork 2.4k
Labels
bug — Something isn't working
Description
What happened?
I need to use the responses API with web search to optimize using training data with GEPA. Yet, when I tried this, it gives this error:
WARNING dspy.adapters.json_adapter: Failed to use structured output format, falling back to JSON mode.
Traceback (most recent call last):
File "/.venv/lib/python3.11/site-packages/dspy/adapters/chat_adapter.py", line 38, in __call__
return super().__call__(lm, lm_kwargs, signature, demos, inputs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/.venv/lib/python3.11/site-packages/dspy/adapters/base.py", line 155, in __call__
outputs = lm(messages=inputs, **lm_kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/.venv/lib/python3.11/site-packages/dspy/utils/callback.py", line 326, in sync_wrapper
return fn(instance, *args, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/.venv/lib/python3.11/site-packages/dspy/clients/base_lm.py", line 86, in __call__
outputs = self._process_lm_response(response, prompt, messages, **kwargs)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/.venv/lib/python3.11/site-packages/dspy/clients/base_lm.py", line 55, in _process_lm_response
outputs = self._process_response(response)
^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
File "/.venv/lib/python3.11/site-packages/dspy/clients/base_lm.py", line 233, in _process_response
output_item_type = output_item.type
^^^^^^^^^^^^^^^^
AttributeError: 'dict' object has no attribute 'type'
And later when it falls back to JSON:
litellm.BadRequestError: OpenAIException - {
"error": {
"message": "Web Search cannot be used with JSON mode.",
"type": "invalid_request_error",
"param": "response_format",
"code": null
}
}
Steps to reproduce
# uv pip install git+https://github.com/stanfordnlp/dspy.git[dev]
import os
import dspy
from datasets import load_dataset
# gpt-5 via the Responses API with the hosted web_search tool enabled.
# Shared provider settings are gathered first so the LM call reads cleanly.
_search_lm_kwargs = dict(
    model_type="responses",
    api_key=os.getenv("OPENAI_API_KEY"),
    api_base=os.getenv("OPENAI_BASE_URL"),
    temperature=1.0,
    max_tokens=128000,
    tools=[{"type": "web_search"}],
    reasoning={"effort": "high"},
)
gpt5_high_search = dspy.LM("openai/gpt-5", **_search_lm_kwargs)
# Same gpt-5 Responses-API configuration, but WITHOUT the web_search tool —
# used for judging, where tool use is unnecessary.
_plain_lm_kwargs = dict(
    model_type="responses",
    api_key=os.getenv("OPENAI_API_KEY"),
    api_base=os.getenv("OPENAI_BASE_URL"),
    temperature=1.0,
    max_tokens=128000,
    reasoning={"effort": "high"},
)
gpt5_high = dspy.LM("openai/gpt-5", **_plain_lm_kwargs)
# NOTE: in a dspy.Signature the class docstring and field `desc` strings are
# runtime prompt text sent to the LM — they must not be edited as documentation.
class Response(dspy.Signature):
    """
    You are a biomedical expert. You must attempt to answer the question below with a correct conclusion.
    """

    question = dspy.InputField(desc="The hard biological question to answer.")
    answer = dspy.OutputField(desc="The answer to the hard biological question.")
# LM-as-judge signature. The docstring and `desc` strings are runtime prompt
# text; the edits below fix two typos in that prompt ("if there if there is"
# duplication and "reseponse"), which is the correction itself, not cosmetics.
class Judge(dspy.Signature):
    """Judge whether the following response to question is correct or not based on the precise and unambiguous correct_answer.
    Your judgement must be in the format and criteria specified below:
    correct: Answer 'yes' if generated response matches the correct_answer, or is within a small margin of error for numerical problems. Answer 'no' otherwise, i.e. if there is any inconsistency, ambiguity, non-equivalency, or if the generated answer is incorrect.
    """

    question = dspy.InputField(desc="The hard biological question to answer.")
    response = dspy.InputField(desc="The generated response.")
    correct_answer = dspy.InputField(desc="The correct answer, ground truth.")
    issues = dspy.OutputField(desc="The specific issues with the answer.")
    correct = dspy.OutputField(desc="'yes' if correct, otherwise 'no'")
class Generator(dspy.Module):
    """Answer generator: runs a single Predict(Response) step under the
    web-search-enabled LM (`gpt5_high_search`)."""

    def __init__(self):
        super().__init__()
        self.generate = dspy.Predict(Response)

    def forward(self, question):
        # Swap in the search-capable LM only for this prediction.
        with dspy.context(lm=gpt5_high_search):
            return self.generate(question=question)
class Verifier(dspy.Module):
    """Judge module: scores a generated response against the ground truth
    using the plain (no-tools) LM (`gpt5_high`)."""

    def __init__(self):
        super().__init__()
        self.verify = dspy.Predict(Judge)

    def forward(self, question, response, correct_answer):
        # Judging never needs web search, so use the tool-free LM.
        with dspy.context(lm=gpt5_high):
            return self.verify(
                question=question,
                response=response,
                correct_answer=correct_answer,
            )
def init_dataset():
    """Load the PMC-OA markdown QA dataset and return (train, val, test).

    The HuggingFace "train" split is divided 80/20 into train/val; the
    HuggingFace "test" split is used unchanged as the held-out test set.

    Returns:
        Tuple of three lists of dspy.Example, each with `question` as input.
    """

    def _to_examples(rows):
        # One place for the row -> dspy.Example conversion (was duplicated
        # verbatim for the train and test splits).
        return [
            dspy.Example(
                {
                    "question": x["question"],
                    "answer": x["answer"],
                }
            ).with_inputs("question")
            for x in rows
        ]

    ds = load_dataset("casperhansen/pmc-oa-markdown-qa")
    train_split = _to_examples(ds["train"])
    test_split = _to_examples(ds["test"])

    # len(train_split) == len(ds["train"]) by construction; compute the
    # 80% cut point once instead of twice.
    cut = int(0.8 * len(train_split))
    return train_split[:cut], train_split[cut:], test_split
# Build the dataset splits and instantiate the two modules once at module
# level; `metric` and `single_run` below read these globals.
train_set, val_set, test_set = init_dataset()
generator = Generator()
verifier = Verifier()
def metric(
    example: dspy.Example,
    prediction: dspy.Prediction,
    trace=None,
    pred_name=None,
    pred_trace=None,
) -> dspy.Prediction:
    """GEPA feedback metric: LM-as-judge scoring of a generated answer.

    Args:
        example: Gold example carrying `question` and `answer`.
        prediction: Generator output carrying `answer`.
        trace, pred_name, pred_trace: Accepted for GEPA's metric protocol;
            unused here.

    Returns:
        dspy.Prediction with `score` (1.0/0.0) and textual `feedback`.
    """
    try:
        # The verifier returns a dspy.Prediction (fields: issues, correct) —
        # the original `judgement: Judge` annotation was incorrect.
        judgement: dspy.Prediction = verifier(
            question=example.question,
            response=prediction.answer,
            correct_answer=example.answer,
        )
        # strip() guards against the judge emitting stray whitespace/newlines.
        score = 1.0 if judgement.correct.strip().lower() == "yes" else 0.0
        return dspy.Prediction(
            score=score,
            feedback=judgement.issues,
        )
    except Exception as ex:
        # Deliberate best-effort: a failed judgement scores 0 with a note
        # rather than aborting the whole evaluation/optimization run.
        print(ex)
        return dspy.Prediction(
            score=0.0,
            feedback="Error during metric computation",
        )
def single_run():
    """Smoke test: one generate -> verify round-trip on the first train example."""
    example = train_set[0]
    pred = generator(**example.inputs())
    # Call the verifier with keyword arguments, consistent with every other
    # call site (positional forwarding through dspy.Module.__call__ is fragile).
    output = verifier(
        question=example.question,
        response=pred.answer,
        correct_answer=example.answer,
    )
    print(pred)
    print("=" * 80)
    print(output)
# --- Baseline evaluation, GEPA optimization, then re-evaluation. ---
print(">>>>> eval on generator")
# max_errors=1 makes the run fail fast on the first error so the underlying
# exception (the responses-API bug being reported) surfaces with a traceback.
evaluator = dspy.Evaluate(
    devset=test_set,
    metric=metric,
    num_threads=100,
    display_progress=True,
    max_errors=1,
    provide_traceback=True,
)
evaluator(generator)
optimizer = dspy.GEPA(
    auto="light",
    metric=metric,
    reflection_lm=gpt5_high,
    # increasing batch size can lead to worse performance due to increased context size
    reflection_minibatch_size=5,
    use_merge=True,
    max_merge_invocations=10,
    num_threads=100,
)
print(">>>>> training")
optimized_generator = optimizer.compile(
    generator,
    trainset=train_set,
    valset=val_set,
)
optimized_generator.save("optimized_generator.json")
print(">>>>> eval on optimized generator")
evaluator(optimized_generator)

DSPy version
latest on main branch (e9c36ab)
Metadata
Metadata
Assignees
Labels
bug — Something isn't working