Labels
bug (Something isn't working)
Description
What happened?
I need to use the Responses API with web search so that GEPA can optimize over my training data. However, when I try this, I get the following error:
WARNING dspy.adapters.json_adapter: Failed to use structured output format, falling back to JSON mode.
Traceback (most recent call last):
  File "/.venv/lib/python3.11/site-packages/dspy/adapters/chat_adapter.py", line 38, in __call__
    return super().__call__(lm, lm_kwargs, signature, demos, inputs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/.venv/lib/python3.11/site-packages/dspy/adapters/base.py", line 155, in __call__
    outputs = lm(messages=inputs, **lm_kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/.venv/lib/python3.11/site-packages/dspy/utils/callback.py", line 326, in sync_wrapper
    return fn(instance, *args, **kwargs)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/.venv/lib/python3.11/site-packages/dspy/clients/base_lm.py", line 86, in __call__
    outputs = self._process_lm_response(response, prompt, messages, **kwargs)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/.venv/lib/python3.11/site-packages/dspy/clients/base_lm.py", line 55, in _process_lm_response
    outputs = self._process_response(response)
              ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/.venv/lib/python3.11/site-packages/dspy/clients/base_lm.py", line 233, in _process_response
    output_item_type = output_item.type
                       ^^^^^^^^^^^^^^^^
AttributeError: 'dict' object has no attribute 'type'
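From the traceback, _process_response in dspy/clients/base_lm.py assumes every item in the Responses API output exposes a .type attribute; with web_search enabled, at least some output items evidently arrive as plain dicts instead. A sketch of the kind of defensive access that would avoid the crash (the helper name is my own, not DSPy's):
def _output_item_type(output_item):
    # Responses API output items can be SDK objects (attribute access)
    # or plain dicts (key access), depending on which tools are in play.
    if isinstance(output_item, dict):
        return output_item.get("type")
    return getattr(output_item, "type", None)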
And then, once it falls back to JSON mode:
litellm.BadRequestError: OpenAIException - {
  "error": {
    "message": "Web Search cannot be used with JSON mode.",
    "type": "invalid_request_error",
    "param": "response_format",
    "code": null
  }
}
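The JSON-mode conflict does not need GEPA at all; a reduced sketch (untested, question text made up, API key and base taken from the environment as below) that should go down the same path and hit the AttributeError followed by the JSON-mode BadRequestError:
import dspy

lm = dspy.LM(
    "openai/gpt-5",
    model_type="responses",
    temperature=1.0,
    max_tokens=16000,
    tools=[{"type": "web_search"}],
)

with dspy.context(lm=lm):
    # A plain Predict call routes through the same adapter stack
    # as the full pipeline below.
    predict = dspy.Predict("question -> answer")
    predict(question="What is the half-life of caffeine in healthy adults?")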
Steps to reproduce
# uv pip install git+https://github.com/stanfordnlp/dspy.git[dev]
import os
import dspy
from datasets import load_dataset
gpt5_high_search = dspy.LM(
    "openai/gpt-5",
    model_type="responses",
    api_key=os.getenv("OPENAI_API_KEY"),
    api_base=os.getenv("OPENAI_BASE_URL"),
    temperature=1.0,
    max_tokens=128000,
    tools=[{"type": "web_search"}],
    reasoning={"effort": "high"},
)
gpt5_high = dspy.LM(
    "openai/gpt-5",
    model_type="responses",
    api_key=os.getenv("OPENAI_API_KEY"),
    api_base=os.getenv("OPENAI_BASE_URL"),
    temperature=1.0,
    max_tokens=128000,
    reasoning={"effort": "high"},
)
class Response(dspy.Signature):
    """
    You are a biomedical expert. You must attempt to answer the question below with a correct conclusion.
    """
    question = dspy.InputField(desc="The hard biological question to answer.")
    answer = dspy.OutputField(desc="The answer to the hard biological question.")
class Judge(dspy.Signature):
    """Judge whether the following response to question is correct or not based on the precise and unambiguous correct_answer.
Your judgement must be in the format and criteria specified below:
correct: Answer 'yes' if the generated response matches the correct_answer, or is within a small margin of error for numerical problems. Answer 'no' otherwise, i.e. if there is any inconsistency, ambiguity, non-equivalency, or if the generated answer is incorrect.
    """
    question = dspy.InputField(desc="The hard biological question to answer.")
    response = dspy.InputField(desc="The generated response.")
    correct_answer = dspy.InputField(desc="The correct answer, ground truth.")
    issues = dspy.OutputField(desc="The specific issues with the answer.")
    correct = dspy.OutputField(desc="'yes' if correct, otherwise 'no'")
class Generator(dspy.Module):
    def __init__(self):
        super().__init__()
        self.generate = dspy.Predict(Response)
    
    def forward(self, question):
        with dspy.context(lm=gpt5_high_search):
            result = self.generate(question=question)
        
        return result
class Verifier(dspy.Module):
    def __init__(self):
        super().__init__()
        self.verify = dspy.Predict(Judge)
    
    def forward(self, question, response, correct_answer):
        with dspy.context(lm=gpt5_high):
            result = self.verify(question=question, response=response, correct_answer=correct_answer)
        
        return result
def init_dataset():
    ds = load_dataset("casperhansen/pmc-oa-markdown-qa")
    train_split = [
        dspy.Example(
            {
                "question": x["question"],
                "answer": x["answer"],
            }
        ).with_inputs("question")
        for x in ds["train"]
    ]
    test_split = [
        dspy.Example(
            {
                "question": x["question"],
                "answer": x["answer"],
            }
        ).with_inputs("question")
        for x in ds["test"]
    ]
    train_set = train_split[:int(0.8 * len(ds["train"]))]
    val_set = train_split[int(0.8 * len(ds["train"])):]
    test_set = test_split
    return train_set, val_set, test_set
train_set, val_set, test_set = init_dataset()
generator = Generator()
verifier = Verifier()
def metric(
    example: dspy.Example,
    prediction: dspy.Prediction,
    trace=None,
    pred_name=None,
    pred_trace=None,
) -> dspy.Prediction:
    try:
        judgement: dspy.Prediction = verifier(
            question=example.question,
            response=prediction.answer,
            correct_answer=example.answer,
        )
        if judgement.correct.lower() == "yes":
            score = 1.0
        else:
            score = 0.0
        return dspy.Prediction(
            score=score,
            feedback=judgement.issues,
        )
    except Exception as ex:
        print(ex)
        return dspy.Prediction(
            score=0.0,
            feedback="Error during metric computation",
        )
def single_run():
    pred = generator(**train_set[0].inputs())
    output = verifier(question=train_set[0].question, response=pred.answer, correct_answer=train_set[0].answer)
    print(pred)
    print("="*80)
    print(output)
print(">>>>> eval on generator")
evaluator = dspy.Evaluate(
    devset=test_set,
    metric=metric,
    num_threads=100,
    display_progress=True,
    max_errors=1,
    provide_traceback=True,
)
evaluator(generator)
optimizer = dspy.GEPA(
    auto="light",
    metric=metric,
    reflection_lm=gpt5_high,
    # increasing batch size can lead to worse performance due to increased context size
    reflection_minibatch_size=5,
    use_merge=True,
    max_merge_invocations=10,
    num_threads=100,
)
print(">>>>> training")
optimized_generator = optimizer.compile(
    generator,
    trainset=train_set,
    valset=val_set,
)
optimized_generator.save("optimized_generator.json")
print(">>>>> eval on optimized generator")
evaluator(optimized_generator)
DSPy version
latest on main branch (e9c36ab)