Skip to content

Commit

Permalink
fix: overlength gen
Browse files Browse the repository at this point in the history
  • Loading branch information
hanxiao committed Feb 22, 2025
1 parent 0ed7321 commit c8cd9bc
Show file tree
Hide file tree
Showing 5 changed files with 166 additions and 64 deletions.
2 changes: 1 addition & 1 deletion config.json
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@
"default": {
"model": "gemini-2.0-flash",
"temperature": 0,
- "maxTokens": 8000
+ "maxTokens": 1000
},
"tools": {
"coder": { "temperature": 0.7 },
Expand Down
8 changes: 4 additions & 4 deletions src/agent.ts
Original file line number Diff line number Diff line change
Expand Up @@ -342,7 +342,7 @@ export async function getResponse(question?: string,
allowReflect = allowReflect && (gaps.length <= 1);
const currentQuestion: string = gaps.length > 0 ? gaps.shift()! : question
if (!evaluationMetrics[currentQuestion]) {
- evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context.tokenTracker)
+ evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context)
}

// update all urls with buildURLMap
Expand Down Expand Up @@ -406,7 +406,7 @@ export async function getResponse(question?: string,

const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
evaluationMetrics[currentQuestion],
- [context.tokenTracker, context.actionTracker],
+ context,
visitedURLs
);

Expand Down Expand Up @@ -446,7 +446,7 @@ The evaluator thinks your answer is bad because:
${evaluation.think}
`);
// store the bad context and reset the diary context
- const {response: errorAnalysis} = await analyzeSteps(diaryContext, context.tokenTracker);
+ const {response: errorAnalysis} = await analyzeSteps(diaryContext, context);

allKnowledge.push({
question: currentQuestion,
Expand Down Expand Up @@ -535,7 +535,7 @@ But then you realized you have asked them before. You decided to to think out of
}
} else if (thisStep.action === 'search' && thisStep.searchQuery) {
// rewrite queries
- let {queries: keywordsQueries} = await rewriteQuery(thisStep, context.tokenTracker);
+ let {queries: keywordsQueries} = await rewriteQuery(thisStep, context);

// add the original query before rewrite to the keywordsQueries
keywordsQueries.push(thisStep.searchQuery)
Expand Down
15 changes: 8 additions & 7 deletions src/tools/error-analyzer.ts
Original file line number Diff line number Diff line change
@@ -1,13 +1,12 @@
import {z} from 'zod';
- import {TokenTracker} from "../utils/token-tracker";
- import {ErrorAnalysisResponse} from '../types';
+ import {ErrorAnalysisResponse, TrackerContext} from '../types';
import {ObjectGeneratorSafe} from "../utils/safe-generator";


const responseSchema = z.object({
- recap: z.string().describe('Recap of the actions taken and the steps conducted'),
- blame: z.string().describe('Which action or the step was the root cause of the answer rejection'),
- improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.'),
+ recap: z.string().describe('Recap of the actions taken and the steps conducted in first person narrative.').max(500),
+ blame: z.string().describe('Which action or the step was the root cause of the answer rejection').max(500),
+ improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.').max(500),
questionsToAnswer: z.array(
z.string().describe("each question must be a single line, concise and clear. not composite or compound, less than 20 words.")
).max(2)
Expand Down Expand Up @@ -111,10 +110,10 @@ ${diaryContext.join('\n')}
const TOOL_NAME = 'errorAnalyzer';
export async function analyzeSteps(
diaryContext: string[],
- tracker?: TokenTracker
+ trackers?: TrackerContext
): Promise<{ response: ErrorAnalysisResponse }> {
try {
- const generator = new ObjectGeneratorSafe(tracker);
+ const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
const prompt = getPrompt(diaryContext);

const result = await generator.generateObject({
Expand All @@ -124,6 +123,8 @@ export async function analyzeSteps(
});

console.log(TOOL_NAME, result.object);
+ trackers?.actionTracker.trackThink(result.object.blame);
+ trackers?.actionTracker.trackThink(result.object.improvement);

return { response: result.object };

Expand Down
25 changes: 12 additions & 13 deletions src/tools/evaluator.ts
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import {z} from 'zod';
import {GenerateObjectResult} from 'ai';
- import {TokenTracker} from "../utils/token-tracker";
- import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType} from '../types';
+ import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType, TrackerContext} from '../types';
import {readUrl, removeAllLineBreaks} from "./read";
import {ObjectGeneratorSafe} from "../utils/safe-generator";
- import {ActionTracker} from "../utils/action-tracker";


const baseSchema = {
Expand Down Expand Up @@ -263,7 +261,7 @@ Answer: ${JSON.stringify(answer)}`;
const questionEvaluationSchema = z.object({
needsFreshness: z.boolean().describe('Whether the question requires freshness check'),
needsPlurality: z.boolean().describe('Whether the question requires plurality check'),
- think: z.string().describe('Explanation of why these checks are needed').max(500),
+ think: z.string().describe('A very concise explain of why you choose those checks are needed in first person, extremely short.').max(500),
languageStyle: z.string().describe('The language being used and the overall vibe/mood of the question').max(50),
});

Expand Down Expand Up @@ -349,10 +347,10 @@ const TOOL_NAME = 'evaluator';

export async function evaluateQuestion(
question: string,
- tracker?: TokenTracker
+ trackers?: TrackerContext
): Promise<EvaluationCriteria> {
try {
- const generator = new ObjectGeneratorSafe(tracker);
+ const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);

const result = await generator.generateObject({
model: TOOL_NAME,
Expand All @@ -368,6 +366,7 @@ export async function evaluateQuestion(
if (result.object.needsPlurality) types.push('plurality');

console.log('Question Metrics:', types);
+ trackers?.actionTracker.trackThink(result.object.think);

// Always evaluate definitive first, then freshness (if needed), then plurality (if needed)
return {types, languageStyle: result.object.languageStyle};
Expand All @@ -386,17 +385,17 @@ async function performEvaluation<T>(
schema: z.ZodType<T>;
prompt: string;
},
- trackers: [TokenTracker, ActionTracker],
+ trackers: TrackerContext,
): Promise<GenerateObjectResult<T>> {
- const generator = new ObjectGeneratorSafe(trackers[0]);
+ const generator = new ObjectGeneratorSafe(trackers.tokenTracker);

const result = await generator.generateObject({
model: TOOL_NAME,
schema: params.schema,
prompt: params.prompt,
}) as GenerateObjectResult<any>;

- trackers[1].trackThink(result.object.think)
+ trackers.actionTracker.trackThink(result.object.think)

console.log(`${evaluationType} ${TOOL_NAME}`, result.object);

Expand All @@ -409,7 +408,7 @@ export async function evaluateAnswer(
question: string,
action: AnswerAction,
evaluationCri: EvaluationCriteria,
- trackers: [TokenTracker, ActionTracker],
+ trackers: TrackerContext,
visitedURLs: string[] = []
): Promise<{ response: EvaluationResponse }> {
let result;
Expand Down Expand Up @@ -504,14 +503,14 @@ export async function evaluateAnswer(
}

// Helper function to fetch and combine source content
- async function fetchSourceContent(urls: string[], trackers: [TokenTracker, ActionTracker]): Promise<string> {
+ async function fetchSourceContent(urls: string[], trackers: TrackerContext): Promise<string> {
if (!urls.length) return '';
- trackers[1].trackThink('Let me fetch the source content to verify the answer.');
+ trackers.actionTracker.trackThink('Let me fetch the source content to verify the answer.');
try {
const results = await Promise.all(
urls.map(async (url) => {
try {
- const {response} = await readUrl(url, trackers[0]);
+ const {response} = await readUrl(url, trackers.tokenTracker);
const content = response?.data?.content || '';
return removeAllLineBreaks(content);
} catch (error) {
Expand Down
Loading

0 comments on commit c8cd9bc

Please sign in to comment.