fix: overlength gen

jina-ai · Feb 22, 2025 · c8cd9bc
1 parent 0ed7321
commit c8cd9bc
Show file tree

Hide file tree

Showing 5 changed files with 166 additions and 64 deletions.
diff --git a/config.json b/config.json
@@ -30,7 +30,7 @@
       "default": {
         "model": "gemini-2.0-flash",
         "temperature": 0,
-        "maxTokens": 8000
+        "maxTokens": 1000
       },
       "tools": {
         "coder": { "temperature": 0.7 },

diff --git a/src/agent.ts b/src/agent.ts
@@ -342,7 +342,7 @@ export async function getResponse(question?: string,
     allowReflect = allowReflect && (gaps.length <= 1);
     const currentQuestion: string = gaps.length > 0 ? gaps.shift()! : question
     if (!evaluationMetrics[currentQuestion]) {
-      evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context.tokenTracker)
+      evaluationMetrics[currentQuestion] = await evaluateQuestion(currentQuestion, context)
     }
 
     // update all urls with buildURLMap
@@ -406,7 +406,7 @@ export async function getResponse(question?: string,
 
       const {response: evaluation} = await evaluateAnswer(currentQuestion, thisStep,
         evaluationMetrics[currentQuestion],
-        [context.tokenTracker, context.actionTracker],
+        context,
         visitedURLs
       );
 
@@ -446,7 +446,7 @@ The evaluator thinks your answer is bad because:
 ${evaluation.think}
 `);
             // store the bad context and reset the diary context
-            const {response: errorAnalysis} = await analyzeSteps(diaryContext, context.tokenTracker);
+            const {response: errorAnalysis} = await analyzeSteps(diaryContext, context);
 
             allKnowledge.push({
               question: currentQuestion,
@@ -535,7 +535,7 @@ But then you realized you have asked them before. You decided to to think out of
       }
     } else if (thisStep.action === 'search' && thisStep.searchQuery) {
       // rewrite queries
-      let {queries: keywordsQueries} = await rewriteQuery(thisStep, context.tokenTracker);
+      let {queries: keywordsQueries} = await rewriteQuery(thisStep, context);
 
       // add the original query before rewrite to the keywordsQueries
       keywordsQueries.push(thisStep.searchQuery)

diff --git a/src/tools/error-analyzer.ts b/src/tools/error-analyzer.ts
@@ -1,13 +1,12 @@
 import {z} from 'zod';
-import {TokenTracker} from "../utils/token-tracker";
-import {ErrorAnalysisResponse} from '../types';
+import {ErrorAnalysisResponse, TrackerContext} from '../types';
 import {ObjectGeneratorSafe} from "../utils/safe-generator";
 
 
 const responseSchema = z.object({
-  recap: z.string().describe('Recap of the actions taken and the steps conducted'),
-  blame: z.string().describe('Which action or the step was the root cause of the answer rejection'),
-  improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.'),
+  recap: z.string().describe('Recap of the actions taken and the steps conducted in first person narrative.').max(500),
+  blame: z.string().describe('Which action or the step was the root cause of the answer rejection').max(500),
+  improvement: z.string().describe('Suggested key improvement for the next iteration, do not use bullet points, be concise and hot-take vibe.').max(500),
   questionsToAnswer: z.array(
     z.string().describe("each question must be a single line, concise and clear. not composite or compound, less than 20 words.")
   ).max(2)
@@ -111,10 +110,10 @@ ${diaryContext.join('\n')}
 const TOOL_NAME = 'errorAnalyzer';
 export async function analyzeSteps(
   diaryContext: string[],
-  tracker?: TokenTracker
+  trackers?: TrackerContext
 ): Promise<{ response: ErrorAnalysisResponse }> {
   try {
-    const generator = new ObjectGeneratorSafe(tracker);
+    const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
     const prompt = getPrompt(diaryContext);
 
     const result = await generator.generateObject({
@@ -124,6 +123,8 @@ export async function analyzeSteps(
     });
 
     console.log(TOOL_NAME, result.object);
+    trackers?.actionTracker.trackThink(result.object.blame);
+    trackers?.actionTracker.trackThink(result.object.improvement);
 
     return { response: result.object };
 

diff --git a/src/tools/evaluator.ts b/src/tools/evaluator.ts
@@ -1,10 +1,8 @@
 import {z} from 'zod';
 import {GenerateObjectResult} from 'ai';
-import {TokenTracker} from "../utils/token-tracker";
-import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType} from '../types';
+import {AnswerAction, EvaluationCriteria, EvaluationResponse, EvaluationType, TrackerContext} from '../types';
 import {readUrl, removeAllLineBreaks} from "./read";
 import {ObjectGeneratorSafe} from "../utils/safe-generator";
-import {ActionTracker} from "../utils/action-tracker";
 
 
 const baseSchema = {
@@ -263,7 +261,7 @@ Answer: ${JSON.stringify(answer)}`;
 const questionEvaluationSchema = z.object({
   needsFreshness: z.boolean().describe('Whether the question requires freshness check'),
   needsPlurality: z.boolean().describe('Whether the question requires plurality check'),
-  think: z.string().describe('Explanation of why these checks are needed').max(500),
+  think: z.string().describe('A very concise explain of why you choose those checks are needed in first person, extremely short.').max(500),
   languageStyle: z.string().describe('The language being used and the overall vibe/mood of the question').max(50),
 });
 
@@ -349,10 +347,10 @@ const TOOL_NAME = 'evaluator';
 
 export async function evaluateQuestion(
   question: string,
-  tracker?: TokenTracker
+  trackers?: TrackerContext
 ): Promise<EvaluationCriteria> {
   try {
-    const generator = new ObjectGeneratorSafe(tracker);
+    const generator = new ObjectGeneratorSafe(trackers?.tokenTracker);
 
     const result = await generator.generateObject({
       model: TOOL_NAME,
@@ -368,6 +366,7 @@ export async function evaluateQuestion(
     if (result.object.needsPlurality) types.push('plurality');
 
     console.log('Question Metrics:', types);
+    trackers?.actionTracker.trackThink(result.object.think);
 
     // Always evaluate definitive first, then freshness (if needed), then plurality (if needed)
     return {types, languageStyle: result.object.languageStyle};
@@ -386,17 +385,17 @@ async function performEvaluation<T>(
     schema: z.ZodType<T>;
     prompt: string;
   },
-  trackers: [TokenTracker, ActionTracker],
+  trackers: TrackerContext,
 ): Promise<GenerateObjectResult<T>> {
-  const generator = new ObjectGeneratorSafe(trackers[0]);
+  const generator = new ObjectGeneratorSafe(trackers.tokenTracker);
 
   const result = await generator.generateObject({
     model: TOOL_NAME,
     schema: params.schema,
     prompt: params.prompt,
   }) as GenerateObjectResult<any>;
 
-  trackers[1].trackThink(result.object.think)
+  trackers.actionTracker.trackThink(result.object.think)
 
   console.log(`${evaluationType} ${TOOL_NAME}`, result.object);
 
@@ -409,7 +408,7 @@ export async function evaluateAnswer(
   question: string,
   action: AnswerAction,
   evaluationCri: EvaluationCriteria,
-  trackers: [TokenTracker, ActionTracker],
+  trackers: TrackerContext,
   visitedURLs: string[] = []
 ): Promise<{ response: EvaluationResponse }> {
   let result;
@@ -504,14 +503,14 @@ export async function evaluateAnswer(
 }
 
 // Helper function to fetch and combine source content
-async function fetchSourceContent(urls: string[], trackers: [TokenTracker, ActionTracker]): Promise<string> {
+async function fetchSourceContent(urls: string[], trackers: TrackerContext): Promise<string> {
   if (!urls.length) return '';
-  trackers[1].trackThink('Let me fetch the source content to verify the answer.');
+  trackers.actionTracker.trackThink('Let me fetch the source content to verify the answer.');
   try {
     const results = await Promise.all(
       urls.map(async (url) => {
         try {
-          const {response} = await readUrl(url, trackers[0]);
+          const {response} = await readUrl(url, trackers.tokenTracker);
           const content = response?.data?.content || '';
           return removeAllLineBreaks(content);
         } catch (error) {