Skip to content

Commit 4f1afba

Browse files
authored
[feat][evaluation]upgrade llm parse (#278)
* 优化llm评估器解析 * fix lint * add ut
1 parent d1a1c70 commit 4f1afba

File tree

2 files changed

+836
-35
lines changed

2 files changed

+836
-35
lines changed

backend/modules/evaluation/domain/service/evaluator_source_prompt_impl.go

Lines changed: 146 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,6 @@ package service
66
import (
77
"context"
88
json2 "encoding/json"
9-
"fmt"
109
"io"
1110
"regexp"
1211
"strconv"
@@ -363,59 +362,93 @@ type outputMsgFormat struct {
363362
Reason string `json:"reason"`
364363
}
365364

366-
// 优化后的正则表达式,支持 score 为 number 或 string 类型
367-
var jsonRe = regexp.MustCompile(`\{(?s:.*?"score"\s*:\s*(?:"([\d.]+)"|([\d.]+)).*?"reason"\s*:\s*"((?:[^"\\]|\\.)*)".*?)}`)
365+
// 优化后的正则表达式,支持 score 和 reason 任意顺序,score 为 number 或 string 类型
366+
// jsonRe locates a flat `{...}` fragment that carries both a "score" field and a
// "reason" string field, in either order. The score may be a bare number
// (digits with an optional fractional part) or a quoted run of digits and dots;
// the reason must be a double-quoted string in which backslash escapes are
// honoured. Text surrounding the two fields inside the fragment may not contain
// `{` or `}`, so the fields are not matched across nested objects. The pattern
// has no capture groups: callers take the whole match and unmarshal it.
var jsonRe = regexp.MustCompile(`\{(?s:[^{}]*(?:"score"\s*:\s*(?:"[\d.]+"|\d+(?:\.\d+)?)[^{}]*"reason"\s*:\s*"(?:[^"\\]|\\.)*"|"reason"\s*:\s*"(?:[^"\\]|\\.)*"[^{}]*"score"\s*:\s*(?:"[\d.]+"|\d+(?:\.\d+)?))[^{}]*)}`)
368367

369368
func parseContentOutput(ctx context.Context, evaluatorVersion *entity.PromptEvaluatorVersion, replyItem *entity.ReplyItem, output *entity.EvaluatorOutputData) error {
370369
content := gptr.Indirect(replyItem.Content)
370+
371+
// 按优先级顺序执行解析策略
372+
strategies := []func(context.Context, string, *entity.EvaluatorOutputData) (bool, error){
373+
parseDirectJSON, // 策略1:直接解析完整JSON
374+
parseRepairedJSON, // 策略2:修复后解析完整JSON
375+
parseRegexExtractedJSON, // 策略3:正则提取JSON片段并解析
376+
parseScoreWithRegex, // 策略4:正则提取score,优先尝试用正则提取reason字段作为reason,否则使用完整内容作为reason
377+
}
378+
379+
for _, strategy := range strategies {
380+
success, err := strategy(ctx, content, output)
381+
if err != nil {
382+
return err
383+
}
384+
if success {
385+
return nil
386+
}
387+
}
388+
389+
// 当所有解析策略都失败时,返回错误(Run方法的defer会处理错误并设置EvaluatorRunError)
390+
logs.CtxWarn(ctx, "[parseContentOutput] All parsing strategies failed, original content: %s", content)
391+
return errorx.NewByCode(errno.InvalidOutputFromModelCode, errorx.WithExtraMsg("All parsing strategies failed. Original content: "+content))
392+
}
393+
394+
// parseDirectJSON 策略1:直接解析完整JSON内容
395+
func parseDirectJSON(ctx context.Context, content string, output *entity.EvaluatorOutputData) (bool, error) {
371396
var outputMsg outputMsgFormat
372397
b := []byte(content)
373398

374-
// 尝试直接解析整个 content
375399
if err := sonic.Unmarshal(b, &outputMsg); err == nil {
376400
if outputMsg.Reason != "" {
377401
score, err := outputMsg.Score.Float64()
378402
if err != nil {
379-
err := fmt.Errorf("[parseContentOutput] convert score to float64 failed, score=%s", outputMsg.Score)
380-
return errorx.WrapByCode(err, errno.InvalidOutputFromModelCode)
403+
return false, errorx.WrapByCode(err, errno.InvalidOutputFromModelCode)
381404
}
382405
output.EvaluatorResult.Score = &score
383406
output.EvaluatorResult.Reasoning = outputMsg.Reason
384-
return nil
407+
return true, nil
385408
}
386409
}
410+
return false, nil
411+
}
412+
413+
// parseRepairedJSON 策略2:使用jsonrepair修复后解析完整JSON内容
414+
func parseRepairedJSON(ctx context.Context, content string, output *entity.EvaluatorOutputData) (bool, error) {
415+
var outputMsg outputMsgFormat
387416

388-
// 新增:尝试使用jsonrepair修复整个content
389417
repairedContent, repairErr := jsonrepair.JSONRepair(content)
390418
if repairErr == nil {
391419
if err := sonic.Unmarshal([]byte(repairedContent), &outputMsg); err == nil {
392420
if outputMsg.Reason != "" {
393421
score, err := outputMsg.Score.Float64()
394422
if err != nil {
395-
err := fmt.Errorf("[parseContentOutput] convert score to float64 failed, score=%s", outputMsg.Score)
396-
return errorx.WrapByCode(err, errno.InvalidOutputFromModelCode)
423+
return false, errorx.WrapByCode(err, errno.InvalidOutputFromModelCode)
397424
}
398425
output.EvaluatorResult.Score = &score
399426
output.EvaluatorResult.Reasoning = outputMsg.Reason
400-
return nil
427+
return true, nil
401428
}
402429
}
403430
}
431+
return false, nil
432+
}
433+
434+
// parseRegexExtractedJSON is strategy 3: it uses a regular expression to extract JSON fragments from the content and parses each fragment.
435+
func parseRegexExtractedJSON(ctx context.Context, content string, output *entity.EvaluatorOutputData) (bool, error) {
436+
var outputMsg outputMsgFormat
437+
b := []byte(content)
404438

405-
// 保留原有逻辑:使用正则表达式查找 JSON 片段
439+
// 使用正则表达式查找JSON片段
406440
all := jsonRe.FindAll(b, -1)
407441
for _, bb := range all {
408442
// 首先尝试直接解析原始片段
409443
if err := sonic.Unmarshal(bb, &outputMsg); err == nil {
410444
if outputMsg.Reason != "" {
411445
score, err := outputMsg.Score.Float64()
412446
if err != nil {
413-
err := fmt.Errorf("[parseContentOutput] convert score to float64 failed, score=%s", outputMsg.Score)
414-
return errorx.WrapByCode(err, errno.InvalidOutputFromModelCode)
447+
return false, errorx.WrapByCode(err, errno.InvalidOutputFromModelCode)
415448
}
416449
output.EvaluatorResult.Score = &score
417450
output.EvaluatorResult.Reasoning = outputMsg.Reason
418-
return nil
451+
return true, nil
419452
}
420453
}
421454

@@ -426,20 +459,112 @@ func parseContentOutput(ctx context.Context, evaluatorVersion *entity.PromptEval
426459
if outputMsg.Reason != "" {
427460
score, err := outputMsg.Score.Float64()
428461
if err != nil {
429-
err := fmt.Errorf("[parseContentOutput] convert score to float64 failed, score=%s", outputMsg.Score)
430-
return errorx.WrapByCode(err, errno.InvalidOutputFromModelCode)
462+
return false, errorx.WrapByCode(err, errno.InvalidOutputFromModelCode)
431463
}
432464
output.EvaluatorResult.Score = &score
433465
output.EvaluatorResult.Reasoning = outputMsg.Reason
434-
return nil
466+
return true, nil
435467
}
436468
}
437469
}
438470
}
471+
return false, nil
472+
}
473+
474+
// parseScoreWithRegex 策略4:通过正则解析score字段,优先尝试用正则提取reason字段作为reason,否则使用完整内容作为reason
475+
func parseScoreWithRegex(ctx context.Context, content string, output *entity.EvaluatorOutputData) (bool, error) {
476+
scoreRegex := regexp.MustCompile(`(?i)score[^0-9]*([0-9]+(?:\.[0-9]+)?)`)
477+
scoreMatches := scoreRegex.FindStringSubmatch(content)
478+
if len(scoreMatches) > 1 {
479+
scoreStr := scoreMatches[1]
480+
score, err := strconv.ParseFloat(scoreStr, 64)
481+
if err == nil {
482+
// 尝试提取reason字段,处理未转义双引号的情况
483+
// 方法:找到 "reason": " 后面的内容,提取到下一个字段或JSON对象结束之前
484+
reasonFieldRegex := regexp.MustCompile(`(?i)"reason"\s*:\s*"`)
485+
reasonStartMatches := reasonFieldRegex.FindStringIndex(content)
486+
if reasonStartMatches != nil {
487+
// 找到了reason字段的开始位置,reasonStartPos是reason值内容开始的位置(最后一个双引号之后)
488+
reasonStartPos := reasonStartMatches[1]
489+
reasonEndPos := -1
490+
491+
// 首先检查reason值是否为空字符串(连续的两个双引号)
492+
if reasonStartPos < len(content) && content[reasonStartPos] == '"' {
493+
// reason值为空字符串,结束位置就是开始位置(不包含任何内容)
494+
reasonEndPos = reasonStartPos
495+
} else {
496+
// reason值不为空,需要找到结束位置
497+
// 查找下一个字段的开始位置(如 ", "score": 或其他字段)
498+
// 注意:需要查找reason之后的下一个字段
499+
nextFieldRegex := regexp.MustCompile(`(?i)",\s*"[^"]+"\s*:`)
500+
nextFieldMatches := nextFieldRegex.FindStringIndex(content[reasonStartPos:])
501+
if nextFieldMatches != nil {
502+
// 找到了下一个字段,且它在reason之后
503+
potentialEndPos := reasonStartPos + nextFieldMatches[0]
504+
// 从potentialEndPos向前查找最后一个双引号(reason值的结束双引号)
505+
for i := potentialEndPos - 1; i >= reasonStartPos; i-- {
506+
if content[i] == '"' {
507+
// 检查这是否是真正的结束双引号(前面不是转义符)
508+
if i == 0 || content[i-1] != '\\' {
509+
reasonEndPos = i
510+
break
511+
}
512+
// 如果是转义的双引号,继续向前查找
513+
}
514+
}
515+
} else {
516+
// 没找到下一个字段,尝试找到JSON对象的结束位置
517+
// 从reasonStartPos开始,向后查找第一个未转义的双引号
518+
for i := reasonStartPos; i < len(content); i++ {
519+
if content[i] == '"' {
520+
// 检查这是否是真正的结束双引号(前面不是转义符)
521+
if i == 0 || content[i-1] != '\\' {
522+
// 检查这个双引号后面是否是逗号、空格、}或其他字段
523+
if i+1 < len(content) {
524+
nextChar := content[i+1]
525+
if nextChar == ',' || nextChar == '}' || nextChar == ' ' || nextChar == '\n' || nextChar == '\r' {
526+
reasonEndPos = i
527+
break
528+
}
529+
} else {
530+
// 到达内容末尾
531+
reasonEndPos = i
532+
break
533+
}
534+
}
535+
}
536+
}
537+
}
538+
}
439539

440-
// 若都没有找到合法的解析结果,返回错误
441-
err := fmt.Errorf("[parseContentOutput] parse failed, content does not contain both score and reason: %s", content)
442-
return errorx.WrapByCode(err, errno.InvalidOutputFromModelCode)
540+
if reasonEndPos >= reasonStartPos {
541+
// 提取reason值(从开始位置到结束位置,如果reason为空则extractedReason为空字符串)
542+
extractedReason := content[reasonStartPos:reasonEndPos]
543+
// 即使是空字符串也接受(reason可以为空)
544+
logs.CtxWarn(ctx, "[parseScoreWithRegex] Hit regex parsing strategy with reason extraction (handling unescaped quotes), original content: %s", content)
545+
output.EvaluatorResult.Score = &score
546+
output.EvaluatorResult.Reasoning = extractedReason
547+
return true, nil
548+
}
549+
}
550+
// 如果无法通过定位字段的方式提取reason,尝试传统方式(可能在无未转义双引号时有效)
551+
reasonRegex := regexp.MustCompile(`(?i)reason[^"]*"([^"]+)"`)
552+
reasonMatches := reasonRegex.FindStringSubmatch(content)
553+
if len(reasonMatches) > 1 && len(reasonMatches[1]) > 0 {
554+
// 成功提取到reason字段(传统方式,适用于无未转义双引号的情况)
555+
logs.CtxWarn(ctx, "[parseScoreWithRegex] Hit regex parsing strategy with reason extraction, original content: %s", content)
556+
output.EvaluatorResult.Score = &score
557+
output.EvaluatorResult.Reasoning = reasonMatches[1]
558+
return true, nil
559+
}
560+
// 如果无法提取reason字段,使用完整输出作为reason
561+
logs.CtxWarn(ctx, "[parseScoreWithRegex] Hit regex parsing strategy without reason extraction, original content: %s", content)
562+
output.EvaluatorResult.Score = &score
563+
output.EvaluatorResult.Reasoning = content // 使用完整输出作为reason
564+
return true, nil
565+
}
566+
}
567+
return false, nil
443568
}
444569

445570
func parseFunctionCallOutput(ctx context.Context, evaluatorVersion *entity.PromptEvaluatorVersion, replyItem *entity.ReplyItem, output *entity.EvaluatorOutputData) error {

0 commit comments

Comments
 (0)