@@ -6,7 +6,6 @@ package service
66import (
77 "context"
88 json2 "encoding/json"
9- "fmt"
109 "io"
1110 "regexp"
1211 "strconv"
@@ -363,59 +362,93 @@ type outputMsgFormat struct {
363362 Reason string `json:"reason"`
364363}
365364
366- // Optimized regular expression; supports score as either a number or a string.
367- var jsonRe = regexp .MustCompile (`\{(?s:.*? "score"\s*:\s*(?:"( [\d.]+)"|([\d.]+)).*? "reason"\s*:\s*"(( ?:[^"\\]|\\.)*)".*? )}` )
365+ // Optimized regular expression; accepts score and reason in either order, with score as either a number or a string.
366+ var jsonRe = regexp .MustCompile (`\{(?s:[^{}]*(?: "score"\s*:\s*(?:"[\d.]+"|\d+(?:\.\d+)?)[^{}]* "reason"\s*:\s*"(?:[^"\\]|\\.)*"|"reason"\s*:\s*"(?:[^"\\]|\\.)*"[^{}]*"score"\s*:\s*(?:"[\d.]+"|\d+(?:\.\d+)?))[^{}]* )}` )
368367
369368func parseContentOutput (ctx context.Context , evaluatorVersion * entity.PromptEvaluatorVersion , replyItem * entity.ReplyItem , output * entity.EvaluatorOutputData ) error {
370369 content := gptr .Indirect (replyItem .Content )
370+
371+ // 按优先级顺序执行解析策略
372+ strategies := []func (context.Context , string , * entity.EvaluatorOutputData ) (bool , error ){
373+ parseDirectJSON , // 策略1:直接解析完整JSON
374+ parseRepairedJSON , // 策略2:修复后解析完整JSON
375+ parseRegexExtractedJSON , // 策略3:正则提取JSON片段并解析
376+ parseScoreWithRegex , // 策略4:正则提取score,优先尝试用正则提取reason字段作为reason,否则使用完整内容作为reason
377+ }
378+
379+ for _ , strategy := range strategies {
380+ success , err := strategy (ctx , content , output )
381+ if err != nil {
382+ return err
383+ }
384+ if success {
385+ return nil
386+ }
387+ }
388+
389+ // 当所有解析策略都失败时,返回错误(Run方法的defer会处理错误并设置EvaluatorRunError)
390+ logs .CtxWarn (ctx , "[parseContentOutput] All parsing strategies failed, original content: %s" , content )
391+ return errorx .NewByCode (errno .InvalidOutputFromModelCode , errorx .WithExtraMsg ("All parsing strategies failed. Original content: " + content ))
392+ }
393+
394+ // parseDirectJSON 策略1:直接解析完整JSON内容
395+ func parseDirectJSON (ctx context.Context , content string , output * entity.EvaluatorOutputData ) (bool , error ) {
371396 var outputMsg outputMsgFormat
372397 b := []byte (content )
373398
374- // 尝试直接解析整个 content
375399 if err := sonic .Unmarshal (b , & outputMsg ); err == nil {
376400 if outputMsg .Reason != "" {
377401 score , err := outputMsg .Score .Float64 ()
378402 if err != nil {
379- err := fmt .Errorf ("[parseContentOutput] convert score to float64 failed, score=%s" , outputMsg .Score )
380- return errorx .WrapByCode (err , errno .InvalidOutputFromModelCode )
403+ return false , errorx .WrapByCode (err , errno .InvalidOutputFromModelCode )
381404 }
382405 output .EvaluatorResult .Score = & score
383406 output .EvaluatorResult .Reasoning = outputMsg .Reason
384- return nil
407+ return true , nil
385408 }
386409 }
410+ return false , nil
411+ }
412+
413+ // parseRepairedJSON 策略2:使用jsonrepair修复后解析完整JSON内容
414+ func parseRepairedJSON (ctx context.Context , content string , output * entity.EvaluatorOutputData ) (bool , error ) {
415+ var outputMsg outputMsgFormat
387416
388- // 新增:尝试使用jsonrepair修复整个content
389417 repairedContent , repairErr := jsonrepair .JSONRepair (content )
390418 if repairErr == nil {
391419 if err := sonic .Unmarshal ([]byte (repairedContent ), & outputMsg ); err == nil {
392420 if outputMsg .Reason != "" {
393421 score , err := outputMsg .Score .Float64 ()
394422 if err != nil {
395- err := fmt .Errorf ("[parseContentOutput] convert score to float64 failed, score=%s" , outputMsg .Score )
396- return errorx .WrapByCode (err , errno .InvalidOutputFromModelCode )
423+ return false , errorx .WrapByCode (err , errno .InvalidOutputFromModelCode )
397424 }
398425 output .EvaluatorResult .Score = & score
399426 output .EvaluatorResult .Reasoning = outputMsg .Reason
400- return nil
427+ return true , nil
401428 }
402429 }
403430 }
431+ return false , nil
432+ }
433+
434+ // parseRegexExtractedJSON 策略3:使用正则表达式提取JSON片段并解析
435+ func parseRegexExtractedJSON (ctx context.Context , content string , output * entity.EvaluatorOutputData ) (bool , error ) {
436+ var outputMsg outputMsgFormat
437+ b := []byte (content )
404438
405- // 保留原有逻辑:使用正则表达式查找 JSON 片段
439+ // 使用正则表达式查找JSON片段
406440 all := jsonRe .FindAll (b , - 1 )
407441 for _ , bb := range all {
408442 // 首先尝试直接解析原始片段
409443 if err := sonic .Unmarshal (bb , & outputMsg ); err == nil {
410444 if outputMsg .Reason != "" {
411445 score , err := outputMsg .Score .Float64 ()
412446 if err != nil {
413- err := fmt .Errorf ("[parseContentOutput] convert score to float64 failed, score=%s" , outputMsg .Score )
414- return errorx .WrapByCode (err , errno .InvalidOutputFromModelCode )
447+ return false , errorx .WrapByCode (err , errno .InvalidOutputFromModelCode )
415448 }
416449 output .EvaluatorResult .Score = & score
417450 output .EvaluatorResult .Reasoning = outputMsg .Reason
418- return nil
451+ return true , nil
419452 }
420453 }
421454
@@ -426,20 +459,112 @@ func parseContentOutput(ctx context.Context, evaluatorVersion *entity.PromptEval
426459 if outputMsg .Reason != "" {
427460 score , err := outputMsg .Score .Float64 ()
428461 if err != nil {
429- err := fmt .Errorf ("[parseContentOutput] convert score to float64 failed, score=%s" , outputMsg .Score )
430- return errorx .WrapByCode (err , errno .InvalidOutputFromModelCode )
462+ return false , errorx .WrapByCode (err , errno .InvalidOutputFromModelCode )
431463 }
432464 output .EvaluatorResult .Score = & score
433465 output .EvaluatorResult .Reasoning = outputMsg .Reason
434- return nil
466+ return true , nil
435467 }
436468 }
437469 }
438470 }
471+ return false , nil
472+ }
473+
474+ // parseScoreWithRegex 策略4:通过正则解析score字段,优先尝试用正则提取reason字段作为reason,否则使用完整内容作为reason
475+ func parseScoreWithRegex (ctx context.Context , content string , output * entity.EvaluatorOutputData ) (bool , error ) {
476+ scoreRegex := regexp .MustCompile (`(?i)score[^0-9]*([0-9]+(?:\.[0-9]+)?)` )
477+ scoreMatches := scoreRegex .FindStringSubmatch (content )
478+ if len (scoreMatches ) > 1 {
479+ scoreStr := scoreMatches [1 ]
480+ score , err := strconv .ParseFloat (scoreStr , 64 )
481+ if err == nil {
482+ // 尝试提取reason字段,处理未转义双引号的情况
483+ // 方法:找到 "reason": " 后面的内容,提取到下一个字段或JSON对象结束之前
484+ reasonFieldRegex := regexp .MustCompile (`(?i)"reason"\s*:\s*"` )
485+ reasonStartMatches := reasonFieldRegex .FindStringIndex (content )
486+ if reasonStartMatches != nil {
487+ // 找到了reason字段的开始位置,reasonStartPos是reason值内容开始的位置(最后一个双引号之后)
488+ reasonStartPos := reasonStartMatches [1 ]
489+ reasonEndPos := - 1
490+
491+ // 首先检查reason值是否为空字符串(连续的两个双引号)
492+ if reasonStartPos < len (content ) && content [reasonStartPos ] == '"' {
493+ // reason值为空字符串,结束位置就是开始位置(不包含任何内容)
494+ reasonEndPos = reasonStartPos
495+ } else {
496+ // reason值不为空,需要找到结束位置
497+ // 查找下一个字段的开始位置(如 ", "score": 或其他字段)
498+ // 注意:需要查找reason之后的下一个字段
499+ nextFieldRegex := regexp .MustCompile (`(?i)",\s*"[^"]+"\s*:` )
500+ nextFieldMatches := nextFieldRegex .FindStringIndex (content [reasonStartPos :])
501+ if nextFieldMatches != nil {
502+ // 找到了下一个字段,且它在reason之后
503+ potentialEndPos := reasonStartPos + nextFieldMatches [0 ]
504+ // 从potentialEndPos向前查找最后一个双引号(reason值的结束双引号)
505+ for i := potentialEndPos - 1 ; i >= reasonStartPos ; i -- {
506+ if content [i ] == '"' {
507+ // 检查这是否是真正的结束双引号(前面不是转义符)
508+ if i == 0 || content [i - 1 ] != '\\' {
509+ reasonEndPos = i
510+ break
511+ }
512+ // 如果是转义的双引号,继续向前查找
513+ }
514+ }
515+ } else {
516+ // 没找到下一个字段,尝试找到JSON对象的结束位置
517+ // 从reasonStartPos开始,向后查找第一个未转义的双引号
518+ for i := reasonStartPos ; i < len (content ); i ++ {
519+ if content [i ] == '"' {
520+ // 检查这是否是真正的结束双引号(前面不是转义符)
521+ if i == 0 || content [i - 1 ] != '\\' {
522+ // 检查这个双引号后面是否是逗号、空格、}或其他字段
523+ if i + 1 < len (content ) {
524+ nextChar := content [i + 1 ]
525+ if nextChar == ',' || nextChar == '}' || nextChar == ' ' || nextChar == '\n' || nextChar == '\r' {
526+ reasonEndPos = i
527+ break
528+ }
529+ } else {
530+ // 到达内容末尾
531+ reasonEndPos = i
532+ break
533+ }
534+ }
535+ }
536+ }
537+ }
538+ }
439539
440- // 若都没有找到合法的解析结果,返回错误
441- err := fmt .Errorf ("[parseContentOutput] parse failed, content does not contain both score and reason: %s" , content )
442- return errorx .WrapByCode (err , errno .InvalidOutputFromModelCode )
540+ if reasonEndPos >= reasonStartPos {
541+ // 提取reason值(从开始位置到结束位置,如果reason为空则extractedReason为空字符串)
542+ extractedReason := content [reasonStartPos :reasonEndPos ]
543+ // 即使是空字符串也接受(reason可以为空)
544+ logs .CtxWarn (ctx , "[parseScoreWithRegex] Hit regex parsing strategy with reason extraction (handling unescaped quotes), original content: %s" , content )
545+ output .EvaluatorResult .Score = & score
546+ output .EvaluatorResult .Reasoning = extractedReason
547+ return true , nil
548+ }
549+ }
550+ // 如果无法通过定位字段的方式提取reason,尝试传统方式(可能在无未转义双引号时有效)
551+ reasonRegex := regexp .MustCompile (`(?i)reason[^"]*"([^"]+)"` )
552+ reasonMatches := reasonRegex .FindStringSubmatch (content )
553+ if len (reasonMatches ) > 1 && len (reasonMatches [1 ]) > 0 {
554+ // 成功提取到reason字段(传统方式,适用于无未转义双引号的情况)
555+ logs .CtxWarn (ctx , "[parseScoreWithRegex] Hit regex parsing strategy with reason extraction, original content: %s" , content )
556+ output .EvaluatorResult .Score = & score
557+ output .EvaluatorResult .Reasoning = reasonMatches [1 ]
558+ return true , nil
559+ }
560+ // 如果无法提取reason字段,使用完整输出作为reason
561+ logs .CtxWarn (ctx , "[parseScoreWithRegex] Hit regex parsing strategy without reason extraction, original content: %s" , content )
562+ output .EvaluatorResult .Score = & score
563+ output .EvaluatorResult .Reasoning = content // 使用完整输出作为reason
564+ return true , nil
565+ }
566+ }
567+ return false , nil
443568}
444569
445570func parseFunctionCallOutput (ctx context.Context , evaluatorVersion * entity.PromptEvaluatorVersion , replyItem * entity.ReplyItem , output * entity.EvaluatorOutputData ) error {
0 commit comments