11import type { RateLimiter } from "./constants/rate-limit.constant" ;
2+ import {
3+ RANKING_WEIGHTS ,
4+ SCORE_MULTIPLIERS ,
5+ TIME_CONSTANTS ,
6+ } from "./constants/search.constant" ;
27import type { DbClient } from "./db" ;
38import { and , cosineDistance , desc , eq , gt , ilike , or , sql } from "./db" ;
49import { comments } from "./db/schema/entities/comment.sql" ;
@@ -50,7 +55,49 @@ export namespace SemanticSearch {
5055 } ,
5156 openai ,
5257 ) ;
53- const similarity = sql < number > `1-(${ cosineDistance ( issueTable . embedding , embedding ) } )` ;
58+ const similarity = sql < number > `(1-(${ cosineDistance ( issueTable . embedding , embedding ) } ))::float` ;
59+
60+ // Exponential decay for recency score
61+ // exp(-t/τ) where:
62+ // t is time elapsed in days
63+ // τ (tau) is the characteristic decay time in days
64+ // After 30 days (RECENCY_BASE_DAYS), score will be ~0.37 (1/e)
65+ // After 60 days, score will be ~0.14 (1/e²)
66+ // Score approaches but never reaches 0
67+ const recencyScore = sql < number > `
68+ EXP(
69+ -1.0 *
70+ EXTRACT(EPOCH FROM (NOW() - ${ issueTable . issueUpdatedAt } ))::float /
71+ (86400 * ${ TIME_CONSTANTS . RECENCY_BASE_DAYS } ) -- Convert decay time to seconds
72+ )::float
73+ ` ;
74+
75+ // Logarithmic comment score normalization
76+ // ln(x + 1) gives us:
77+ // 0 comments = 0.0
78+ // 4 comments ≈ 1.6
79+ // 5 comments ≈ 1.8
80+ // 10 comments ≈ 2.4
81+ // 20 comments ≈ 3.0
82+ // 50 comments ≈ 3.9
83+ // Then divide by ln(50 + 1) so that 50 comments maps to 1.0 (not clamped: issues with more than 50 comments score above 1)
84+ const commentScore = sql < number > `
85+ LN(GREATEST(count(${ comments . id } )::float + 1, 1)) /
86+ LN(51) -- ln(50 + 1) ≈ 3.93 as normalizing factor
87+ ` ;
88+
89+ // Combined ranking score
90+ const rankingScore = sql < number > `
91+ (${ RANKING_WEIGHTS . SEMANTIC_SIMILARITY } ::float * ${ similarity } ) +
92+ (${ RANKING_WEIGHTS . RECENCY } ::float * ${ recencyScore } ) +
93+ (${ RANKING_WEIGHTS . COMMENT_COUNT } ::float * ${ commentScore } ) +
94+ (${ RANKING_WEIGHTS . ISSUE_STATE } ::float * (
95+ CASE
96+ WHEN ${ issueTable . issueState } = 'OPEN' THEN ${ SCORE_MULTIPLIERS . OPEN_ISSUE } ::float
97+ ELSE ${ SCORE_MULTIPLIERS . CLOSED_ISSUE } ::float
98+ END
99+ ))
100+ ` ;
54101
55102 const selected = db
56103 . select ( {
@@ -83,6 +130,7 @@ export namespace SemanticSearch {
83130 repoOwnerName : repos . owner ,
84131 repoLastSyncedAt : repos . lastSyncedAt ,
85132 commentCount : count ( comments . id ) . as ( "comment_count" ) ,
133+ rankingScore,
86134 } )
87135 . from ( issueTable )
88136 . leftJoin ( repos , eq ( issueTable . repoId , repos . id ) )
@@ -95,10 +143,11 @@ export namespace SemanticSearch {
95143 repos . owner ,
96144 repos . lastSyncedAt ,
97145 )
98- . orderBy ( desc ( similarity ) )
146+ . orderBy ( desc ( rankingScore ) )
99147 . where (
100148 and (
101149 eq ( repos . initStatus , "completed" ) ,
150+ // TODO: results are ordered by rankingScore but still filtered on raw similarity; consider thresholding on rankingScore instead
102151 gt ( similarity , SIMILARITY_THRESHOLD ) ,
103152 // general substring queries match either title or body
104153 ...substringQueries . map ( ( subQuery ) =>
0 commit comments