Skip to content

Commit 4c6e7bf

Browse files
authored
feat: better sorting (#11)
* kinda working * feat(search): implement exponential decay for recency and logarithmic comment score --------- Co-authored-by: zx <[email protected]>
1 parent b7def93 commit 4c6e7bf

File tree

4 files changed

+79
-6
lines changed

4 files changed

+79
-6
lines changed

packages/core/src/constants/search.constant.ts

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,3 +12,23 @@ export type SearchOperator = (typeof SEARCH_OPERATORS)[number]["operator"];
1212

1313
export const STATE_SUBMENU_VALUES = ["open", "closed", "all"] as const;
1414
export type StateSubmenuValue = (typeof STATE_SUBMENU_VALUES)[number];
15+
16+
// Ranking weights (should sum to 1)
17+
export const RANKING_WEIGHTS = {
18+
SEMANTIC_SIMILARITY: 0.5, // Start with higher weight for semantic search
19+
COMMENT_COUNT: 0.25, // Activity level
20+
RECENCY: 0.2, // Recent updates
21+
ISSUE_STATE: 0.05, // Small bonus for open issues
22+
} as const;
23+
24+
// Time-based constants
25+
export const TIME_CONSTANTS = {
26+
// Base time unit in days for recency calculation
27+
RECENCY_BASE_DAYS: 30,
28+
} as const;
29+
30+
// Issue-state score values (scaled by RANKING_WEIGHTS.ISSUE_STATE and added into the ranking score)
31+
export const SCORE_MULTIPLIERS = {
32+
OPEN_ISSUE: 1.0,
33+
CLOSED_ISSUE: 0.8,
34+
} as const;

packages/core/src/db/index.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -39,6 +39,7 @@ export function createDb(config: {
3939
connectionString: string;
4040
isProd: boolean;
4141
}) {
42+
// Disable prepared statements (`prepare: false`), as they are not supported in "Transaction" pool mode
4243
const client = postgres(config.connectionString, { prepare: false });
4344
return {
4445
db: drizzle(client, {

packages/core/src/semsearch.ts

Lines changed: 51 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,9 @@
11
import type { RateLimiter } from "./constants/rate-limit.constant";
2+
import {
3+
RANKING_WEIGHTS,
4+
SCORE_MULTIPLIERS,
5+
TIME_CONSTANTS,
6+
} from "./constants/search.constant";
27
import type { DbClient } from "./db";
38
import { and, cosineDistance, desc, eq, gt, ilike, or, sql } from "./db";
49
import { comments } from "./db/schema/entities/comment.sql";
@@ -50,7 +55,49 @@ export namespace SemanticSearch {
5055
},
5156
openai,
5257
);
53-
const similarity = sql<number>`1-(${cosineDistance(issueTable.embedding, embedding)})`;
58+
const similarity = sql<number>`(1-(${cosineDistance(issueTable.embedding, embedding)}))::float`;
59+
60+
// Exponential decay for recency score
61+
// exp(-t/τ) where:
62+
// t is time elapsed in days
63+
// τ (tau) is the characteristic decay time in days
64+
// After 30 days (RECENCY_BASE_DAYS), score will be ~0.37 (1/e)
65+
// After 60 days, score will be ~0.14 (1/e²)
66+
// Score approaches but never reaches 0
67+
const recencyScore = sql<number>`
68+
EXP(
69+
-1.0 *
70+
EXTRACT(EPOCH FROM (NOW() - ${issueTable.issueUpdatedAt}))::float /
71+
(86400 * ${TIME_CONSTANTS.RECENCY_BASE_DAYS}) -- Convert decay time to seconds
72+
)::float
73+
`;
74+
75+
// Logarithmic comment score normalization
76+
// ln(x + 1) gives us:
77+
// 0 comments = 0.0
78+
// 4 comments ≈ 1.6
79+
// 5 comments ≈ 1.8
80+
// 10 comments ≈ 2.4
81+
// 20 comments ≈ 3.0
82+
// 50 comments ≈ 3.9
83+
// Then divide by ln(50 + 1) so that 50 comments maps to 1.0 (not clamped: more than 50 comments scores above 1)
84+
const commentScore = sql<number>`
85+
LN(GREATEST(count(${comments.id})::float + 1, 1)) /
86+
LN(51) -- ln(50 + 1) ≈ 3.93 as normalizing factor
87+
`;
88+
89+
// Combined ranking score
90+
const rankingScore = sql<number>`
91+
(${RANKING_WEIGHTS.SEMANTIC_SIMILARITY}::float * ${similarity}) +
92+
(${RANKING_WEIGHTS.RECENCY}::float * ${recencyScore}) +
93+
(${RANKING_WEIGHTS.COMMENT_COUNT}::float * ${commentScore}) +
94+
(${RANKING_WEIGHTS.ISSUE_STATE}::float * (
95+
CASE
96+
WHEN ${issueTable.issueState} = 'OPEN' THEN ${SCORE_MULTIPLIERS.OPEN_ISSUE}::float
97+
ELSE ${SCORE_MULTIPLIERS.CLOSED_ISSUE}::float
98+
END
99+
))
100+
`;
54101

55102
const selected = db
56103
.select({
@@ -83,6 +130,7 @@ export namespace SemanticSearch {
83130
repoOwnerName: repos.owner,
84131
repoLastSyncedAt: repos.lastSyncedAt,
85132
commentCount: count(comments.id).as("comment_count"),
133+
rankingScore,
86134
})
87135
.from(issueTable)
88136
.leftJoin(repos, eq(issueTable.repoId, repos.id))
@@ -95,10 +143,11 @@ export namespace SemanticSearch {
95143
repos.owner,
96144
repos.lastSyncedAt,
97145
)
98-
.orderBy(desc(similarity))
146+
.orderBy(desc(rankingScore))
99147
.where(
100148
and(
101149
eq(repos.initStatus, "completed"),
150+
// TODO: consider filtering on the combined ranking score instead of raw similarity
102151
gt(similarity, SIMILARITY_THRESHOLD),
103152
// general substring queries match either title or body
104153
...substringQueries.map((subQuery) =>

packages/web/src/components/IssueCard.tsx

Lines changed: 7 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -153,10 +153,13 @@ function IssueTitleWithLabels({ issue }: { issue: Issue }) {
153153
<span
154154
className="[word-break:break-word]"
155155
dangerouslySetInnerHTML={{
156-
__html: DOMPurify.sanitize(`${processTitle(issue.title)}`, {
157-
ALLOWED_TAGS: ["code"],
158-
ALLOWED_ATTR: [],
159-
}),
156+
__html: DOMPurify.sanitize(
157+
`(${(issue.rankingScore * 100).toFixed(1)}%) ${processTitle(issue.title)}`,
158+
{
159+
ALLOWED_TAGS: ["code"],
160+
ALLOWED_ATTR: [],
161+
},
162+
),
160163
}}
161164
/>
162165
</a>

0 commit comments

Comments
 (0)