From f6543172f15f47a45fc7d1159650d47559fb4a80 Mon Sep 17 00:00:00 2001 From: rainxchzed Date: Tue, 5 May 2026 15:42:39 +0500 Subject: [PATCH] Add nested license object and search sort=updated/releases with browse mode --- CLAUDE.md | 2 +- .../githubstore/db/MeilisearchClient.kt | 6 +- .../rainxch/githubstore/db/RepoRepository.kt | 8 +++ .../githubstore/db/SearchRepository.kt | 29 +++++++-- .../githubstore/ingest/GitHubSearchClient.kt | 1 + .../rainxch/githubstore/model/RepoResponse.kt | 15 +++++ .../rainxch/githubstore/routes/RepoRoutes.kt | 1 + .../githubstore/routes/SearchRoutes.kt | 63 ++++++++++++++----- 8 files changed, 104 insertions(+), 21 deletions(-) diff --git a/CLAUDE.md b/CLAUDE.md index c6dd7e7..2c69a5d 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -46,7 +46,7 @@ All under `/v1/`: | Endpoint | Purpose | |----------|---------| | `GET /health` | Health check (Postgres + Meilisearch status) | -| `GET /search?q=&platform=&sort=&limit=&offset=` | Meilisearch-powered search. Auto-triggers GitHub passthrough if <5 results. Reads optional `X-GitHub-Token` header to run passthrough on the user's 5000/hr quota instead of the backend's fallback quota. Response carries `passthroughAttempted: Boolean` so clients can distinguish "index was warm but returned nothing" from "GitHub also has nothing". | +| `GET /search?q=&platform=&sort=&limit=&offset=` | Meilisearch-powered search. Auto-triggers GitHub passthrough if <5 results (only on the first page, `offset=0`, of a non-empty `q`; `sort=updated` is answered from Postgres and skips passthrough). `sort` ∈ {`relevance` (default), `stars`, `recent` / `releases` (alias, by latest stable release date), `updated` (by repo `updated_at_gh`)}. `relevance` requires `q`; the others allow empty `q` for browse-mode listings. `sort=updated` is routed directly to Postgres FTS until the fetcher repo's `meili_sync.py` adds `updated_at_gh` to Meili's sortable-attributes. Reads optional `X-GitHub-Token` header to run passthrough on the user's 5000/hr quota instead of the backend's fallback quota.
Response carries `passthroughAttempted: Boolean` so clients can distinguish "index was warm but returned nothing" from "GitHub also has nothing". | | `GET /search/explore?q=&platform=&page=` | User-triggered deep GitHub search, paginated, ingests into index. Also reads `X-GitHub-Token`. Cold-path latency is 10–30s — clients must use a 30s timeout. | | `GET /categories/{trending\|new-releases\|most-popular}/{android\|windows\|macos\|linux}` | Pre-ranked repo lists. Sort order is `search_score DESC NULLS LAST, rank ASC` — static `rank` is only the tie-breaker once behavioral signals exist. | | `GET /topics/{privacy\|media\|productivity\|networking\|dev-tools}/{platform}` | Topic-bucketed repos. Same dynamic ordering as categories. | diff --git a/src/main/kotlin/zed/rainxch/githubstore/db/MeilisearchClient.kt b/src/main/kotlin/zed/rainxch/githubstore/db/MeilisearchClient.kt index 1101474..4a6fe39 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/db/MeilisearchClient.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/db/MeilisearchClient.kt @@ -39,9 +39,13 @@ class MeilisearchClient( } } + // `recent` is the legacy name; `releases` is the new public name -- + // both alias the same release-date sort. `updated` is the new + // GitHub-parity sort by repo-level updated_at_gh. 
val sortList = when (sort) { "stars" -> listOf("stars:desc") - "recent" -> listOf("latest_release_date:desc") + "recent", "releases" -> listOf("latest_release_date:desc") + "updated" -> listOf("updated_at_gh:desc") else -> emptyList() // relevance = Meilisearch default ranking } diff --git a/src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt b/src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt index 808217b..c816249 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/db/RepoRepository.kt @@ -89,6 +89,7 @@ class RepoRepository { openIssuesCount = this[Repos.openIssues], licenseSpdxId = this[Repos.licenseSpdxId], licenseName = this[Repos.licenseName], + license = nestedLicense(this[Repos.licenseSpdxId], this[Repos.licenseName]), language = this[Repos.language], topics = this[Repos.topics], releasesUrl = "${this[Repos.htmlUrl]}/releases", @@ -109,3 +110,10 @@ class RepoRepository { } } + +// Builds the nested RepoLicense from the flat columns. Returns null when +// both inputs are null so the JSON field is `"license": null` rather than +// `"license": {"spdxId": null, "name": null}` for licenseless repos. +internal fun nestedLicense(spdxId: String?, name: String?): zed.rainxch.githubstore.model.RepoLicense? = + if (spdxId == null && name == null) null + else zed.rainxch.githubstore.model.RepoLicense(spdxId = spdxId, name = name) diff --git a/src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt b/src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt index ff1e118..56e5802 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/db/SearchRepository.kt @@ -29,27 +29,42 @@ class SearchRepository { // search_score tie-breaker on every path so behavioral signal still // disambiguates within the primary sort's equivalence class. 
+ // `recent` and `releases` are aliases for "by release date" -- the + // newer name aligns with the GET /v1/search?sort=releases option + // exposed to clients (matches user intent: stable releases first). + // `updated` mirrors GitHub's repo-level `updated_at` (any push, + // not necessarily a release). val orderClause = when (sort) { "stars" -> "ORDER BY stars DESC, search_score DESC NULLS LAST" - "recent" -> "ORDER BY latest_release_date DESC NULLS LAST, search_score DESC NULLS LAST" + "recent", "releases" -> "ORDER BY latest_release_date DESC NULLS LAST, search_score DESC NULLS LAST" + "updated" -> "ORDER BY updated_at_gh DESC NULLS LAST, search_score DESC NULLS LAST" else -> "ORDER BY ts_rank(tsv_search, plainto_tsquery('english', ?)) DESC, search_score DESC NULLS LAST" } + // Browse mode: empty query + non-relevance sort skips the ts_match + // filter entirely. Clients use this for "no search box, just sort + // the catalog" UX (Recently-Updated / Recent-Releases home tabs). + val browseMode = query.isBlank() && sort != "relevance" val sql = buildString { append( """ SELECT id, full_name, owner, name, owner_avatar_url, description, default_branch, - html_url, stars, forks, language, topics, + html_url, stars, forks, open_issues, license_spdx_id, license_name, + language, topics, latest_release_date, latest_release_tag, download_count, has_installers_android, has_installers_windows, has_installers_macos, has_installers_linux, trending_score, popularity_score, search_score, updated_at_gh, created_at_gh FROM repos - WHERE tsv_search @@ plainto_tsquery('english', ?) """.trimIndent() ) - if (platformColumn != null) append(" AND $platformColumn = true") + // WHERE clause skipped in browse mode -- caller wants the whole + // catalog sorted by `sort`, not a text-matched subset. 
+ if (!browseMode) append(" WHERE tsv_search @@ plainto_tsquery('english', ?)") + if (platformColumn != null) { + append(if (browseMode) " WHERE " else " AND ").append("$platformColumn = true") + } append(" ").append(orderClause).append(" LIMIT ? OFFSET ?") } @@ -58,7 +73,7 @@ class SearchRepository { conn.prepareStatement(sql).use { stmt -> var i = 1 - stmt.setString(i++, query) + if (!browseMode) stmt.setString(i++, query) if (sort == "relevance") stmt.setString(i++, query) // ts_rank in ORDER BY stmt.setInt(i++, limit) stmt.setInt(i, offset) @@ -91,6 +106,10 @@ class SearchRepository { htmlUrl = rs.getString("html_url"), stargazersCount = rs.getInt("stars"), forksCount = rs.getInt("forks"), + openIssuesCount = rs.getInt("open_issues"), + licenseSpdxId = rs.getString("license_spdx_id"), + licenseName = rs.getString("license_name"), + license = nestedLicense(rs.getString("license_spdx_id"), rs.getString("license_name")), language = rs.getString("language"), topics = topics, releasesUrl = "${rs.getString("html_url")}/releases", diff --git a/src/main/kotlin/zed/rainxch/githubstore/ingest/GitHubSearchClient.kt b/src/main/kotlin/zed/rainxch/githubstore/ingest/GitHubSearchClient.kt index b390892..ac4d9e1 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/ingest/GitHubSearchClient.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/ingest/GitHubSearchClient.kt @@ -610,6 +610,7 @@ class GitHubSearchClient( openIssuesCount = repo.openIssuesCount, licenseSpdxId = repo.license?.spdxId, licenseName = repo.license?.name, + license = repo.license?.let { zed.rainxch.githubstore.model.RepoLicense(spdxId = it.spdxId, name = it.name) }, language = repo.language, topics = repo.topics, releasesUrl = "${repo.htmlUrl}/releases", diff --git a/src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt b/src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt index af6e288..6bf583d 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt +++ 
b/src/main/kotlin/zed/rainxch/githubstore/model/RepoResponse.kt @@ -8,6 +8,17 @@ data class RepoOwner( val avatarUrl: String?, ) +// Nested form of the GitHub-detected license. Same data as the flat +// `licenseSpdxId` / `licenseName` fields below; this shape matches the +// upstream GitHub object so a client doing direct-GitHub fallback can use +// one DTO. Prefer this nested form in new client code; the flat fields +// are kept for back-compat with shipped client builds. +@Serializable +data class RepoLicense( + val spdxId: String? = null, + val name: String? = null, +) + @Serializable data class RepoResponse( val id: Long, @@ -29,6 +40,10 @@ data class RepoResponse( // version ("MIT License"). val licenseSpdxId: String? = null, val licenseName: String? = null, + // Nested form of the same data, matching upstream GitHub's shape. + // Clients should prefer this; the flat fields above will be removed + // after the next client release migrates. + val license: RepoLicense? = null, val language: String?, val topics: List<String>, val releasesUrl: String?, diff --git a/src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt b/src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt index d55d66b..11149c3 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt +++ b/src/main/kotlin/zed/rainxch/githubstore/routes/RepoRoutes.kt @@ -108,6 +108,7 @@ internal fun GitHubRepo.toMetadataOnlyResponse(): RepoResponse = RepoResponse( openIssuesCount = openIssuesCount, licenseSpdxId = license?.spdxId, licenseName = license?.name, + license = license?.let { zed.rainxch.githubstore.model.RepoLicense(spdxId = it.spdxId, name = it.name) }, language = language, topics = topics, releasesUrl = "$htmlUrl/releases", diff --git a/src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt b/src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt index cbba5cf..77c2ed5 100644 --- a/src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt +++
b/src/main/kotlin/zed/rainxch/githubstore/routes/SearchRoutes.kt @@ -14,7 +14,9 @@ import zed.rainxch.githubstore.model.RepoResponse import zed.rainxch.githubstore.model.SearchResponse private val VALID_PLATFORMS = setOf("android", "windows", "macos", "linux") -private val VALID_SORTS = setOf("relevance", "stars", "recent") +// `recent` kept for back-compat; `releases` is the public-facing alias. +// `updated` mirrors GitHub's repo-level updated_at sort. +private val VALID_SORTS = setOf("relevance", "stars", "recent", "releases", "updated") private const val ON_DEMAND_THRESHOLD = 5 fun Route.searchRoutes( @@ -25,26 +27,30 @@ fun Route.searchRoutes( metrics: SearchMetricsRegistry, ) { get("/search") { - val query = call.request.queryParameters["q"] - if (query.isNullOrBlank()) { + // Empty `q` is allowed when `sort` is anything other than relevance -- + // browse mode for "Recently Updated" / "Recent Releases" home tabs. + // sort=relevance still requires a query because text-rank needs one. + val rawQuery = call.request.queryParameters["q"] + val sort = call.request.queryParameters["sort"] ?: "relevance" + if (sort !in VALID_SORTS) { return@get call.respond( - HttpStatusCode.BadRequest, mapOf("error" to "Missing query parameter 'q'") + HttpStatusCode.BadRequest, + mapOf("error" to "Invalid sort. Must be one of: $VALID_SORTS") ) } - - val platform = call.request.queryParameters["platform"] - if (platform != null && platform !in VALID_PLATFORMS) { + if ((rawQuery.isNullOrBlank()) && sort == "relevance") { return@get call.respond( HttpStatusCode.BadRequest, - mapOf("error" to "Invalid platform. 
Must be one of: $VALID_PLATFORMS") - ) + mapOf("error" to "Missing query parameter 'q' (required when sort=relevance)") ) } + val query = rawQuery.orEmpty() - val sort = call.request.queryParameters["sort"] ?: "relevance" - if (sort !in VALID_SORTS) { + val platform = call.request.queryParameters["platform"] + if (platform != null && platform !in VALID_PLATFORMS) { return@get call.respond( HttpStatusCode.BadRequest, - mapOf("error" to "Invalid sort. Must be one of: $VALID_SORTS") + mapOf("error" to "Invalid platform. Must be one of: $VALID_PLATFORMS") ) } @@ -53,6 +59,31 @@ fun Route.searchRoutes( val userToken = call.request.headers["X-GitHub-Token"]?.takeIf { it.isNotBlank() } + // sort=updated needs `updated_at_gh` in Meili's sortable-attributes + // config -- not yet pushed by the fetcher repo's meili_sync.py. + // Route it directly to Postgres FTS where the column already exists. + // Once the fetcher learns the field, this branch can be dropped and Meili + // serves the sort with full search semantics. + if (sort == "updated") { + val startTime = System.currentTimeMillis() + val items = searchRepository.search( + query = query, + platform = platform, + sort = sort, + limit = limit, + offset = offset, + ) + val elapsed = (System.currentTimeMillis() - startTime).toInt() + metrics.recordPostgresFallback(items.size, elapsed) + call.response.header(HttpHeaders.CacheControl, "public, max-age=15, s-maxage=30") + return@get call.respond(SearchResponse( + items = items, + totalHits = items.size, + processingTimeMs = elapsed, + source = "postgres", + )) + } + // Try Meilisearch first, fall back to Postgres FTS try { val result = meilisearch.search( @@ -68,8 +99,10 @@ fun Route.searchRoutes( var source = "meilisearch" var passthroughAttempted = false - // On-demand: if few results, also search GitHub and ingest - if (items.size < ON_DEMAND_THRESHOLD && offset == 0) { + // On-demand passthrough only makes sense for actual text queries.
+ // Browse mode (empty q with a non-relevance sort) is a catalog + // listing -- no GitHub call is appropriate. + if (query.isNotBlank() && items.size < ON_DEMAND_THRESHOLD && offset == 0) { passthroughAttempted = true val githubResults = githubSearch.searchAndIngest(query, platform, limit = 10, userToken = userToken) if (githubResults.isNotEmpty()) { @@ -88,7 +121,8 @@ fun Route.searchRoutes( // Log near-misses too — queries with 1-4 results are tractable training // candidates; the worker prioritizes zero-result rows via result_count. - if (items.size < ON_DEMAND_THRESHOLD) { + // Browse mode has no query to log. + if (query.isNotBlank() && items.size < ON_DEMAND_THRESHOLD) { searchMissRepository.logMiss(query, resultCount = items.size) } @@ -179,6 +213,7 @@ private fun zed.rainxch.githubstore.db.MeiliRepoHit.toRepoResponse() = RepoRespo openIssuesCount = open_issues, licenseSpdxId = license_spdx_id, licenseName = license_name, + license = zed.rainxch.githubstore.db.nestedLicense(license_spdx_id, license_name), language = language, topics = topics, releasesUrl = "$html_url/releases",