Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 34 additions & 4 deletions src/main/kotlin/zed/rainxch/githubstore/routes/InternalRoutes.kt
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,16 @@ import kotlinx.coroutines.coroutineScope
import kotlinx.coroutines.delay
import kotlinx.coroutines.launch
import kotlinx.serialization.Serializable
import org.jetbrains.exposed.sql.SqlExpressionBuilder.eq
import org.jetbrains.exposed.sql.SqlExpressionBuilder.isNull
import org.jetbrains.exposed.sql.selectAll
import org.jetbrains.exposed.sql.transactions.TransactionManager
import org.jetbrains.exposed.sql.transactions.experimental.newSuspendedTransaction
import org.jetbrains.exposed.sql.transactions.transaction
import org.jetbrains.exposed.sql.update
import org.slf4j.LoggerFactory
import zed.rainxch.githubstore.db.Repos
import zed.rainxch.githubstore.ingest.GitHubRepo
import zed.rainxch.githubstore.ingest.GitHubSearchClient
import zed.rainxch.githubstore.ingest.WorkerSupervisor
import zed.rainxch.githubstore.metrics.SearchMetricsRegistry
Expand Down Expand Up @@ -203,9 +206,9 @@ private suspend fun runBackfill(
val pacePerRepoMs: Long = (System.getenv("REPO_REFRESH_PACE_MS")?.toLongOrNull() ?: 500L)
.coerceAtLeast(0L)
var ok = 0
var metadataOnly = 0
var gone = 0
var archived = 0
var stale = 0
var failed = 0
for ((_, fullName) in candidates) {
// Quiet-window guard: pause the loop, don't burn the candidate.
Expand All @@ -219,7 +222,14 @@ private suspend fun runBackfill(
ok++
}
is GitHubSearchClient.RefreshResult.NoUsableRelease -> {
stale++
// Repo metadata fetched fine but has no installable release.
// The full persist path requires release info, so update the
// drift-prone metadata columns directly. Otherwise these rows
// would stay at default open_issues=0 / NULL license forever
// and re-appear in every subsequent backfill query -- the
// exact failure mode that prompted this fix.
upsertMetadataOnly(result.repo)
metadataOnly++
}
GitHubSearchClient.RefreshResult.Gone -> gone++
GitHubSearchClient.RefreshResult.Archived -> archived++
Expand All @@ -228,11 +238,31 @@ private suspend fun runBackfill(
delay(pacePerRepoMs)
}
internalLog.info(
"Backfill done: ok={} gone={} archived={} no-release={} transient-fail={} (of {})",
ok, gone, archived, stale, failed, candidates.size,
"Backfill done: ok={} metadata-only={} gone={} archived={} transient-fail={} (of {})",
ok, metadataOnly, gone, archived, failed, candidates.size,
)
}

// Refreshes only the drift-prone metadata of an already-stored repo that has
// no installable release, so the row stops sitting at its defaults (e.g.
// open_issues=0 / NULL license) and re-qualifying for every backfill run.
//
// Columns written: stars, forks, open issues, license SPDX id, license name,
// description, and indexed_at (set to the current time). This covers the
// columns introduced by V14/V15 plus the other volatile ones.
//
// Columns NOT written: anything release-related and the archived flag.
// Release columns are left alone on purpose -- they either hold values from
// the last successful Ok-path refresh or are still at schema defaults because
// no release ever existed; both are the correct outcome.
//
// Despite the name this is a plain UPDATE keyed on full_name: if no row
// matches, nothing is written and no row is inserted.
private fun upsertMetadataOnly(repo: GitHubRepo) {
    // Read once; both license columns are derived from the same (nullable) object.
    val license = repo.license
    transaction {
        Repos.update({ Repos.fullName eq repo.fullName }) { row ->
            row[stars] = repo.stargazersCount
            row[forks] = repo.forksCount
            row[openIssues] = repo.openIssuesCount
            // A repo without a license clears both license columns to NULL.
            row[licenseSpdxId] = license?.spdxId
            row[licenseName] = license?.name
            row[description] = repo.description
            row[indexedAt] = java.time.OffsetDateTime.now()
        }
    }
}

private suspend fun fetchDbMetrics(): TrainingMetrics = coroutineScope {
val unprocessed = async { countUnprocessedMisses() }
val reposWithSignals = async { countReposWithSignals() }
Expand Down
Loading