From 65c6c52f8e5ea919ef3bec93df00bcd8c9e6955b Mon Sep 17 00:00:00 2001 From: Luiz Carvalho Date: Tue, 7 Oct 2025 16:53:29 -0400 Subject: [PATCH 1/2] fix: sort CVE records correctly CVE records follow a specific format where the last segment represents a numerical sequence. To properly sort CVE records, we must treat this sequence segment differently than the rest of the record ID. fixes #1811 Co-Authored-By: Claude Signed-off-by: Luiz Carvalho --- .../src/vulnerability/service/mod.rs | 31 ++++++++++- .../src/vulnerability/service/test.rs | 52 +++++++++++++++++++ 2 files changed, 82 insertions(+), 1 deletion(-) diff --git a/modules/fundamental/src/vulnerability/service/mod.rs b/modules/fundamental/src/vulnerability/service/mod.rs index 6b47d4c88..c173e1e7f 100644 --- a/modules/fundamental/src/vulnerability/service/mod.rs +++ b/modules/fundamental/src/vulnerability/service/mod.rs @@ -45,7 +45,36 @@ impl VulnerabilityService { connection: &C, ) -> Result, Error> { let limiter = vulnerability::Entity::find() - .filtering_with(search, Columns::from_entity::())? + .filtering_with( + search, + Columns::from_entity::().translator(|field, order, _value| { + // When sorting by 'id', translate to use a normalized sort key + // This ensures proper numeric sorting within CVE IDs while maintaining + // alphabetical ordering between different prefixes (ABC-, CVE-, GHSA-, etc.) 
+ if field == "id" && (order == "asc" || order == "desc") { + Some(format!("id_sort_key:{}", order)) + } else { + None + } + }) + .add_expr( + "id_sort_key", + // Create a normalized sort key that preserves prefixes but sorts numbers numerically + // For CVE IDs: converts "CVE-2024-288" to "CVE-2024-0000000000000000288" + // - Year is always 4 digits, no padding needed + // - Pad sequence to 19 digits (max length per CVE schema) + // CVE schema: https://github.com/CVEProject/cve-schema/blob/main/schema/CVE_Record_Format.json + // For other IDs: returns the ID as-is for alphabetical sorting + // This ensures: ABC-123 < CVE-2023-9000 < CVE-2024-10000 < GHSA-xxx + Expr::cust( + "CASE WHEN id ~ '^CVE-[0-9]{4}-[0-9]{4,19}$' THEN \ + SUBSTRING(id FROM '^CVE-[0-9]{4}-') || \ + LPAD(SUBSTRING(id FROM '-([0-9]+)$'), 19, '0') \ + ELSE id END" + ), + sea_orm::ColumnType::Text, + ), + )? .limiting(connection, paginated.offset, paginated.limit); let total = limiter.total().await?; diff --git a/modules/fundamental/src/vulnerability/service/test.rs b/modules/fundamental/src/vulnerability/service/test.rs index 9906d15fe..f48018e23 100644 --- a/modules/fundamental/src/vulnerability/service/test.rs +++ b/modules/fundamental/src/vulnerability/service/test.rs @@ -540,6 +540,58 @@ async fn vulnerability_queries(ctx: &TrustifyContext) -> Result<(), anyhow::Erro Ok(()) } +#[test_context(TrustifyContext)] +#[test(tokio::test)] +async fn vulnerability_numeric_sorting(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { + let service = VulnerabilityService::new(); + + ctx.graph.ingest_vulnerability("CVE-2024-40000", (), &ctx.db).await?; + ctx.graph.ingest_vulnerability("CVE-2024-10288000", (), &ctx.db).await?; + ctx.graph.ingest_vulnerability("CVE-2023-1234", (), &ctx.db).await?; + ctx.graph.ingest_vulnerability("CVE-2024-9000", (), &ctx.db).await?; + ctx.graph.ingest_vulnerability("CVE-2023-5100", (), &ctx.db).await?; + ctx.graph.ingest_vulnerability("GHSA-xxxx-yyyy-zzzz", (), 
&ctx.db).await?; + ctx.graph.ingest_vulnerability("ABC-xxxx-yyyy", (), &ctx.db).await?; + + // Test ascending sort + let vulns = service + .fetch_vulnerabilities( + q("").sort("id:asc"), + Paginated::default(), + Default::default(), + &ctx.db, + ) + .await?; + assert_eq!(7, vulns.items.len()); + assert_eq!(vulns.items[0].head.identifier, "ABC-xxxx-yyyy"); + assert_eq!(vulns.items[1].head.identifier, "CVE-2023-1234"); + assert_eq!(vulns.items[2].head.identifier, "CVE-2023-5100"); + assert_eq!(vulns.items[3].head.identifier, "CVE-2024-9000"); + assert_eq!(vulns.items[4].head.identifier, "CVE-2024-40000"); + assert_eq!(vulns.items[5].head.identifier, "CVE-2024-10288000"); + assert_eq!(vulns.items[6].head.identifier, "GHSA-xxxx-yyyy-zzzz"); + + // Test descending sort + let vulns = service + .fetch_vulnerabilities( + q("").sort("id:desc"), + Paginated::default(), + Default::default(), + &ctx.db, + ) + .await?; + assert_eq!(7, vulns.items.len()); + assert_eq!(vulns.items[0].head.identifier, "GHSA-xxxx-yyyy-zzzz"); + assert_eq!(vulns.items[1].head.identifier, "CVE-2024-10288000"); + assert_eq!(vulns.items[2].head.identifier, "CVE-2024-40000"); + assert_eq!(vulns.items[3].head.identifier, "CVE-2024-9000"); + assert_eq!(vulns.items[4].head.identifier, "CVE-2023-5100"); + assert_eq!(vulns.items[5].head.identifier, "CVE-2023-1234"); + assert_eq!(vulns.items[6].head.identifier, "ABC-xxxx-yyyy"); + + Ok(()) +} + #[test_context(TrustifyContext)] #[test(tokio::test)] async fn analyze_purls(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { From 5ae3648f03983b1365187f793ca44fbf72e7d790 Mon Sep 17 00:00:00 2001 From: Luiz Carvalho Date: Fri, 17 Oct 2025 17:02:22 -0400 Subject: [PATCH 2/2] Generalize vulnerability sorting Signed-off-by: Luiz Carvalho --- .../src/vulnerability/service/mod.rs | 24 +++--- .../src/vulnerability/service/test.rs | 79 ++++++++++++++----- 2 files changed, 73 insertions(+), 30 deletions(-) diff --git a/modules/fundamental/src/vulnerability/service/mod.rs 
b/modules/fundamental/src/vulnerability/service/mod.rs index c173e1e7f..b5f169541 100644 --- a/modules/fundamental/src/vulnerability/service/mod.rs +++ b/modules/fundamental/src/vulnerability/service/mod.rs @@ -59,18 +59,20 @@ impl VulnerabilityService { }) .add_expr( "id_sort_key", - // Create a normalized sort key that preserves prefixes but sorts numbers numerically - // For CVE IDs: converts "CVE-2024-288" to "CVE-2024-0000000000000000288" - // - Year is always 4 digits, no padding needed - // - Pad sequence to 19 digits (max length per CVE schema) - // CVE schema: https://github.com/CVEProject/cve-schema/blob/main/schema/CVE_Record_Format.json - // For other IDs: returns the ID as-is for alphabetical sorting - // This ensures: ABC-123 < CVE-2023-9000 < CVE-2024-10000 < GHSA-xxx + // Pad numeric segments with zeros to achieve the expected numeric sorting. + // The padding is done in two steps. First, prepend 19 zeros to each number + // segment. Second, keep only the 19 right-most digits of each number segment. + // This behaves like LPAD, which cannot be used here because it would be + // evaluated before the regular-expression matching takes place. + // The number 19 is used because that is the longest segment length defined, coming from the + // CVE ID spec. 
Expr::cust( - "CASE WHEN id ~ '^CVE-[0-9]{4}-[0-9]{4,19}$' THEN \ - SUBSTRING(id FROM '^CVE-[0-9]{4}-') || \ - LPAD(SUBSTRING(id FROM '-([0-9]+)$'), 19, '0') \ - ELSE id END" + "REGEXP_REPLACE( \ + REGEXP_REPLACE(id, '\\y([0-9]+)\\y', '0000000000000000000\\1', 'g'), \ + '\\y([0-9]+)([0-9]{19})\\y', \ + '\\2', \ + 'g' \ + )" ), sea_orm::ColumnType::Text, ), diff --git a/modules/fundamental/src/vulnerability/service/test.rs b/modules/fundamental/src/vulnerability/service/test.rs index f48018e23..1655572b6 100644 --- a/modules/fundamental/src/vulnerability/service/test.rs +++ b/modules/fundamental/src/vulnerability/service/test.rs @@ -545,12 +545,36 @@ async fn vulnerability_queries(ctx: &TrustifyContext) -> Result<(), anyhow::Erro async fn vulnerability_numeric_sorting(ctx: &TrustifyContext) -> Result<(), anyhow::Error> { let service = VulnerabilityService::new(); + // Test various OSV ID formats to ensure generic numeric sorting works + // CVE format ctx.graph.ingest_vulnerability("CVE-2024-40000", (), &ctx.db).await?; ctx.graph.ingest_vulnerability("CVE-2024-10288000", (), &ctx.db).await?; - ctx.graph.ingest_vulnerability("CVE-2023-1234", (), &ctx.db).await?; - ctx.graph.ingest_vulnerability("CVE-2024-9000", (), &ctx.db).await?; - ctx.graph.ingest_vulnerability("CVE-2023-5100", (), &ctx.db).await?; - ctx.graph.ingest_vulnerability("GHSA-xxxx-yyyy-zzzz", (), &ctx.db).await?; + + // GHSA format (alphanumeric) + ctx.graph.ingest_vulnerability("GHSA-r9p9-mrjm-926w", (), &ctx.db).await?; + ctx.graph.ingest_vulnerability("GHSA-vp9c-fpxx-744v", (), &ctx.db).await?; + + // Go format + ctx.graph.ingest_vulnerability("GO-2024-268", (), &ctx.db).await?; + ctx.graph.ingest_vulnerability("GO-2024-1234", (), &ctx.db).await?; + + // RustSec format + ctx.graph.ingest_vulnerability("RUSTSEC-2019-0033", (), &ctx.db).await?; + ctx.graph.ingest_vulnerability("RUSTSEC-2024-0001", (), &ctx.db).await?; + + // Alpine format + ctx.graph.ingest_vulnerability("ALPINE-12345", (), 
&ctx.db).await?; + ctx.graph.ingest_vulnerability("ALPINE-6789", (), &ctx.db).await?; + + // PyPI format + ctx.graph.ingest_vulnerability("PYSEC-2021-1234", (), &ctx.db).await?; + ctx.graph.ingest_vulnerability("PYSEC-2024-5678", (), &ctx.db).await?; + + // OSV format + ctx.graph.ingest_vulnerability("OSV-2020-111", (), &ctx.db).await?; + ctx.graph.ingest_vulnerability("OSV-2020-58", (), &ctx.db).await?; + + // Generic test prefix ctx.graph.ingest_vulnerability("ABC-xxxx-yyyy", (), &ctx.db).await?; // Test ascending sort @@ -562,14 +586,23 @@ async fn vulnerability_numeric_sorting(ctx: &TrustifyContext) -> Result<(), anyh &ctx.db, ) .await?; - assert_eq!(7, vulns.items.len()); + assert_eq!(15, vulns.items.len()); + // Alphabetical by prefix, then numeric within each prefix assert_eq!(vulns.items[0].head.identifier, "ABC-xxxx-yyyy"); - assert_eq!(vulns.items[1].head.identifier, "CVE-2023-1234"); - assert_eq!(vulns.items[2].head.identifier, "CVE-2023-5100"); - assert_eq!(vulns.items[3].head.identifier, "CVE-2024-9000"); - assert_eq!(vulns.items[4].head.identifier, "CVE-2024-40000"); - assert_eq!(vulns.items[5].head.identifier, "CVE-2024-10288000"); - assert_eq!(vulns.items[6].head.identifier, "GHSA-xxxx-yyyy-zzzz"); + assert_eq!(vulns.items[1].head.identifier, "ALPINE-6789"); + assert_eq!(vulns.items[2].head.identifier, "ALPINE-12345"); + assert_eq!(vulns.items[3].head.identifier, "CVE-2024-40000"); + assert_eq!(vulns.items[4].head.identifier, "CVE-2024-10288000"); + assert_eq!(vulns.items[5].head.identifier, "GHSA-r9p9-mrjm-926w"); + assert_eq!(vulns.items[6].head.identifier, "GHSA-vp9c-fpxx-744v"); + assert_eq!(vulns.items[7].head.identifier, "GO-2024-268"); + assert_eq!(vulns.items[8].head.identifier, "GO-2024-1234"); + assert_eq!(vulns.items[9].head.identifier, "OSV-2020-58"); + assert_eq!(vulns.items[10].head.identifier, "OSV-2020-111"); + assert_eq!(vulns.items[11].head.identifier, "PYSEC-2021-1234"); + assert_eq!(vulns.items[12].head.identifier, 
"PYSEC-2024-5678"); + assert_eq!(vulns.items[13].head.identifier, "RUSTSEC-2019-0033"); + assert_eq!(vulns.items[14].head.identifier, "RUSTSEC-2024-0001"); // Test descending sort let vulns = service @@ -580,14 +613,22 @@ async fn vulnerability_numeric_sorting(ctx: &TrustifyContext) -> Result<(), anyh &ctx.db, ) .await?; - assert_eq!(7, vulns.items.len()); - assert_eq!(vulns.items[0].head.identifier, "GHSA-xxxx-yyyy-zzzz"); - assert_eq!(vulns.items[1].head.identifier, "CVE-2024-10288000"); - assert_eq!(vulns.items[2].head.identifier, "CVE-2024-40000"); - assert_eq!(vulns.items[3].head.identifier, "CVE-2024-9000"); - assert_eq!(vulns.items[4].head.identifier, "CVE-2023-5100"); - assert_eq!(vulns.items[5].head.identifier, "CVE-2023-1234"); - assert_eq!(vulns.items[6].head.identifier, "ABC-xxxx-yyyy"); + assert_eq!(15, vulns.items.len()); + assert_eq!(vulns.items[0].head.identifier, "RUSTSEC-2024-0001"); + assert_eq!(vulns.items[1].head.identifier, "RUSTSEC-2019-0033"); + assert_eq!(vulns.items[2].head.identifier, "PYSEC-2024-5678"); + assert_eq!(vulns.items[3].head.identifier, "PYSEC-2021-1234"); + assert_eq!(vulns.items[4].head.identifier, "OSV-2020-111"); + assert_eq!(vulns.items[5].head.identifier, "OSV-2020-58"); + assert_eq!(vulns.items[6].head.identifier, "GO-2024-1234"); + assert_eq!(vulns.items[7].head.identifier, "GO-2024-268"); + assert_eq!(vulns.items[8].head.identifier, "GHSA-vp9c-fpxx-744v"); + assert_eq!(vulns.items[9].head.identifier, "GHSA-r9p9-mrjm-926w"); + assert_eq!(vulns.items[10].head.identifier, "CVE-2024-10288000"); + assert_eq!(vulns.items[11].head.identifier, "CVE-2024-40000"); + assert_eq!(vulns.items[12].head.identifier, "ALPINE-12345"); + assert_eq!(vulns.items[13].head.identifier, "ALPINE-6789"); + assert_eq!(vulns.items[14].head.identifier, "ABC-xxxx-yyyy"); Ok(()) }