From 67bc3acb35c01d8bd1bb8f8322591fe3627536a6 Mon Sep 17 00:00:00 2001 From: "Clelia (Astra) Bertelli" Date: Fri, 6 Feb 2026 15:44:32 +0100 Subject: [PATCH 1/8] wip: migrate storage layer to qdrant edge --- .gitignore | 1 + Cargo.lock | 5085 +++++++++++++++------------------------- Cargo.toml | 11 +- src/bin/workspace.rs | 15 +- src/search/mod.rs | 15 +- src/workspace/store.rs | 1596 ++++--------- 6 files changed, 2407 insertions(+), 4316 deletions(-) diff --git a/.gitignore b/.gitignore index c5078494..8ecea0db 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,3 @@ target .idea +legacy_store diff --git a/Cargo.lock b/Cargo.lock index 07fb3233..4a60ed6f 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,6 +17,12 @@ version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "320119579fcad9c21884f5c4861d16174d0e06250625266f50fe6898340abefa" +[[package]] +name = "adler32" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aae1277d39aeec15cb388266ecc24b11c80469deae6067e17a1a7aa9e5c1f234" + [[package]] name = "ahash" version = "0.8.12" @@ -24,7 +30,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a15f179cd60c4584b8a8c596927aadc462e27f2ca70c04e0071964a73ba7a75" dependencies = [ "cfg-if", - "const-random", "getrandom 0.3.3", "once_cell", "serde", @@ -42,18 +47,12 @@ dependencies = [ ] [[package]] -name = "alloc-no-stdlib" -version = "2.0.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cc7bb162ec39d46ab1ca8c77bf72e890535becd1751bb45f64c597edb4c8c6b3" - -[[package]] -name = "alloc-stdlib" -version = "0.2.2" +name = "aligned-vec" +version = "0.6.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "94fb8275041c72129eb51b7d0322c29b8387a0386127718b096429201a5d6ece" +checksum = "dc890384c8602f339876ded803c97ad529f3842aba97f6392b3dba0dd171769b" dependencies = [ - "alloc-no-stdlib", + "equator", ] 
[[package]] @@ -62,12 +61,6 @@ version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "683d7910e743518b0e34f1186f92494becacb047c7b6bf616c96772180fef923" -[[package]] -name = "android-tzdata" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e999941b234f3131b00bc13c22d06e8c5ff726d1b6318ac7eb276997bbb4fef0" - [[package]] name = "android_system_properties" version = "0.1.5" @@ -129,302 +122,61 @@ dependencies = [ [[package]] name = "anyhow" -version = "1.0.99" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b0674a1ddeecb70197781e945de4b3b8ffb61fa939a5597bcf48503737663100" - -[[package]] -name = "approx" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" -dependencies = [ - "num-traits", -] - -[[package]] -name = "ar_archive_writer" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7eb93bbb63b9c227414f6eb3a0adfddca591a8ce1e9b60661bb08969b87e340b" -dependencies = [ - "object 0.37.3", -] - -[[package]] -name = "arc-swap" -version = "1.7.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "69f7f8c3906b62b754cd5326047894316021dcfe5a194c8ea52bdd94934a3457" - -[[package]] -name = "arrayref" -version = "0.3.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76a2e8124351fda1ef8aaaa3bbd7ebbcb486bbcd4225aca0aa0d84bb2db8fecb" - -[[package]] -name = "arrayvec" -version = "0.7.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" - -[[package]] -name = "arrow" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e833808ff2d94ed40d9379848a950d995043c7fb3e81a30b383f4c6033821cc" -dependencies = [ - "arrow-arith", 
- "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-csv", - "arrow-data", - "arrow-ipc", - "arrow-json", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "arrow-string", -] - -[[package]] -name = "arrow-arith" -version = "56.2.0" +version = "1.0.101" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ad08897b81588f60ba983e3ca39bda2b179bdd84dced378e7df81a5313802ef8" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "num", -] +checksum = "5f0e0fee31ef5ed1ba1316088939cea399010ed7731dba877ed44aeb407a75ea" [[package]] -name = "arrow-array" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8548ca7c070d8db9ce7aa43f37393e4bfcf3f2d3681df278490772fd1673d08d" +name = "api" +version = "1.16.4-dev" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" dependencies = [ "ahash", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "chrono", - "chrono-tz", - "half", - "hashbrown 0.16.1", - "num", -] - -[[package]] -name = "arrow-buffer" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e003216336f70446457e280807a73899dd822feaf02087d31febca1363e2fccc" -dependencies = [ - "bytes", - "half", - "num", -] - -[[package]] -name = "arrow-cast" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "919418a0681298d3a77d1a315f625916cb5678ad0d74b9c60108eb15fd083023" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "atoi", - "base64 0.22.1", - "chrono", - "comfy-table", - "half", - "lexical-core", - "num", - "ryu", -] - -[[package]] -name = "arrow-csv" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bfa9bf02705b5cf762b6f764c65f04ae9082c7cfc4e96e0c33548ee3f67012eb" -dependencies = [ - 
"arrow-array", - "arrow-cast", - "arrow-schema", - "chrono", - "csv", - "csv-core", - "regex", -] - -[[package]] -name = "arrow-data" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5c64fff1d142f833d78897a772f2e5b55b36cb3e6320376f0961ab0db7bd6d0" -dependencies = [ - "arrow-buffer", - "arrow-schema", - "half", - "num", -] - -[[package]] -name = "arrow-ipc" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d3594dcddccc7f20fd069bc8e9828ce37220372680ff638c5e00dea427d88f5" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "flatbuffers", - "lz4_flex", - "zstd", -] - -[[package]] -name = "arrow-json" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "88cf36502b64a127dc659e3b305f1d993a544eab0d48cce704424e62074dc04b" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", "chrono", - "half", - "indexmap 2.11.1", - "lexical-core", - "memchr", - "num", - "serde", - "serde_json", - "simdutf8", -] - -[[package]] -name = "arrow-ord" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3c8f82583eb4f8d84d4ee55fd1cb306720cddead7596edce95b50ee418edf66f" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", -] - -[[package]] -name = "arrow-row" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9d07ba24522229d9085031df6b94605e0f4b26e099fb7cdeec37abd941a73753" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "half", -] - -[[package]] -name = "arrow-schema" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b3aa9e59c611ebc291c28582077ef25c97f1975383f1479b12f3b9ffee2ffabe" -dependencies = [ - "bitflags", + 
"common", + "itertools 0.14.0", + "ordered-float 5.1.0", + "parking_lot", + "prost", + "prost-build", + "prost-wkt-types", + "rand 0.9.2", + "schemars", + "segment", "serde", "serde_json", + "sparse", + "thiserror 2.0.18", + "tokio", + "tonic", + "tonic-build", + "uuid", + "validator", ] [[package]] -name = "arrow-select" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c41dbbd1e97bfcaee4fcb30e29105fb2c75e4d82ae4de70b792a5d3f66b2e7a" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "num", -] - -[[package]] -name = "arrow-string" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "53f5183c150fbc619eede22b861ea7c0eebed8eaac0333eaa7f6da5205fd504d" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "memchr", - "num", - "regex", - "regex-syntax", -] - -[[package]] -name = "async-channel" -version = "2.5.0" +name = "approx" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "924ed96dd52d1b75e9c1a3e6275715fd320f5f9439fb5a4a11fa51f4221158d2" +checksum = "cab112f0a86d568ea0e627cc1d6be74a1e9cd55214684db5561995f6dad897c6" dependencies = [ - "concurrent-queue", - "event-listener-strategy", - "futures-core", - "pin-project-lite", + "num-traits", ] [[package]] -name = "async-compression" -version = "0.4.19" +name = "arrayvec" +version = "0.4.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "06575e6a9673580f52661c92107baabffbf41e2141373441cbcdc47cb733003c" +checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9" dependencies = [ - "bzip2 0.5.2", - "flate2", - "futures-core", - "memchr", - "pin-project-lite", - "tokio", - "xz2", - "zstd", - "zstd-safe", + "nodrop", ] [[package]] -name = "async-lock" -version = "3.4.1" +name = "arrayvec" +version = "0.7.6" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fd03604047cee9b6ce9de9f70c6cd540a0520c813cbd49bae61f33ab80ed1dc" -dependencies = [ - "event-listener", - "event-listener-strategy", - "pin-project-lite", -] +checksum = "7c02d123df017efcdfbd739ef81735b36c5ba83ec3c59c80a9d7ecc718f92e50" [[package]] name = "async-openai" @@ -446,7 +198,7 @@ dependencies = [ "serde", "serde_json", "serde_urlencoded", - "thiserror 2.0.16", + "thiserror 2.0.18", "tokio", "tokio-stream", "tokio-util", @@ -466,10 +218,21 @@ dependencies = [ ] [[package]] -name = "async-recursion" -version = "1.1.1" +name = "async-stream" +version = "0.3.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3b43422f69d8ff38f95f1b2bb76517c91589a924d1559a0e935d7c8ce0274c11" +checksum = "0b5a71a6f37880a80d1d7f19efd781e4b5de42c88f0722cc13bcb6cc2cfe8476" +dependencies = [ + "async-stream-impl", + "futures-core", + "pin-project-lite", +] + +[[package]] +name = "async-stream-impl" +version = "0.3.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c7c24de15d275a1ecfd47a380fb4d5ec9bfe0933f309ed5e705b775596a3574d" dependencies = [ "proc-macro2", "quote", @@ -488,28 +251,27 @@ dependencies = [ ] [[package]] -name = "async_cell" -version = "0.2.3" +name = "atomic-waker" +version = "1.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "447ab28afbb345f5408b120702a44e5529ebf90b1796ec76e9528df8e288e6c2" -dependencies = [ - "loom", -] +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" [[package]] -name = "atoi" -version = "2.0.0" +name = "atomic_refcell" +version = "0.1.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f28d99ec8bfea296261ca1af174f24225171fea9664ba9003cbebee704810528" -dependencies = [ - "num-traits", -] +checksum = "41e67cd8309bbd06cd603a9e693a784ac2e5d1e955f11286e355089fcab3047c" [[package]] -name = "atomic-waker" -version = "1.1.2" +name = 
"atomicwrites" +version = "0.4.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" +checksum = "3ef1bb8d1b645fe38d51dfc331d720fb5fc2c94b440c76cc79c80ff265ca33e3" +dependencies = [ + "rustix 0.38.44", + "tempfile", + "windows-sys 0.52.0", +] [[package]] name = "autocfg" @@ -517,6 +279,51 @@ version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8" +[[package]] +name = "axum" +version = "0.6.20" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3b829e4e32b91e643de6eafe82b1d90675f5874230191a4ffbc1b336dec4d6bf" +dependencies = [ + "async-trait", + "axum-core", + "bitflags 1.3.2", + "bytes", + "futures-util", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "itoa", + "matchit", + "memchr", + "mime", + "percent-encoding", + "pin-project-lite", + "rustversion", + "serde", + "sync_wrapper 0.1.2", + "tower 0.4.13", + "tower-layer", + "tower-service", +] + +[[package]] +name = "axum-core" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "759fa577a247914fd3f7f76d62972792636412fbfd634cd452f6a385a74d2d2c" +dependencies = [ + "async-trait", + "bytes", + "futures-util", + "http 0.2.12", + "http-body 0.4.6", + "mime", + "rustversion", + "tower-layer", + "tower-service", +] + [[package]] name = "backoff" version = "0.4.0" @@ -541,7 +348,7 @@ dependencies = [ "cfg-if", "libc", "miniz_oxide", - "object 0.36.7", + "object", "rustc-demangle", "windows-targets 0.52.6", ] @@ -552,6 +359,12 @@ version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9e1b586273c5702936fe7b7d6896644d8be71e6314cfe09d3167c95f712589e8" +[[package]] +name = "base64" +version = "0.21.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"9d297deb1925b89f2ccc13d7635fa0714f12c87adce1c75356b39ca9b7178567" + [[package]] name = "base64" version = "0.22.1" @@ -559,29 +372,66 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6" [[package]] -name = "bigdecimal" -version = "0.4.8" +name = "bincode" +version = "1.3.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1a22f228ab7a1b23027ccc6c350b72868017af7ea8356fbdf19f8d991c690013" +checksum = "b1f45e9417d87227c7a56d22e471c6206462cba514c7590c09aff4cf6d1ddcad" dependencies = [ - "autocfg", - "libm", - "num-bigint", - "num-integer", - "num-traits", + "serde", +] + +[[package]] +name = "bincode" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "36eaf5d7b090263e8150820482d5d93cd964a81e4019913c972f4edcc6edb740" +dependencies = [ + "bincode_derive", + "serde", + "unty", +] + +[[package]] +name = "bincode_derive" +version = "2.0.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf95709a440f45e986983918d0e8a1f30a9b1df04918fc828670606804ac3c09" +dependencies = [ + "virtue", ] +[[package]] +name = "binout" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "222fb4925a15bea6a68075021910e03d6aa2d04951d71ff1d956190a551d738f" + +[[package]] +name = "bitflags" +version = "1.3.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a" + [[package]] name = "bitflags" version = "2.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "34efbcccd345379ca2868b2b2c9d3782e9cc58ba87bc7d79d5b53d9c9ae6f25d" +[[package]] +name = "bitm" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e7edec3daafc233e78a219c85a77bcf535ee267b0fae7a1aad96bd1a67add5d3" +dependencies = [ + 
"dyn_size_of", +] + [[package]] name = "bitpacking" -version = "0.9.2" +version = "0.9.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c1d3e2bfd8d06048a179f7b17afc3188effa10385e7b00dc65af6aae732ea92" +checksum = "96a7139abd3d9cebf8cd6f920a389cf3dc9576172e32f4563f188cae3c3eb019" dependencies = [ "crunchy", ] @@ -599,24 +449,12 @@ dependencies = [ ] [[package]] -name = "blake2" -version = "0.10.6" +name = "blake2-rfc" +version = "0.2.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "46502ad458c9a52b69d4d4d32775c788b7a1b85e8bc9d482d92250fc0e3f8efe" +checksum = "5d6d530bdd2d52966a6d03b7a964add7ae1a288d25214066fd4b600f0f796400" dependencies = [ - "digest", -] - -[[package]] -name = "blake3" -version = "1.8.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3888aaa89e4b2a40fca9848e400f6a658a5a3978de7be858e209cafa8be9a4a0" -dependencies = [ - "arrayref", - "arrayvec", - "cc", - "cfg-if", + "arrayvec 0.4.12", "constant_time_eq", ] @@ -629,52 +467,6 @@ dependencies = [ "generic-array", ] -[[package]] -name = "bon" -version = "3.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2529c31017402be841eb45892278a6c21a000c0a17643af326c73a73f83f0fb" -dependencies = [ - "bon-macros", - "rustversion", -] - -[[package]] -name = "bon-macros" -version = "3.7.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d82020dadcb845a345591863adb65d74fa8dc5c18a0b6d408470e13b7adc7005" -dependencies = [ - "darling 0.21.3", - "ident_case", - "prettyplease", - "proc-macro2", - "quote", - "rustversion", - "syn 2.0.106", -] - -[[package]] -name = "brotli" -version = "8.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4bd8b9603c7aa97359dbd97ecf258968c95f3adddd6db2f7e7a5bef101c84560" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", - "brotli-decompressor", -] - -[[package]] -name = "brotli-decompressor" 
-version = "5.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "874bb8112abecc98cbd6d81ea4fa7e94fb9449648c93cc89aa40c81c24d7de03" -dependencies = [ - "alloc-no-stdlib", - "alloc-stdlib", -] - [[package]] name = "bstr" version = "1.12.1" @@ -694,9 +486,23 @@ checksum = "46c5e41b57b8bba42a04676d81cb89e9ee8e859a1a66f80a5a72e1cb76b34d43" [[package]] name = "bytemuck" -version = "1.23.2" +version = "1.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8efb64bd706a16a1bdde310ae86b351e4d21550d98d056f22f8a7f7a2183fec" +dependencies = [ + "bytemuck_derive", +] + +[[package]] +name = "bytemuck_derive" +version = "1.10.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3995eaeebcdf32f91f980d360f78732ddc061097ab4e39991ae7a6ace9194677" +checksum = "f9abbd1bc6865053c427f7198e6af43bfdedc55ab791faed4fbd361d789575ff" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] [[package]] name = "byteorder" @@ -710,34 +516,6 @@ version = "1.11.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33" -[[package]] -name = "bzip2" -version = "0.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49ecfb22d906f800d4fe833b6282cf4dc1c298f5057ca0b5445e5c209735ca47" -dependencies = [ - "bzip2-sys", -] - -[[package]] -name = "bzip2" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f3a53fac24f34a81bc9954b5d6cfce0c21e18ec6959f44f56e8e90e4bb7c346c" -dependencies = [ - "libbz2-rs-sys", -] - -[[package]] -name = "bzip2-sys" -version = "0.1.13+1.0.8" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "225bff33b2141874fe80d71e07d6eec4f85c5c216453dd96388240f96e1acc14" -dependencies = [ - "cc", - "pkg-config", -] - [[package]] name = "castaway" version = "0.2.4" @@ -759,10 +537,13 @@ dependencies = [ ] 
[[package]] -name = "census" -version = "0.4.2" +name = "cedarwood" +version = "0.4.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4f4c707c6a209cbe82d10abd08e1ea8995e9ea937d2550646e02798948992be0" +checksum = "6d910bedd62c24733263d0bed247460853c9d22e8956bd4cd964302095e04e90" +dependencies = [ + "smallvec", +] [[package]] name = "cfg-if" @@ -777,28 +558,48 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724" [[package]] -name = "chrono" -version = "0.4.41" +name = "cgroups-rs" +version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c469d952047f47f91b68d1cba3f10d63c11d73e4636f24f08daf0278abf01c4d" +checksum = "6db7c2f5545da4c12c5701455d9471da5f07db52e49b9cccb4f5512226dd0836" dependencies = [ - "android-tzdata", - "iana-time-zone", - "js-sys", - "num-traits", + "libc", + "log", + "nix 0.25.1", + "regex", + "thiserror 1.0.69", +] + +[[package]] +name = "charabia" +version = "0.9.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "51689ee7cc84c8de789fc2874711d816055b93406cfd4135c40d1c82dd24b928" +dependencies = [ + "aho-corasick", + "csv", + "either", + "fst", + "irg-kvariants", + "jieba-rs", "serde", - "wasm-bindgen", - "windows-link 0.1.3", + "slice-group-by", + "unicode-normalization", + "whatlang", ] [[package]] -name = "chrono-tz" -version = "0.10.4" +name = "chrono" +version = "0.4.43" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a6139a8597ed92cf816dfb33f5dd6cf0bb93a6adc938f11039f371bc5bcd26c3" +checksum = "fac4744fb15ae8337dc853fee7fb3f4e48c0fbaa23d0afe49c447b4fab126118" dependencies = [ - "chrono", - "phf", + "iana-time-zone", + "js-sys", + "num-traits", + "serde", + "wasm-bindgen", + "windows-link 0.2.0", ] [[package]] @@ -820,7 +621,7 @@ dependencies = [ "anstream", "anstyle", "clap_lex", - "strsim", + "strsim 0.11.1", ] [[package]] 
@@ -848,14 +649,39 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b05b61dc5112cbb17e4b6cd61790d9845d13888356391624cbe7e41efeac1e75" [[package]] -name = "comfy-table" -version = "7.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0d05af1e006a2407bedef5af410552494ce5be9090444dbbcb57258c1af3d56" +name = "common" +version = "0.0.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" dependencies = [ + "atomicwrites", + "bitvec", + "bytemuck", + "chrono", + "fs-err", + "log", + "memmap2", + "memory", + "nix 0.31.1", + "num-traits", + "num_cpus", + "ordered-float 5.1.0", + "parking_lot", + "ph", + "rand 0.9.2", + "schemars", + "semver", + "serde", + "serde_json", "strum", - "strum_macros", - "unicode-width", + "tap", + "tar", + "tempfile", + "thiserror 2.0.18", + "thread-priority", + "tokio", + "validator", + "walkdir", + "zerocopy", ] [[package]] @@ -873,15 +699,6 @@ dependencies = [ "static_assertions", ] -[[package]] -name = "concurrent-queue" -version = "2.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ca0197aee26d1ae37445ee532fefce43251d24cc7c166799f4d46817f1d3973" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "console" version = "0.15.11" @@ -896,30 +713,23 @@ dependencies = [ ] [[package]] -name = "const-random" -version = "0.1.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "87e00182fe74b066627d63b85fd550ac2998d4b0bd86bfed477a0ae4c7c71359" -dependencies = [ - "const-random-macro", -] - -[[package]] -name = "const-random-macro" -version = "0.1.16" +name = "console" +version = "0.16.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f9d839f2a20b0aee515dc581a6172f2321f96cab76c1a38a4c584a194955390e" +checksum = "03e45a4a8926227e4197636ba97a9fc9b00477e9f4bd711395687c5f0734bec4" dependencies = [ - "getrandom 0.2.16", + 
"encode_unicode", + "libc", "once_cell", - "tiny-keccak", + "unicode-width", + "windows-sys 0.61.0", ] [[package]] name = "constant_time_eq" -version = "0.3.1" +version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7c74b8349d32d297c9134b8c88677813a227df8f779daa29bfc29c183fe3dca6" +checksum = "245097e9a4535ee1e3e3931fcfcd55a796a44c643e8596ff6566d68f09b87bbc" [[package]] name = "core-foundation" @@ -937,6 +747,15 @@ version = "0.8.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "773648b94d0e5d620f64f280777445740e61fe701025087ec8b57f45c791888b" +[[package]] +name = "core2" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b49ba7ef1ad6107f8824dbe97de947cbaac53c44e7f9756a1fba0d37c1eec505" +dependencies = [ + "memchr", +] + [[package]] name = "cpufeatures" version = "0.2.17" @@ -946,6 +765,15 @@ dependencies = [ "libc", ] +[[package]] +name = "crc32c" +version = "0.6.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3a47af21622d091a8f0fb295b88bc886ac74efcc613efc19f5d0b21de5c89e47" +dependencies = [ + "rustc_version", +] + [[package]] name = "crc32fast" version = "1.5.0" @@ -983,15 +811,6 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "crossbeam-queue" -version = "0.3.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f58bbc28f91df819d0aa2a2c00cd19754769c2fad90579b3592b1c9ba7a3115" -dependencies = [ - "crossbeam-utils", -] - [[package]] name = "crossbeam-utils" version = "0.8.21" @@ -1036,23 +855,19 @@ dependencies = [ ] [[package]] -name = "darling" -version = "0.20.11" +name = "daachorse" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" -dependencies = [ - "darling_core 0.20.11", - "darling_macro 0.20.11", -] +checksum = 
"63b7ef7a4be509357f4804d0a22e830daddb48f19fd604e4ad32ddce04a94c36" [[package]] name = "darling" -version = "0.21.3" +version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9cdf337090841a411e2a7f3deb9187445851f91b309c0c0a29e05f74a00a48c0" +checksum = "fc7f46116c46ff9ab3eb1597a45688b6715c6e628b5c133e288e709a29bcb4ee" dependencies = [ - "darling_core 0.21.3", - "darling_macro 0.21.3", + "darling_core", + "darling_macro", ] [[package]] @@ -1065,21 +880,7 @@ dependencies = [ "ident_case", "proc-macro2", "quote", - "strsim", - "syn 2.0.106", -] - -[[package]] -name = "darling_core" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1247195ecd7e3c85f83c8d2a366e4210d588e802133e1e355180a9870b517ea4" -dependencies = [ - "fnv", - "ident_case", - "proc-macro2", - "quote", - "strsim", + "strsim 0.11.1", "syn 2.0.106", ] @@ -1089,18 +890,7 @@ version = "0.20.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc34b93ccb385b40dc71c6fceac4b2ad23662c7eeb248cf10d529b7e055b6ead" dependencies = [ - "darling_core 0.20.11", - "quote", - "syn 2.0.106", -] - -[[package]] -name = "darling_macro" -version = "0.21.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d38308df82d1080de0afee5d069fa14b0326a88c14f15c5ccda35b4a6c414c81" -dependencies = [ - "darling_core 0.21.3", + "darling_core", "quote", "syn 2.0.106", ] @@ -1115,956 +905,352 @@ dependencies = [ ] [[package]] -name = "dashmap" -version = "6.1.0" +name = "data-encoding" +version = "2.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5041cc499144891f3790297212f32a74fb938e5136a14943f338ef9e0ae276cf" -dependencies = [ - "cfg-if", - "crossbeam-utils", - "hashbrown 0.14.5", - "lock_api", - "once_cell", - "parking_lot_core", -] +checksum = "d7a1e2f27636f116493b8b860f5546edb47c8d8f8ea73e1d2a20be88e28d1fea" [[package]] -name = "datafusion" -version = "50.3.0" 
-source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2af15bb3c6ffa33011ef579f6b0bcbe7c26584688bd6c994f548e44df67f011a" +name = "dataset" +version = "0.0.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" dependencies = [ - "arrow", - "arrow-ipc", - "arrow-schema", - "async-trait", - "bytes", - "bzip2 0.6.1", - "chrono", - "datafusion-catalog", - "datafusion-catalog-listing", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-datasource-csv", - "datafusion-datasource-json", - "datafusion-datasource-parquet", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-nested", - "datafusion-functions-table", - "datafusion-functions-window", - "datafusion-optimizer", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", + "anyhow", "flate2", - "futures", - "itertools 0.14.0", - "log", - "object_store", - "parking_lot", - "parquet", - "rand 0.9.2", - "regex", - "sqlparser", - "tempfile", - "tokio", - "url", - "uuid", - "xz2", - "zstd", + "fs-err", + "indicatif 0.18.3", + "reqwest", ] [[package]] -name = "datafusion-catalog" -version = "50.3.0" +name = "delegate" +version = "0.13.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "187622262ad8f7d16d3be9202b4c1e0116f1c9aa387e5074245538b755261621" +checksum = "780eb241654bf097afb00fc5f054a09b687dad862e485fdcf8399bb056565370" dependencies = [ - "arrow", - "async-trait", - "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-session", - "datafusion-sql", - 
"futures", - "itertools 0.14.0", - "log", - "object_store", - "parking_lot", - "tokio", + "proc-macro2", + "quote", + "syn 2.0.106", ] [[package]] -name = "datafusion-catalog-listing" -version = "50.3.0" +name = "derive_builder" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9657314f0a32efd0382b9a46fdeb2d233273ece64baa68a7c45f5a192daf0f83" +checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" dependencies = [ - "arrow", - "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "log", - "object_store", - "tokio", + "derive_builder_macro", ] [[package]] -name = "datafusion-common" -version = "50.3.0" +name = "derive_builder_core" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a83760d9a13122d025fbdb1d5d5aaf93dd9ada5e90ea229add92aa30898b2d1" +checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" dependencies = [ - "ahash", - "arrow", - "arrow-ipc", - "base64 0.22.1", - "chrono", - "half", - "hashbrown 0.14.5", - "indexmap 2.11.1", - "libc", - "log", - "object_store", - "parquet", - "paste", - "recursive", - "sqlparser", - "tokio", - "web-time", + "darling", + "proc-macro2", + "quote", + "syn 2.0.106", ] [[package]] -name = "datafusion-common-runtime" -version = "50.3.0" +name = "derive_builder_macro" +version = "0.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b6234a6c7173fe5db1c6c35c01a12b2aa0f803a3007feee53483218817f8b1e" +checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" dependencies = [ - "futures", - "log", - "tokio", + "derive_builder_core", + "syn 2.0.106", ] [[package]] -name = "datafusion-datasource" -version = "50.3.0" +name = 
"digest" +version = "0.10.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7256c9cb27a78709dd42d0c80f0178494637209cac6e29d5c93edd09b6721b86" +checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" dependencies = [ - "arrow", - "async-compression", - "async-trait", - "bytes", - "bzip2 0.6.1", - "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "flate2", - "futures", - "glob", - "itertools 0.14.0", - "log", - "object_store", - "parquet", - "rand 0.9.2", - "tempfile", - "tokio", - "tokio-util", - "url", - "xz2", - "zstd", + "block-buffer", + "crypto-common", ] [[package]] -name = "datafusion-datasource-csv" -version = "50.3.0" +name = "dirs" +version = "5.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "64533a90f78e1684bfb113d200b540f18f268134622d7c96bbebc91354d04825" +checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "object_store", - "regex", - "tokio", + "dirs-sys 0.4.1", ] [[package]] -name = "datafusion-datasource-json" -version = "50.3.0" +name = "dirs" +version = "6.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8d7ebeb12c77df0aacad26f21b0d033aeede423a64b2b352f53048a75bf1d6e6" +checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-catalog", - "datafusion-common", 
- "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-session", - "futures", - "object_store", - "serde_json", - "tokio", + "dirs-sys 0.5.0", ] [[package]] -name = "datafusion-datasource-parquet" -version = "50.3.0" +name = "dirs-sys" +version = "0.4.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09e783c4c7d7faa1199af2df4761c68530634521b176a8d1331ddbc5a5c75133" +checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" dependencies = [ - "arrow", - "async-trait", - "bytes", - "datafusion-catalog", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-datasource", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate", - "datafusion-physical-expr", - "datafusion-physical-expr-adapter", - "datafusion-physical-expr-common", - "datafusion-physical-optimizer", - "datafusion-physical-plan", - "datafusion-pruning", - "datafusion-session", - "futures", - "itertools 0.14.0", - "log", - "object_store", - "parking_lot", - "parquet", - "rand 0.9.2", - "tokio", + "libc", + "option-ext", + "redox_users 0.4.6", + "windows-sys 0.48.0", ] [[package]] -name = "datafusion-doc" -version = "50.3.0" +name = "dirs-sys" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "99ee6b1d9a80d13f9deb2291f45c07044b8e62fb540dbde2453a18be17a36429" +checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" +dependencies = [ + "libc", + "option-ext", + "redox_users 0.5.2", + "windows-sys 0.61.0", +] [[package]] -name = "datafusion-execution" -version = "50.3.0" +name = "displaydoc" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a4cec0a57653bec7b933fb248d3ffa3fa3ab3bd33bd140dc917f714ac036f531" +checksum = 
"97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" dependencies = [ - "arrow", - "async-trait", - "dashmap", - "datafusion-common", - "datafusion-expr", - "futures", - "log", - "object_store", - "parking_lot", - "rand 0.9.2", - "tempfile", - "url", + "proc-macro2", + "quote", + "syn 2.0.106", ] [[package]] -name = "datafusion-expr" -version = "50.3.0" +name = "docopt" +version = "1.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef76910bdca909722586389156d0aa4da4020e1631994d50fadd8ad4b1aa05fe" +checksum = "7f3f119846c823f9eafcf953a8f6ffb6ed69bf6240883261a7f13b634579a51f" dependencies = [ - "arrow", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr-common", - "indexmap 2.11.1", - "paste", - "recursive", - "serde_json", - "sqlparser", + "lazy_static", + "regex", + "serde", + "strsim 0.10.0", ] [[package]] -name = "datafusion-expr-common" -version = "50.3.0" +name = "dyn-clone" +version = "1.0.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6d155ccbda29591ca71a1344dd6bed26c65a4438072b400df9db59447f590bb6" -dependencies = [ - "arrow", - "datafusion-common", - "indexmap 2.11.1", - "itertools 0.14.0", - "paste", -] +checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" [[package]] -name = "datafusion-functions" -version = "50.3.0" +name = "dyn_size_of" +version = "0.4.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7de2782136bd6014670fd84fe3b0ca3b3e4106c96403c3ae05c0598577139977" -dependencies = [ - "arrow", - "arrow-buffer", - "base64 0.22.1", - "blake2", - "blake3", - "chrono", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-macros", - "hex", - "itertools 0.14.0", - "log", - "md-5", - 
"rand 0.9.2", - "regex", - "sha2", - "unicode-segmentation", - "uuid", -] +checksum = "4a742b95783b1f45b900129082cbc47717b6a77ee8d17eea70a8ea62462f5de3" [[package]] -name = "datafusion-functions-aggregate" -version = "50.3.0" +name = "earcutr" +version = "0.4.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "07331fc13603a9da97b74fd8a273f4238222943dffdbbed1c4c6f862a30105bf" +checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01" dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "half", - "log", - "paste", + "itertools 0.11.0", + "num-traits", ] [[package]] -name = "datafusion-functions-aggregate-common" -version = "50.3.0" +name = "ecow" +version = "0.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b5951e572a8610b89968a09b5420515a121fbc305c0258651f318dc07c97ab17" +checksum = "78e4f79b296fbaab6ce2e22d52cb4c7f010fe0ebe7a32e34fa25885fd797bd02" dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", - "datafusion-physical-expr-common", + "serde", ] [[package]] -name = "datafusion-functions-nested" -version = "50.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdacca9302c3d8fc03f3e94f338767e786a88a33f5ebad6ffc0e7b50364b9ea3" +name = "edge" +version = "0.1.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" dependencies = [ - "arrow", - "arrow-ord", - "datafusion-common", - "datafusion-doc", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions", - "datafusion-functions-aggregate", - "datafusion-functions-aggregate-common", - "datafusion-macros", - "datafusion-physical-expr-common", + "ahash", + "common", + "fs-err", + "io", 
"itertools 0.14.0", "log", - "paste", + "ordered-float 5.1.0", + "parking_lot", + "rand 0.9.2", + "segment", + "shard", + "wal", ] [[package]] -name = "datafusion-functions-table" -version = "50.3.0" +name = "either" +version = "1.15.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8c37ff8a99434fbbad604a7e0669717c58c7c4f14c472d45067c4b016621d981" -dependencies = [ - "arrow", - "async-trait", - "datafusion-catalog", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-plan", - "parking_lot", - "paste", -] +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" [[package]] -name = "datafusion-functions-window" -version = "50.3.0" +name = "encode_unicode" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48e2aea7c79c926cffabb13dc27309d4eaeb130f4a21c8ba91cdd241c813652b" -dependencies = [ - "arrow", - "datafusion-common", - "datafusion-doc", - "datafusion-expr", - "datafusion-functions-window-common", - "datafusion-macros", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "log", - "paste", -] +checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" [[package]] -name = "datafusion-functions-window-common" -version = "50.3.0" +name = "encoding_rs" +version = "0.8.35" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fead257ab5fd2ffc3b40fda64da307e20de0040fe43d49197241d9de82a487f" +checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" dependencies = [ - "datafusion-common", - "datafusion-physical-expr-common", + "cfg-if", ] [[package]] -name = "datafusion-macros" -version = "50.3.0" +name = "encoding_rs_io" +version = "0.1.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec6f637bce95efac05cdfb9b6c19579ed4aa5f6b94d951cfa5bb054b7bb4f730" +checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83" dependencies = [ - 
"datafusion-expr", - "quote", - "syn 2.0.106", + "encoding_rs", ] [[package]] -name = "datafusion-optimizer" -version = "50.3.0" +name = "env_filter" +version = "0.1.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c6583ef666ae000a613a837e69e456681a9faa96347bf3877661e9e89e141d8a" +checksum = "1bf3c259d255ca70051b30e2e95b5446cdb8949ac4cd22c0d7fd634d89f568e2" dependencies = [ - "arrow", - "chrono", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "indexmap 2.11.1", - "itertools 0.14.0", "log", - "recursive", "regex", - "regex-syntax", ] [[package]] -name = "datafusion-physical-expr" -version = "50.3.0" +name = "env_logger" +version = "0.11.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c8668103361a272cbbe3a61f72eca60c9b7c706e87cc3565bcf21e2b277b84f6" +checksum = "13c863f0904021b108aa8b2f55046443e6b1ebde8fd4a15c399893aae4fa069f" dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-functions-aggregate-common", - "datafusion-physical-expr-common", - "half", - "hashbrown 0.14.5", - "indexmap 2.11.1", - "itertools 0.14.0", + "anstream", + "anstyle", + "env_filter", + "jiff", "log", - "parking_lot", - "paste", - "petgraph 0.8.2", ] [[package]] -name = "datafusion-physical-expr-adapter" -version = "50.3.0" +name = "equator" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "815acced725d30601b397e39958e0e55630e0a10d66ef7769c14ae6597298bb0" +checksum = "4711b213838dfee0117e3be6ac926007d7f433d7bbe33595975d4190cb07e6fc" dependencies = [ - "arrow", - "datafusion-common", - "datafusion-expr", - "datafusion-functions", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "itertools 0.14.0", + "equator-macro", ] [[package]] -name = "datafusion-physical-expr-common" -version = "50.3.0" +name = "equator-macro" +version = "0.4.2" 
source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6652fe7b5bf87e85ed175f571745305565da2c0b599d98e697bcbedc7baa47c3" +checksum = "44f23cf4b44bfce11a86ace86f8a73ffdec849c9fd00a386a53d278bd9e81fb3" dependencies = [ - "ahash", - "arrow", - "datafusion-common", - "datafusion-expr-common", - "hashbrown 0.14.5", - "itertools 0.14.0", + "proc-macro2", + "quote", + "syn 2.0.106", ] [[package]] -name = "datafusion-physical-optimizer" -version = "50.3.0" +name = "equivalent" +version = "1.0.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "49b7d623eb6162a3332b564a0907ba00895c505d101b99af78345f1acf929b5c" -dependencies = [ - "arrow", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "datafusion-pruning", - "itertools 0.14.0", - "log", - "recursive", -] +checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" [[package]] -name = "datafusion-physical-plan" -version = "50.3.0" +name = "erased-serde" +version = "0.4.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2f7f778a1a838dec124efb96eae6144237d546945587557c9e6936b3414558c" +checksum = "89e8918065695684b2b0702da20382d5ae6065cf3327bc2d6436bd49a71ce9f3" dependencies = [ - "ahash", - "arrow", - "arrow-ord", - "arrow-schema", - "async-trait", - "chrono", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-functions-aggregate-common", - "datafusion-functions-window-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "futures", - "half", - "hashbrown 0.14.5", - "indexmap 2.11.1", - "itertools 0.14.0", - "log", - "parking_lot", - "pin-project-lite", - "tokio", + "serde", + "serde_core", + "typeid", ] [[package]] -name = "datafusion-pruning" -version = "50.3.0" +name = "errno" +version = 
"0.3.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd1e59e2ca14fe3c30f141600b10ad8815e2856caa59ebbd0e3e07cd3d127a65" +checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" dependencies = [ - "arrow", - "arrow-schema", - "datafusion-common", - "datafusion-datasource", - "datafusion-expr-common", - "datafusion-physical-expr", - "datafusion-physical-expr-common", - "datafusion-physical-plan", - "itertools 0.14.0", - "log", + "libc", + "windows-sys 0.61.0", ] [[package]] -name = "datafusion-session" -version = "50.3.0" +name = "esaxx-rs" +version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "21ef8e2745583619bd7a49474e8f45fbe98ebb31a133f27802217125a7b3d58d" +checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" dependencies = [ - "arrow", - "async-trait", - "dashmap", - "datafusion-common", - "datafusion-common-runtime", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-physical-plan", - "datafusion-sql", - "futures", - "itertools 0.14.0", - "log", - "object_store", - "parking_lot", - "tokio", + "cc", ] [[package]] -name = "datafusion-sql" -version = "50.3.0" +name = "eventsource-stream" +version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "89abd9868770386fede29e5a4b14f49c0bf48d652c3b9d7a8a0332329b87d50b" +checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab" dependencies = [ - "arrow", - "bigdecimal", - "datafusion-common", - "datafusion-expr", - "indexmap 2.11.1", - "log", - "recursive", - "regex", - "sqlparser", + "futures-core", + "nom 7.1.3", + "pin-project-lite", ] [[package]] -name = "deepsize" -version = "0.2.0" +name = "fastrand" +version = "2.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cdb987ec36f6bf7bfbea3f928b75590b736fc42af8e54d97592481351b2b96c" -dependencies = [ - "deepsize_derive", -] 
+checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" [[package]] -name = "deepsize_derive" -version = "0.1.2" +name = "filetime" +version = "0.2.27" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "990101d41f3bc8c1a45641024377ee284ecc338e5ecf3ea0f0e236d897c72796" +checksum = "f98844151eee8917efc50bd9e8318cb963ae8b297431495d3f758616ea5c57db" dependencies = [ - "proc-macro2", - "quote", - "syn 1.0.109", + "cfg-if", + "libc", + "libredox", ] [[package]] -name = "deranged" -version = "0.5.3" +name = "fixedbitset" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d630bccd429a5bb5a64b5e94f693bfc48c9f8566418fda4c494cc94f911f87cc" -dependencies = [ - "powerfmt", - "serde", -] +checksum = "0ce7134b9999ecaf8bcd65542e436736ef32ddca1b3e06094cb6ec5755203b80" [[package]] -name = "derive_builder" -version = "0.20.2" +name = "flate2" +version = "1.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "507dfb09ea8b7fa618fcf76e953f4f5e192547945816d5358edffe39f6f94947" +checksum = "843fba2746e448b37e26a819579957415c8cef339bf08564fe8b7ddbd959573c" dependencies = [ - "derive_builder_macro", + "crc32fast", + "miniz_oxide", ] [[package]] -name = "derive_builder_core" -version = "0.20.2" +name = "float_next_after" +version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2d5bcf7b024d6835cfb3d473887cd966994907effbe9227e8c8219824d06c4e8" -dependencies = [ - "darling 0.20.11", - "proc-macro2", - "quote", - "syn 2.0.106", -] +checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" [[package]] -name = "derive_builder_macro" -version = "0.20.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab63b0e2bf4d5928aff72e83a7dace85d7bba5fe12dcc3c5a572d78caffd3f3c" -dependencies = [ - "derive_builder_core", - "syn 2.0.106", -] - -[[package]] -name = "digest" -version = "0.10.7" -source 
= "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ed9a281f7bc9b7576e61468ba615a66a5c8cfdff42420a70aa82701a3b1e292" -dependencies = [ - "block-buffer", - "crypto-common", - "subtle", -] - -[[package]] -name = "dirs" -version = "5.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "44c45a9d03d6676652bcb5e724c7e988de1acad23a711b5217ab9cbecbec2225" -dependencies = [ - "dirs-sys 0.4.1", -] - -[[package]] -name = "dirs" -version = "6.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c3e8aa94d75141228480295a7d0e7feb620b1a5ad9f12bc40be62411e38cce4e" -dependencies = [ - "dirs-sys 0.5.0", -] - -[[package]] -name = "dirs-sys" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "520f05a5cbd335fae5a99ff7a6ab8627577660ee5cfd6a94a6a929b52ff0321c" -dependencies = [ - "libc", - "option-ext", - "redox_users 0.4.6", - "windows-sys 0.48.0", -] - -[[package]] -name = "dirs-sys" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e01a3366d27ee9890022452ee61b2b63a67e6f13f58900b651ff5665f0bb1fab" -dependencies = [ - "libc", - "option-ext", - "redox_users 0.5.2", - "windows-sys 0.61.0", -] - -[[package]] -name = "displaydoc" -version = "0.2.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", -] - -[[package]] -name = "downcast-rs" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "117240f60069e65410b3ae1bb213295bd828f707b5bec6596a1afc8793ce0cbc" - -[[package]] -name = "dyn-clone" -version = "1.0.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d0881ea181b1df73ff77ffaaf9c7544ecc11e82fba9b5f27b262a3c73a332555" - -[[package]] -name = "earcutr" -version = "0.4.3" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "79127ed59a85d7687c409e9978547cffb7dc79675355ed22da6b66fd5f6ead01" -dependencies = [ - "itertools 0.11.0", - "num-traits", -] - -[[package]] -name = "either" -version = "1.15.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" - -[[package]] -name = "encode_unicode" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "34aa73646ffb006b8f5147f3dc182bd4bcb190227ce861fc4a4844bf8e3cb2c0" - -[[package]] -name = "encoding_rs" -version = "0.8.35" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "75030f3c4f45dafd7586dd6780965a8c7e8e285a5ecb86713e63a79c5b2766f3" -dependencies = [ - "cfg-if", -] - -[[package]] -name = "encoding_rs_io" -version = "0.1.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cc3c5651fb62ab8aa3103998dade57efdd028544bd300516baa31840c252a83" -dependencies = [ - "encoding_rs", -] - -[[package]] -name = "equivalent" -version = "1.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "877a4ace8713b0bcf2a4e7eec82529c029f1d0619886d18145fea96c3ffe5c0f" - -[[package]] -name = "errno" -version = "0.3.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb" -dependencies = [ - "libc", - "windows-sys 0.61.0", -] - -[[package]] -name = "esaxx-rs" -version = "0.1.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6" -dependencies = [ - "cc", -] - -[[package]] -name = "ethnum" -version = "1.5.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ca81e6b4777c89fd810c25a4be2b1bd93ea034fbe58e6a75216a34c6b82c539b" - -[[package]] -name = "event-listener" -version = 
"5.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e13b66accf52311f30a0db42147dadea9850cb48cd070028831ae5f5d4b856ab" -dependencies = [ - "concurrent-queue", - "parking", - "pin-project-lite", -] - -[[package]] -name = "event-listener-strategy" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8be9f3dfaaffdae2972880079a491a1a8bb7cbed0b8dd7a347f668b4150a3b93" -dependencies = [ - "event-listener", - "pin-project-lite", -] - -[[package]] -name = "eventsource-stream" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "74fef4569247a5f429d9156b9d0a2599914385dd189c539334c625d8099d90ab" -dependencies = [ - "futures-core", - "nom 7.1.3", - "pin-project-lite", -] - -[[package]] -name = "fast-float2" -version = "0.2.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f8eb564c5c7423d25c886fb561d1e4ee69f72354d16918afa32c08811f6b6a55" - -[[package]] -name = "fastdivide" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afc2bd4d5a73106dd53d10d73d3401c2f32730ba2c0b93ddb888a8983680471" - -[[package]] -name = "fastrand" -version = "2.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be" - -[[package]] -name = "fixedbitset" -version = "0.5.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1d674e81391d1e1ab681a28d99df07927c6d4aa5b027d7da16ba32d1d21ecd99" - -[[package]] -name = "flatbuffers" -version = "25.2.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1045398c1bfd89168b5fd3f1fc11f6e70b34f6f66300c87d44d3de849463abf1" -dependencies = [ - "bitflags", - "rustc_version", -] - -[[package]] -name = "flate2" -version = "1.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"4a3d7db9596fecd151c5f638c0ee5d5bd487b6e0ea232e5dc96d5250f6f94b1d" -dependencies = [ - "crc32fast", - "libz-rs-sys", - "miniz_oxide", -] - -[[package]] -name = "float_next_after" -version = "1.0.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bf7cc16383c4b8d58b9905a8509f02926ce3058053c056376248d958c9df1e8" - -[[package]] -name = "fnv" -version = "1.0.7" +name = "fnv" +version = "1.0.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1" @@ -2074,6 +1260,12 @@ version = "0.1.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2" +[[package]] +name = "foldhash" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "77ce24cb58228fbb8aa041425bb1050850ac19177686ea6e0f41a70416f56fdb" + [[package]] name = "form_urlencoded" version = "1.2.2" @@ -2084,33 +1276,36 @@ dependencies = [ ] [[package]] -name = "fs4" -version = "0.8.4" +name = "fs-err" +version = "3.2.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f7e180ac76c23b45e767bd7ae9579bc0bb458618c4bc71835926e098e61d15f8" +checksum = "baf68cef89750956493a66a10f512b9e58d9db21f2a573c079c0bdf1207a54a7" dependencies = [ - "rustix 0.38.44", - "windows-sys 0.52.0", + "autocfg", + "tokio", ] [[package]] -name = "fsst" -version = "1.0.1" +name = "fs4" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5ffdff7a2d68d22afc0657eddde3e946371ce7cfe730a3f78a5ed44ea5b1cb2e" +checksum = "8640e34b88f7652208ce9e88b1a37a2ae95227d84abec377ccd3c5cfeb141ed4" dependencies = [ - "arrow-array", - "rand 0.9.2", + "rustix 1.1.3", + "windows-sys 0.59.0", ] +[[package]] +name = "fs_extra" +version = "1.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"42703706b716c37f96a77aea830392ad231f44c9e9a67872fa5548707e11b11c" + [[package]] name = "fst" version = "0.4.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7ab85b9b05e3978cc9a9cf8fea7f01b494e1a09ed3037e16ba39edc7a29eb61a" -dependencies = [ - "utf8-ranges", -] [[package]] name = "funty" @@ -2213,20 +1408,6 @@ dependencies = [ "slab", ] -[[package]] -name = "generator" -version = "0.8.7" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "605183a538e3e2a9c1038635cc5c2d194e2ee8fd0d1b66b8349fad7dbacce5a2" -dependencies = [ - "cc", - "cfg-if", - "libc", - "log", - "rustversion", - "windows", -] - [[package]] name = "generic-array" version = "0.14.7" @@ -2239,9 +1420,9 @@ dependencies = [ [[package]] name = "geo" -version = "0.31.0" +version = "0.32.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fc1a1678e54befc9b4bcab6cd43b8e7f834ae8ea121118b0fd8c42747675b4a" +checksum = "3f3901269ec6d4f6068d3f09e5f02f995bd076398dcd1dfec407cd230b02d11b" dependencies = [ "earcutr", "float_next_after", @@ -2250,20 +1431,13 @@ dependencies = [ "i_overlay", "log", "num-traits", + "rand 0.8.5", "robust", "rstar", + "sif-itree", "spade", ] -[[package]] -name = "geo-traits" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2e7c353d12a704ccfab1ba8bfb1a7fe6cb18b665bf89d37f4f7890edcd260206" -dependencies = [ - "geo-types", -] - [[package]] name = "geo-types" version = "0.7.18" @@ -2278,85 +1452,33 @@ dependencies = [ ] [[package]] -name = "geoarrow-array" -version = "0.6.2" +name = "geographiclib-rs" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7d1884b17253d8572e88833c282fcbb442365e4ae5f9052ced2831608253436c" +checksum = "f611040a2bb37eaa29a78a128d1e92a378a03e0b6e66ae27398d42b1ba9a7841" dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", - "geo-traits", - "geoarrow-schema", - 
"num-traits", - "wkb", - "wkt", + "libm", ] [[package]] -name = "geoarrow-expr-geo" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a67d3b543bc3ebeffdc204b67d69b8f9fcd33d76269ddd4a4618df99f053a934" -dependencies = [ - "arrow-array", - "arrow-buffer", - "geo", - "geo-traits", - "geoarrow-array", - "geoarrow-schema", -] - -[[package]] -name = "geoarrow-schema" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "02f1b18b1c9a44ecd72be02e53d6e63bbccfdc8d1765206226af227327e2be6e" -dependencies = [ - "arrow-schema", - "geo-traits", - "serde", - "serde_json", - "thiserror 1.0.69", -] - -[[package]] -name = "geodatafusion" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83d676b8d8b5f391ab4270ba31e9b599ee2c3d780405a38e272a0a7565ea189c" -dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-schema", - "datafusion", - "geo", - "geo-traits", - "geoarrow-array", - "geoarrow-expr-geo", - "geoarrow-schema", - "geohash", - "thiserror 1.0.69", - "wkt", -] - -[[package]] -name = "geographiclib-rs" -version = "0.2.5" +name = "geohash" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f611040a2bb37eaa29a78a128d1e92a378a03e0b6e66ae27398d42b1ba9a7841" +checksum = "0fb94b1a65401d6cbf22958a9040aa364812c26674f841bee538b12c135db1e6" dependencies = [ + "geo-types", "libm", ] [[package]] -name = "geohash" -version = "0.13.1" +name = "getrandom" +version = "0.1.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fb94b1a65401d6cbf22958a9040aa364812c26674f841bee538b12c135db1e6" +checksum = "8fc3cb4d91f53b50155bdcfd23f6a4c39ae1969c2ae85982b135750cccaf5fce" dependencies = [ - "geo-types", - "libm", + "cfg-if", + "libc", + "wasi 0.9.0+wasi-snapshot-preview1", ] [[package]] @@ -2392,12 +1514,6 @@ version = "0.31.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" checksum = "07e28edb80900c19c28f1072f2e8aeca7fa06b23cd4169cefe1af5aa3260783f" -[[package]] -name = "glob" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0cc23270f6e1808e30a928bdc84dea0b9b4136a8bc82338574f23baf47bbd280" - [[package]] name = "globset" version = "0.4.18" @@ -2411,6 +1527,16 @@ dependencies = [ "regex-syntax", ] +[[package]] +name = "gpu" +version = "0.1.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" +dependencies = [ + "log", + "parking_lot", + "zerocopy", +] + [[package]] name = "grep" version = "0.3.2" @@ -2490,6 +1616,53 @@ dependencies = [ "memmap2", ] +[[package]] +name = "gridstore" +version = "0.1.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" +dependencies = [ + "ahash", + "bitvec", + "common", + "dataset", + "ecow", + "fs-err", + "io", + "itertools 0.14.0", + "log", + "lz4_flex", + "memmap2", + "memory", + "parking_lot", + "rand 0.9.2", + "serde", + "serde_cbor", + "serde_json", + "smallvec", + "tempfile", + "thiserror 2.0.18", + "zerocopy", +] + +[[package]] +name = "h2" +version = "0.3.27" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0beca50380b1fc32983fc1cb4587bfa4bb9e78fc259aad4a0032d2080309222d" +dependencies = [ + "bytes", + "fnv", + "futures-core", + "futures-sink", + "futures-util", + "http 0.2.12", + "indexmap 2.11.1", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "h2" version = "0.4.12" @@ -2501,7 +1674,7 @@ dependencies = [ "fnv", "futures-core", "futures-sink", - "http", + "http 1.3.1", "indexmap 2.11.1", "slab", "tokio", @@ -2511,13 +1684,21 @@ dependencies = [ [[package]] name = "half" -version = "2.6.0" +version = "1.8.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"1b43ede17f21864e81be2fa654110bf1e793774238d86ef8555c37e6519c0403" + +[[package]] +name = "half" +version = "2.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "459196ed295495a68f7d7fe1d84f6c4b7ff0e21fe3017b2f283c6fac3ad803c9" +checksum = "6ea2d84b969582b4b1864a92dc5d27cd2b77b622a8d79306834f1be5ba20d84b" dependencies = [ "cfg-if", "crunchy", "num-traits", + "serde", + "zerocopy", ] [[package]] @@ -2553,7 +1734,7 @@ checksum = "9229cfe53dfd69f0609a49f65461bd93001ea1ef889cd5529dd176593f5338a1" dependencies = [ "allocator-api2", "equivalent", - "foldhash", + "foldhash 0.1.5", ] [[package]] @@ -2561,6 +1742,11 @@ name = "hashbrown" version = "0.16.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "841d1cc9bed7f9236f321df977030373f4a4163ae1a7dbfe1a51a2c1a51d9100" +dependencies = [ + "allocator-api2", + "equivalent", + "foldhash 0.2.0", +] [[package]] name = "heapless" @@ -2597,23 +1783,28 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "629d8f3bbeda9d148036d6b0de0a3ab947abd08ce90626327fc3547a49d59d97" dependencies = [ "dirs 6.0.0", - "http", - "indicatif", + "http 1.3.1", + "indicatif 0.17.11", "libc", "log", "rand 0.9.2", "serde", "serde_json", - "thiserror 2.0.16", + "thiserror 2.0.18", "ureq", "windows-sys 0.60.2", ] [[package]] -name = "htmlescape" -version = "0.3.1" +name = "http" +version = "0.2.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e9025058dae765dee5070ec375f591e2ba14638c63feff74f13805a72e523163" +checksum = "601cbb57e577e2f5ef5be8e7b83f0f63994f25aa94d673e54a92d5c516d101f1" +dependencies = [ + "bytes", + "fnv", + "itoa", +] [[package]] name = "http" @@ -2626,6 +1817,17 @@ dependencies = [ "itoa", ] +[[package]] +name = "http-body" +version = "0.4.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7ceab25649e9960c0311ea418d17bee82c0dcec1bd053b5f9a66e265a693bed2" +dependencies = [ + "bytes", + "http 
0.2.12", + "pin-project-lite", +] + [[package]] name = "http-body" version = "1.0.1" @@ -2633,7 +1835,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184" dependencies = [ "bytes", - "http", + "http 1.3.1", ] [[package]] @@ -2644,8 +1846,8 @@ checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a" dependencies = [ "bytes", "futures-core", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "pin-project-lite", ] @@ -2656,10 +1858,34 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87" [[package]] -name = "humantime" -version = "2.2.0" +name = "httpdate" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9" + +[[package]] +name = "hyper" +version = "0.14.32" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9b112acc8b3adf4b107a8ec20977da0273a8c386765a3ec0229bd500a1443f9f" +checksum = "41dfc780fdec9373c01bae43289ea34c972e40ee3c9f6b3c8801a35f35586ce7" +dependencies = [ + "bytes", + "futures-channel", + "futures-core", + "futures-util", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "httparse", + "httpdate", + "itoa", + "pin-project-lite", + "socket2 0.5.10", + "tokio", + "tower-service", + "tracing", + "want", +] [[package]] name = "hyper" @@ -2671,9 +1897,9 @@ dependencies = [ "bytes", "futures-channel", "futures-core", - "h2", - "http", - "http-body", + "h2 0.4.12", + "http 1.3.1", + "http-body 1.0.1", "httparse", "itoa", "pin-project-lite", @@ -2689,18 +1915,30 @@ version = "0.27.7" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e3c93eb611681b207e1fe55d5a71ecf91572ec8a6705cdb6857f7d8d5242cf58" dependencies = [ - "http", - "hyper", + "http 1.3.1", + "hyper 1.7.0", "hyper-util", 
- "rustls", + "rustls 0.23.31", "rustls-native-certs", "rustls-pki-types", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.2", "tower-service", "webpki-roots 1.0.2", ] +[[package]] +name = "hyper-timeout" +version = "0.4.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bbb958482e8c7be4bc3cf272a766a2b0bf1a6755e7a6ae777f017a31d11b13b1" +dependencies = [ + "hyper 0.14.32", + "pin-project-lite", + "tokio", + "tokio-io-timeout", +] + [[package]] name = "hyper-util" version = "0.1.16" @@ -2712,9 +1950,9 @@ dependencies = [ "futures-channel", "futures-core", "futures-util", - "http", - "http-body", - "hyper", + "http 1.3.1", + "http-body 1.0.1", + "hyper 1.7.0", "ipnet", "libc", "percent-encoding", @@ -2725,15 +1963,6 @@ dependencies = [ "tracing", ] -[[package]] -name = "hyperloglogplus" -version = "0.4.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "621debdf94dcac33e50475fdd76d34d5ea9c0362a834b9db08c3024696c1fbe3" -dependencies = [ - "serde", -] - [[package]] name = "i_float" version = "1.15.0" @@ -2789,7 +2018,7 @@ dependencies = [ "js-sys", "log", "wasm-bindgen", - "windows-core", + "windows-core 0.61.2", ] [[package]] @@ -2914,6 +2143,43 @@ dependencies = [ "icu_properties", ] +[[package]] +name = "include-flate" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e01b7cb6ca682a621e7cda1c358c9724b53a7b4409be9be1dd443b7f3a26f998" +dependencies = [ + "include-flate-codegen", + "include-flate-compress", + "libflate", + "zstd", +] + +[[package]] +name = "include-flate-codegen" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4f49bf5274aebe468d6e6eba14a977eaf1efa481dc173f361020de70c1c48050" +dependencies = [ + "include-flate-compress", + "libflate", + "proc-macro-error", + "proc-macro2", + "quote", + "syn 2.0.106", + "zstd", +] + +[[package]] +name = "include-flate-compress" +version = "0.3.1" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "eae6a40e716bcd5931f5dbb79cd921512a4f647e2e9413fded3171fca3824dbc" +dependencies = [ + "libflate", + "zstd", +] + [[package]] name = "indexmap" version = "1.9.3" @@ -2942,13 +2208,27 @@ version = "0.17.11" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "183b3088984b400f4cfac3620d5e076c84da5364016b4f49473de574b2586235" dependencies = [ - "console", + "console 0.15.11", "number_prefix", "portable-atomic", "unicode-width", "web-time", ] +[[package]] +name = "indicatif" +version = "0.18.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9375e112e4b463ec1b1c6c011953545c65a30164fbab5b581df32b3abf0dcb88" +dependencies = [ + "console 0.16.2", + "portable-atomic", + "rayon", + "unicode-width", + "unit-prefix", + "web-time", +] + [[package]] name = "instant" version = "0.1.13" @@ -2960,17 +2240,42 @@ dependencies = [ [[package]] name = "integer-encoding" -version = "3.0.4" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "14c00403deb17c3221a1fe4fb571b9ed0370b3dcd116553c77fa294a3d918699" + +[[package]] +name = "inventory" +version = "0.3.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8bb03732005da905c88227371639bf1ad885cc712789c011c31c5fb3ab3ccf02" +checksum = "bc61209c082fbeb19919bee74b176221b27223e27b65d781eb91af24eb1fb46e" +dependencies = [ + "rustversion", +] + +[[package]] +name = "io" +version = "0.0.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" +dependencies = [ + "atomicwrites", + "bincode 1.3.3", + "fs-err", + "fs_extra", + "semver", + "serde", + "serde_json", + "tempfile", + "thiserror 2.0.18", +] [[package]] name = "io-uring" -version = "0.7.10" +version = "0.7.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"046fa2d4d00aea763528b4950358d0ead425372445dc8ff86312b3c69ff7727b" +checksum = "fdd7bddefd0a8833b88a4b68f90dae22c7450d11b354198baee3874fd811b344" dependencies = [ - "bitflags", + "bitflags 2.9.3", "cfg-if", "libc", ] @@ -2981,6 +2286,17 @@ version = "2.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "469fb0b9cefa57e3ef31275ee7cacb78f2fdca44e4765491884a2b119d4eb130" +[[package]] +name = "irg-kvariants" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ef2af7c331f2536964a32b78a7d2e0963d78b42f4a76323b16cc7d94b1ddce26" +dependencies = [ + "csv", + "once_cell", + "serde", +] + [[package]] name = "iri-string" version = "0.7.8" @@ -2991,6 +2307,12 @@ dependencies = [ "serde", ] +[[package]] +name = "is_sorted" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "357376465c37db3372ef6a00585d336ed3d0f11d4345eef77ebcb05865392b21" + [[package]] name = "is_terminal_polyfill" version = "1.70.1" @@ -3003,755 +2325,126 @@ version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b1c173a5686ce8bfa551b3563d0c2170bf24ca44da99c7ca4bfdab5418c3fe57" dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.13.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "413ee7dfc52ee1a4949ceeb7dbc8a33f2d6c088194d9f922fb8318faf1f01186" -dependencies = [ - "either", -] - -[[package]] -name = "itertools" -version = "0.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" -dependencies = [ - "either", -] - -[[package]] -name = "itoa" -version = "1.0.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" - -[[package]] -name = "jiff" -version = "0.2.15" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" -dependencies = [ - "jiff-static", - "jiff-tzdb-platform", - "log", - "portable-atomic", - "portable-atomic-util", - "serde", - "windows-sys 0.59.0", -] - -[[package]] -name = "jiff-static" -version = "0.2.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", -] - -[[package]] -name = "jiff-tzdb" -version = "0.1.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1283705eb0a21404d2bfd6eef2a7593d240bc42a0bdb39db0ad6fa2ec026524" - -[[package]] -name = "jiff-tzdb-platform" -version = "0.1.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "875a5a69ac2bab1a891711cf5eccbec1ce0341ea805560dcd90b7a2e925132e8" -dependencies = [ - "jiff-tzdb", -] - -[[package]] -name = "jobserver" -version = "0.1.34" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" -dependencies = [ - "getrandom 0.3.3", - "libc", -] - -[[package]] -name = "js-sys" -version = "0.3.77" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" -dependencies = [ - "once_cell", - "wasm-bindgen", -] - -[[package]] -name = "jsonb" -version = "0.5.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a452366d21e8d3cbca680c41388e01d6a88739afef7877961946a6da409f9ccd" -dependencies = [ - "byteorder", - "ethnum", - "fast-float2", - "itoa", - "jiff", - "nom 8.0.0", - "num-traits", - "ordered-float 5.0.0", - "rand 0.9.2", - "ryu", - "serde", - "serde_json", -] - -[[package]] -name = "lance" -version = "1.0.1" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8c439decbc304e180748e34bb6d3df729069a222e83e74e2185c38f107136e9" -dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-ipc", - "arrow-ord", - "arrow-row", - "arrow-schema", - "arrow-select", - "async-recursion", - "async-trait", - "async_cell", - "byteorder", - "bytes", - "chrono", - "dashmap", - "datafusion", - "datafusion-expr", - "datafusion-functions", - "datafusion-physical-expr", - "datafusion-physical-plan", - "deepsize", - "either", - "futures", - "half", - "humantime", - "itertools 0.13.0", - "lance-arrow", - "lance-core", - "lance-datafusion", - "lance-encoding", - "lance-file", - "lance-geo", - "lance-index", - "lance-io", - "lance-linalg", - "lance-namespace", - "lance-table", - "log", - "moka", - "object_store", - "permutation", - "pin-project", - "prost", - "prost-types", - "rand 0.9.2", - "roaring", - "semver", - "serde", - "serde_json", - "snafu", - "tantivy", - "tokio", - "tokio-stream", - "tracing", - "url", - "uuid", -] - -[[package]] -name = "lance-arrow" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f4ee5508b225456d3d56998eaeef0d8fbce5ea93856df47b12a94d2e74153210" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", - "bytes", - "getrandom 0.2.16", - "half", - "jsonb", - "num-traits", - "rand 0.9.2", -] - -[[package]] -name = "lance-bitpacking" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1c065fb3bd4a8cc4f78428443e990d4921aa08f707b676753db740e0b402a21" -dependencies = [ - "arrayref", - "paste", - "seq-macro", -] - -[[package]] -name = "lance-core" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8856abad92e624b75cd57a04703f6441948a239463bdf973f2ac1924b0bcdbe" -dependencies = [ - "arrow-array", - "arrow-buffer", - 
"arrow-schema", - "async-trait", - "byteorder", - "bytes", - "chrono", - "datafusion-common", - "datafusion-sql", - "deepsize", - "futures", - "lance-arrow", - "libc", - "log", - "mock_instant", - "moka", - "num_cpus", - "object_store", - "pin-project", - "prost", - "rand 0.9.2", - "roaring", - "serde_json", - "snafu", - "tempfile", - "tokio", - "tokio-stream", - "tokio-util", - "tracing", - "url", -] - -[[package]] -name = "lance-datafusion" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c8835308044cef5467d7751be87fcbefc2db01c22370726a8704bd62991693f" -dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ord", - "arrow-schema", - "arrow-select", - "async-trait", - "chrono", - "datafusion", - "datafusion-common", - "datafusion-functions", - "datafusion-physical-expr", - "futures", - "jsonb", - "lance-arrow", - "lance-core", - "lance-datagen", - "lance-geo", - "log", - "pin-project", - "prost", - "snafu", - "tokio", - "tracing", -] - -[[package]] -name = "lance-datagen" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "612de1e888bb36f6bf51196a6eb9574587fdf256b1759a4c50e643e00d5f96d0" -dependencies = [ - "arrow", - "arrow-array", - "arrow-cast", - "arrow-schema", - "chrono", - "futures", - "half", - "hex", - "rand 0.9.2", - "rand_xoshiro", - "random_word", -] - -[[package]] -name = "lance-encoding" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2b456b29b135d3c7192602e516ccade38b5483986e121895fa43cf1fdb38bf60" -dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", - "bytemuck", - "byteorder", - "bytes", - "fsst", - "futures", - "hex", - "hyperloglogplus", - "itertools 0.13.0", - "lance-arrow", - "lance-bitpacking", - "lance-core", - "log", - "lz4", - "num-traits", - "prost", - "prost-build", - "prost-types", - "rand 
0.9.2", - "snafu", - "strum", - "tokio", - "tracing", - "xxhash-rust", - "zstd", -] - -[[package]] -name = "lance-file" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ab1538d14d5bb3735b4222b3f5aff83cfa59cc6ef7cdd3dd9139e4c77193c80b" -dependencies = [ - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-data", - "arrow-schema", - "arrow-select", - "async-recursion", - "async-trait", - "byteorder", - "bytes", - "datafusion-common", - "deepsize", - "futures", - "lance-arrow", - "lance-core", - "lance-encoding", - "lance-io", - "log", - "num-traits", - "object_store", - "prost", - "prost-build", - "prost-types", - "snafu", - "tokio", - "tracing", -] - -[[package]] -name = "lance-geo" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5a69a2f3b55703d9c240ad7c5ffa2c755db69e9cf8aa05efe274a212910472d" -dependencies = [ - "datafusion", - "geo-types", - "geoarrow-array", - "geoarrow-schema", - "geodatafusion", -] - -[[package]] -name = "lance-index" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0ea84613df6fa6b9168a1f056ba4f9cb73b90a1b452814c6fd4b3529bcdbfc78" -dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-ord", - "arrow-schema", - "arrow-select", - "async-channel", - "async-recursion", - "async-trait", - "bitpacking", - "bitvec", - "bytes", - "crossbeam-queue", - "datafusion", - "datafusion-common", - "datafusion-expr", - "datafusion-physical-expr", - "datafusion-sql", - "deepsize", - "dirs 6.0.0", - "fst", - "futures", - "half", - "itertools 0.13.0", - "jsonb", - "lance-arrow", - "lance-core", - "lance-datafusion", - "lance-datagen", - "lance-encoding", - "lance-file", - "lance-io", - "lance-linalg", - "lance-table", - "libm", - "log", - "ndarray 0.16.1", - "num-traits", - "object_store", - "prost", - "prost-build", - "prost-types", - "rand 0.9.2", - "rand_distr 0.5.1", - "rayon", - "roaring", 
- "serde", - "serde_json", - "snafu", - "tantivy", - "tempfile", - "tokio", - "tracing", - "twox-hash", - "uuid", -] - -[[package]] -name = "lance-io" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6b3fc4c1d941fceef40a0edbd664dbef108acfc5d559bb9e7f588d0c733cbc35" -dependencies = [ - "arrow", - "arrow-arith", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-schema", - "arrow-select", - "async-recursion", - "async-trait", - "byteorder", - "bytes", - "chrono", - "deepsize", - "futures", - "lance-arrow", - "lance-core", - "lance-namespace", - "log", - "object_store", - "path_abs", - "pin-project", - "prost", - "rand 0.9.2", - "serde", - "shellexpand", - "snafu", - "tokio", - "tracing", - "url", -] - -[[package]] -name = "lance-linalg" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b62ffbc5ce367fbf700a69de3fe0612ee1a11191a64a632888610b6bacfa0f63" -dependencies = [ - "arrow-array", - "arrow-buffer", - "arrow-schema", - "cc", - "deepsize", - "half", - "lance-arrow", - "lance-core", - "num-traits", - "rand 0.9.2", -] - -[[package]] -name = "lance-namespace" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "791bbcd868ee758123a34e07d320a1fb99379432b5ecc0e78d6b4686e999b629" -dependencies = [ - "arrow", - "async-trait", - "bytes", - "lance-core", - "lance-namespace-reqwest-client", - "snafu", -] - -[[package]] -name = "lance-namespace-impls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee713505576f6b1988a491f77c7ca8b0cf7090a393598e63c85079fa70a53ebf" -dependencies = [ - "arrow", - "arrow-ipc", - "arrow-schema", - "async-trait", - "bytes", - "futures", - "lance", - "lance-core", - "lance-index", - "lance-io", - "lance-namespace", - "log", - "object_store", - "rand 0.9.2", - "serde_json", - "snafu", - "tokio", - "url", -] - -[[package]] -name = 
"lance-namespace-reqwest-client" -version = "0.0.18" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3ea349999bcda4eea53fc05d334b3775ec314761e6a706555c777d7a29b18d19" -dependencies = [ - "reqwest", - "serde", - "serde_json", - "serde_repr", - "url", -] - -[[package]] -name = "lance-table" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6fdb2d56bfa4d1511c765fa0cc00fdaa37e5d2d1cd2f57b3c6355d9072177052" -dependencies = [ - "arrow", - "arrow-array", - "arrow-buffer", - "arrow-ipc", - "arrow-schema", - "async-trait", - "byteorder", - "bytes", - "chrono", - "deepsize", - "futures", - "lance-arrow", - "lance-core", - "lance-file", - "lance-io", - "log", - "object_store", - "prost", - "prost-build", - "prost-types", - "rand 0.9.2", - "rangemap", - "roaring", - "semver", - "serde", - "serde_json", - "snafu", - "tokio", - "tracing", - "url", - "uuid", -] - -[[package]] -name = "lance-testing" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d8ccb1a4a9284435c6a8c02c8c06e7e041bece0d7f722152159353cf55dc51e3" -dependencies = [ - "arrow-array", - "arrow-schema", - "lance-arrow", - "num-traits", - "rand 0.9.2", -] - -[[package]] -name = "lancedb" -version = "0.23.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9217d7d3a1f4e088bdedaad9b4fa79045b077e07f961f1cd3ec6f90850c425f2" -dependencies = [ - "ahash", - "arrow", - "arrow-array", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-ord", - "arrow-schema", - "arrow-select", - "async-trait", - "bytes", - "chrono", - "datafusion", - "datafusion-catalog", - "datafusion-common", - "datafusion-execution", - "datafusion-expr", - "datafusion-physical-plan", - "futures", - "half", - "lance", - "lance-arrow", - "lance-core", - "lance-datafusion", - "lance-datagen", - "lance-encoding", - "lance-file", - "lance-index", - "lance-io", - "lance-linalg", - "lance-namespace", - 
"lance-namespace-impls", - "lance-table", - "lance-testing", - "lazy_static", - "log", - "moka", - "num-traits", - "object_store", - "pin-project", - "rand 0.9.2", - "regex", - "semver", - "serde", - "serde_json", - "serde_with", - "snafu", - "tempfile", - "tokio", - "url", - "uuid", + "either", ] [[package]] -name = "lazy_static" -version = "1.5.0" +name = "itertools" +version = "0.14.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" +checksum = "2b192c782037fadd9cfa75548310488aabdbf3d2da73885b31bd0abd03351285" +dependencies = [ + "either", +] [[package]] -name = "levenshtein_automata" -version = "0.2.1" +name = "itoa" +version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c2cdeb66e45e9f36bfad5bbdb4d2384e70936afbee843c6f6543f0c551ebb25" +checksum = "4a5f13b858c8d314ee3e8f639011f7ccefe71f97f96e50151fb991f267928e2c" [[package]] -name = "lexical-core" -version = "1.0.5" +name = "jieba-macros" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b765c31809609075565a70b4b71402281283aeda7ecaf4818ac14a7b2ade8958" +checksum = "348294e44ee7e3c42685da656490f8febc7359632544019621588902216da95c" dependencies = [ - "lexical-parse-float", - "lexical-parse-integer", - "lexical-util", - "lexical-write-float", - "lexical-write-integer", + "phf_codegen", ] [[package]] -name = "lexical-parse-float" -version = "1.0.5" +name = "jieba-rs" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de6f9cb01fb0b08060209a057c048fcbab8717b4c1ecd2eac66ebfe39a65b0f2" +checksum = "766bd7012aa5ba49411ebdf4e93bddd59b182d2918e085d58dec5bb9b54b7105" dependencies = [ - "lexical-parse-integer", - "lexical-util", - "static_assertions", + "cedarwood", + "include-flate", + "jieba-macros", + "phf", + "regex", + "rustc-hash", ] [[package]] -name = "lexical-parse-integer" -version = 
"1.0.5" +name = "jiff" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "72207aae22fc0a121ba7b6d479e42cbfea549af1479c3f3a4f12c70dd66df12e" +checksum = "be1f93b8b1eb69c77f24bbb0afdf66f54b632ee39af40ca21c4365a1d7347e49" dependencies = [ - "lexical-util", - "static_assertions", + "jiff-static", + "log", + "portable-atomic", + "portable-atomic-util", + "serde", ] [[package]] -name = "lexical-util" -version = "1.0.6" +name = "jiff-static" +version = "0.2.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a82e24bf537fd24c177ffbbdc6ebcc8d54732c35b50a3f28cc3f4e4c949a0b3" +checksum = "03343451ff899767262ec32146f6d559dd759fdadf42ff0e227c7c48f72594b4" dependencies = [ - "static_assertions", + "proc-macro2", + "quote", + "syn 2.0.106", ] [[package]] -name = "lexical-write-float" -version = "1.0.5" +name = "jobserver" +version = "0.1.34" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5afc668a27f460fb45a81a757b6bf2f43c2d7e30cb5a2dcd3abf294c78d62bd" +checksum = "9afb3de4395d6b3e67a780b6de64b51c978ecf11cb9a462c66be7d4ca9039d33" dependencies = [ - "lexical-util", - "lexical-write-integer", - "static_assertions", + "getrandom 0.3.3", + "libc", ] [[package]] -name = "lexical-write-integer" -version = "1.0.5" +name = "js-sys" +version = "0.3.77" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "629ddff1a914a836fb245616a7888b62903aae58fa771e1d83943035efa0f978" +checksum = "1cfaf33c695fc6e08064efbc1f72ec937429614f25eef83af942d0e227c3a28f" dependencies = [ - "lexical-util", - "static_assertions", + "once_cell", + "wasm-bindgen", ] [[package]] -name = "libbz2-rs-sys" -version = "0.2.2" +name = "lazy_static" +version = "1.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c4a545a15244c7d945065b5d392b2d2d7f21526fba56ce51467b06ed445e8f7" +checksum = "bbd2bcb4c963f2ddae06a2efc7e9f3591312473c50c6685e1f298068316e66fe" 
[[package]] name = "libc" -version = "0.2.177" +version = "0.2.180" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bcc35a38544a891a5f7c865aca548a982ccb3b8650a5b06d0fd33a10283c56fc" + +[[package]] +name = "libflate" +version = "2.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e3248b8d211bd23a104a42d81b4fa8bb8ac4a3b75e7a43d85d2c9ccb6179cd74" +dependencies = [ + "adler32", + "core2", + "crc32fast", + "dary_heap", + "libflate_lz77", +] + +[[package]] +name = "libflate_lz77" +version = "2.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2874a2af47a2325c2001a6e6fad9b16a53b802102b528163885171cf92b15976" +checksum = "a599cb10a9cd92b1300debcef28da8f70b935ec937f44fcd1b70a7c986a11c5c" +dependencies = [ + "core2", + "hashbrown 0.16.1", + "rle-decode-fast", +] [[package]] name = "libm" @@ -3765,17 +2458,9 @@ version = "0.1.9" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "391290121bad3d37fbddad76d8f5d1c1c314cfc646d143d7e07a3086ddff0ce3" dependencies = [ - "bitflags", + "bitflags 2.9.3", "libc", -] - -[[package]] -name = "libz-rs-sys" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c10501e7805cee23da17c7790e59df2870c0d4043ec6d03f67d31e2b53e77415" -dependencies = [ - "zlib-rs", + "redox_syscall", ] [[package]] @@ -3798,41 +2483,19 @@ checksum = "241eaef5fd12c88705a01fc1066c48c4b36e0dd4377dcdc7ec3942cea7a69956" [[package]] name = "lock_api" -version = "0.4.13" +version = "0.4.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "96936507f153605bddfcda068dd804796c84324ed2510809e5b2a624c81da765" +checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965" dependencies = [ - "autocfg", "scopeguard", + "serde", ] [[package]] name = "log" -version = "0.4.27" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"13dc2df351e3202783a1fe0d44375f7295ffb4049267b0f3018346dc122a1d94" - -[[package]] -name = "loom" -version = "0.7.2" +version = "0.4.29" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "419e0dc8046cb947daa77eb95ae174acfbddb7673b4151f56d1eed8e93fbfaca" -dependencies = [ - "cfg-if", - "generator", - "scoped-tls", - "tracing", - "tracing-subscriber", -] - -[[package]] -name = "lru" -version = "0.12.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" -dependencies = [ - "hashbrown 0.15.5", -] +checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897" [[package]] name = "lru-slab" @@ -3840,44 +2503,11 @@ version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154" -[[package]] -name = "lz4" -version = "1.28.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a20b523e860d03443e98350ceaac5e71c6ba89aea7d960769ec3ce37f4de5af4" -dependencies = [ - "lz4-sys", -] - -[[package]] -name = "lz4-sys" -version = "1.11.1+lz4-1.10.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6bd8c0d6c6ed0cd30b3652886bb8711dc4bb01d637a68105a3d5158039b418e6" -dependencies = [ - "cc", - "libc", -] - [[package]] name = "lz4_flex" -version = "0.11.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08ab2867e3eeeca90e844d1940eab391c9dc5228783db2ed999acbc0a9ed375a" -dependencies = [ - "twox-hash", -] - -[[package]] -name = "lzma-sys" -version = "0.1.20" +version = "0.12.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5fda04ab3764e6cde78b9974eec4f779acaba7c4e84b36eca3cf77c581b85d27" -dependencies = [ - "cc", - "libc", - "pkg-config", -] +checksum = "ab6473172471198271ff72e9379150e9dfd70d8e533e0752a27e515b48dd375e" [[package]] name = 
"macro_rules_attribute" @@ -3896,14 +2526,21 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "670fdfda89751bc4a84ac13eaa63e205cf0fd22b4c9a5fbfa085b63c1f1d3a30" [[package]] -name = "matchers" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d1525a2a28c7f4fa0fc98bb91ae755d1e2d1505079e05539e35bc876b5d65ae9" +name = "macros" +version = "0.1.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" dependencies = [ - "regex-automata", + "proc-macro2", + "quote", + "syn 2.0.106", ] +[[package]] +name = "matchit" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0e7465ac9959cc2b1404e8e2367b43684a6d13790fe23056cc8c6c5a6b7bcb94" + [[package]] name = "matrixmultiply" version = "0.3.10" @@ -3911,29 +2548,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a06de3016e9fae57a36fd14dba131fccf49f74b40b7fbdb472f96e361ec71a08" dependencies = [ "autocfg", - "num_cpus", - "once_cell", "rawpointer", - "thread-tree", -] - -[[package]] -name = "md-5" -version = "0.10.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf" -dependencies = [ - "cfg-if", - "digest", -] - -[[package]] -name = "measure_time" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51c55d61e72fc3ab704396c5fa16f4c184db37978ae4e94ca8959693a235fc0e" -dependencies = [ - "log", ] [[package]] @@ -3944,13 +2559,30 @@ checksum = "32a282da65faaf38286cf3be983213fcf1d2e2a58700e808f83f4ea9a4804bc0" [[package]] name = "memmap2" -version = "0.9.8" +version = "0.9.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "843a98750cd611cc2965a8213b53b43e715f13c37a9e096c6408e69990961db7" +checksum = "744133e4a0e0a658e1374cf3bf8e415c4052a15a111acd372764c55b4177d490" 
dependencies = [ "libc", ] +[[package]] +name = "memory" +version = "0.0.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" +dependencies = [ + "ahash", + "bitvec", + "delegate", + "fs-err", + "log", + "memmap2", + "nix 0.31.1", + "parking_lot", + "serde", + "thiserror 2.0.18", +] + [[package]] name = "mime" version = "0.3.17" @@ -3980,6 +2612,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1fa76a2c86f704bdb222d66965fb3d63269ce38518b83cb0575fca855ebb6316" dependencies = [ "adler2", + "simd-adler32", ] [[package]] @@ -3993,12 +2626,6 @@ dependencies = [ "windows-sys 0.59.0", ] -[[package]] -name = "mock_instant" -version = "0.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dce6dd36094cac388f119d2e9dc82dc730ef91c32a6222170d630e5414b956e6" - [[package]] name = "model2vec-rs" version = "0.1.3" @@ -4007,36 +2634,14 @@ checksum = "594823a4df1f780f1f075a91a7b7ad75a351f4c723fd196ab575316f7f452d1d" dependencies = [ "anyhow", "clap", - "half", + "half 2.7.1", "hf-hub", - "ndarray 0.15.6", + "ndarray", "safetensors", "serde_json", "tokenizers", ] -[[package]] -name = "moka" -version = "0.12.10" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a9321642ca94a4282428e6ea4af8cc2ca4eac48ac7a6a4ea8f33f76d0ce70926" -dependencies = [ - "async-lock", - "crossbeam-channel", - "crossbeam-epoch", - "crossbeam-utils", - "event-listener", - "futures-util", - "loom", - "parking_lot", - "portable-atomic", - "rustc_version", - "smallvec", - "tagptr", - "thiserror 1.0.69", - "uuid", -] - [[package]] name = "monostate" version = "0.1.14" @@ -4064,12 +2669,6 @@ version = "0.10.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "1d87ecb2933e8aeadb3e3a02b828fed80a7528047e68b4f424523a0981a3a084" -[[package]] -name = "murmurhash32" -version = "0.3.1" -source = "registry+https://github.com/rust-lang/crates.io-index" 
-checksum = "2195bf6aa996a481483b29d62a7663eed3fe39600c460e323f8ff41e90bdd89b" - [[package]] name = "ndarray" version = "0.15.6" @@ -4084,20 +2683,35 @@ dependencies = [ ] [[package]] -name = "ndarray" -version = "0.16.1" +name = "nix" +version = "0.25.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "882ed72dce9365842bf196bdeedf5055305f11fc8c03dee7bb0194a6cad34841" +checksum = "f346ff70e7dbfd675fe90590b92d59ef2de15a8779ae305ebcbfd3f0caf59be4" dependencies = [ - "matrixmultiply", - "num-complex", - "num-integer", - "num-traits", - "portable-atomic", - "portable-atomic-util", - "rawpointer", + "autocfg", + "bitflags 1.3.2", + "cfg-if", + "libc", +] + +[[package]] +name = "nix" +version = "0.31.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "225e7cfe711e0ba79a68baeddb2982723e4235247aefce1482f2f16c27865b66" +dependencies = [ + "bitflags 2.9.3", + "cfg-if", + "cfg_aliases", + "libc", ] +[[package]] +name = "nodrop" +version = "0.1.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb" + [[package]] name = "nom" version = "7.1.3" @@ -4118,37 +2732,19 @@ dependencies = [ ] [[package]] -name = "nu-ansi-term" -version = "0.50.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d4a28e057d01f97e61255210fcff094d74ed0466038633e95017f5beb68e4399" -dependencies = [ - "windows-sys 0.52.0", -] - -[[package]] -name = "num" -version = "0.4.3" +name = "ntapi" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "35bd024e8b2ff75562e5f34e7f4905839deb4b22955ef5e73d2fea1b9813cb23" +checksum = "c70f219e21142367c70c0b30c6a9e3a14d55b4d12a204d897fbec83a0363f081" dependencies = [ - "num-bigint", - "num-complex", - "num-integer", - "num-iter", - "num-rational", - "num-traits", + "winapi", ] [[package]] -name = "num-bigint" -version = "0.4.6" +name = "num-cmp" 
+version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a5e44f723f1133c9deac646763579fdb3ac745e418f2a7af9cd0c431da1f20b9" -dependencies = [ - "num-integer", - "num-traits", -] +checksum = "63335b2e2c34fae2fb0aa2cecfd9f0832a1e24b3b32ecec612c3426d46dc8aaa" [[package]] name = "num-complex" @@ -4160,10 +2756,15 @@ dependencies = [ ] [[package]] -name = "num-conv" -version = "0.1.0" +name = "num-derive" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "51d515d32fb182ee37cda2ccdcb92950d6a3c2893aa280e540671c2cd0f3b1d9" +checksum = "ed3955f1a9c7c0c15e092f9c887db08b1fc683305fdf6eb6684f22555355e202" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] [[package]] name = "num-integer" @@ -4174,68 +2775,24 @@ dependencies = [ "num-traits", ] -[[package]] -name = "num-iter" -version = "0.1.45" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1429034a0490724d0075ebb2bc9e875d6503c3cf69e235a8941aa757d83ef5bf" -dependencies = [ - "autocfg", - "num-integer", - "num-traits", -] - -[[package]] -name = "num-rational" -version = "0.4.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f83d14da390562dca69fc84082e73e548e1ad308d24accdedd2720017cb37824" -dependencies = [ - "num-bigint", - "num-integer", - "num-traits", -] - [[package]] name = "num-traits" version = "0.2.19" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "071dfc062690e90b734c0b2273ce72ad0ffa95f0c74596bc250dcfd960262841" dependencies = [ - "autocfg", - "libm", -] - -[[package]] -name = "num_cpus" -version = "1.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" -dependencies = [ - "hermit-abi", - "libc", -] - -[[package]] -name = "num_enum" -version = "0.7.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"b1207a7e20ad57b847bbddc6776b968420d38292bbfe2089accff5e19e82454c" -dependencies = [ - "num_enum_derive", - "rustversion", + "autocfg", + "libm", ] [[package]] -name = "num_enum_derive" -version = "0.7.5" +name = "num_cpus" +version = "1.17.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff32365de1b6743cb203b710788263c44a03de03802daf96092f2da4fe6ba4d7" +checksum = "91df4bbde75afed763b708b7eee1e8e7651e02d97f6d5dd763e89367e957b23b" dependencies = [ - "proc-macro-crate", - "proc-macro2", - "quote", - "syn 2.0.106", + "hermit-abi", + "libc", ] [[package]] @@ -4245,45 +2802,31 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "830b246a0e5f20af87141b25c173cd1b609bd7779a4617d6ec582abaf90870f3" [[package]] -name = "object" -version = "0.36.7" +name = "objc2-core-foundation" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" +checksum = "2a180dd8642fa45cdb7dd721cd4c11b1cadd4929ce112ebd8b9f5803cc79d536" dependencies = [ - "memchr", + "bitflags 2.9.3", ] [[package]] -name = "object" -version = "0.37.3" +name = "objc2-io-kit" +version = "0.3.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ff76201f031d8863c38aa7f905eca4f53abbfa15f609db4277d44cd8938f33fe" +checksum = "33fafba39597d6dc1fb709123dfa8289d39406734be322956a69f0931c73bb15" dependencies = [ - "memchr", + "libc", + "objc2-core-foundation", ] [[package]] -name = "object_store" -version = "0.12.3" +name = "object" +version = "0.36.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efc4f07659e11cd45a341cd24d71e683e3be65d9ff1f8150061678fe60437496" +checksum = "62948e14d923ea95ea2c7c86c71013138b66525b86bdc08d2dcc262bdb497b87" dependencies = [ - "async-trait", - "bytes", - "chrono", - "futures", - "http", - "humantime", - "itertools 0.14.0", - "parking_lot", - "percent-encoding", - "thiserror 
2.0.16", - "tokio", - "tracing", - "url", - "walkdir", - "wasm-bindgen-futures", - "web-time", + "memchr", ] [[package]] @@ -4298,19 +2841,13 @@ version = "1.70.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a4895175b425cb1f87721b59f0f286c2092bd4af812243672510e1ac53e2e0ad" -[[package]] -name = "oneshot" -version = "0.1.11" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b4ce411919553d3f9fa53a0880544cda985a112117a0444d5ff1e870a893d6ea" - [[package]] name = "onig" version = "6.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "336b9c63443aceef14bea841b899035ae3abe89b7c486aaf4c5bd8aafedac3f0" dependencies = [ - "bitflags", + "bitflags 2.9.3", "libc", "once_cell", "onig_sys", @@ -4349,33 +2886,21 @@ dependencies = [ [[package]] name = "ordered-float" -version = "5.0.0" +version = "5.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e2c1f9f56e534ac6a9b8a4600bdf0f530fb393b5f393e7b4d03489c3cf0c3f01" +checksum = "7f4779c6901a562440c3786d08192c6fbda7c1c2060edd10006b05ee35d10f2d" dependencies = [ "num-traits", + "rand 0.8.5", + "schemars", + "serde", ] -[[package]] -name = "ownedbytes" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2fbd56f7631767e61784dc43f8580f403f4475bd4aaa4da003e6295e1bab4a7e" -dependencies = [ - "stable_deref_trait", -] - -[[package]] -name = "parking" -version = "2.2.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f38d5652c16fde515bb1ecef450ab0f6a219d619a7274976324d5e377f7dceba" - [[package]] name = "parking_lot" -version = "0.12.4" +version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "70d58bf43669b5795d1576d0641cfb6fbb2057bf629506267a92807158584a13" +checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a" dependencies = [ "lock_api", "parking_lot_core", @@ -4383,52 +2908,17 @@ 
dependencies = [ [[package]] name = "parking_lot_core" -version = "0.9.11" +version = "0.9.12" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc838d2a56b5b1a6c25f55575dfc605fabb63bb2365f6c2353ef9159aa69e4a5" +checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1" dependencies = [ + "backtrace", "cfg-if", "libc", + "petgraph", "redox_syscall", "smallvec", - "windows-targets 0.52.6", -] - -[[package]] -name = "parquet" -version = "56.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f0dbd48ad52d7dccf8ea1b90a3ddbfaea4f69878dd7683e51c507d4bc52b5b27" -dependencies = [ - "ahash", - "arrow-array", - "arrow-buffer", - "arrow-cast", - "arrow-data", - "arrow-ipc", - "arrow-schema", - "arrow-select", - "base64 0.22.1", - "brotli", - "bytes", - "chrono", - "flate2", - "futures", - "half", - "hashbrown 0.16.1", - "lz4_flex", - "num", - "num-bigint", - "object_store", - "paste", - "ring", - "seq-macro", - "simdutf8", - "snap", - "thrift", - "tokio", - "twox-hash", - "zstd", + "windows-link 0.2.0", ] [[package]] @@ -4437,18 +2927,6 @@ version = "1.0.15" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a" -[[package]] -name = "path_abs" -version = "0.5.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "05ef02f6342ac01d8a93b65f96db53fe68a92a15f41144f97fb00a9e669633c3" -dependencies = [ - "serde", - "serde_derive", - "std_prelude", - "stfu8", -] - [[package]] name = "percent-encoding" version = "2.3.2" @@ -4456,47 +2934,74 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220" [[package]] -name = "permutation" -version = "0.4.1" +name = "permutation_iterator" +version = "0.1.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"df202b0b0f5b8e389955afd5f27b007b00fb948162953f1db9c70d2c7e3157d7" +checksum = "b55405179fe06e4e3820ddaf9f9b51cdff9e7496af9554acdb2b1921a86ca9cb" +dependencies = [ + "blake2-rfc", + "rand 0.7.3", +] [[package]] name = "petgraph" -version = "0.7.1" +version = "0.6.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3672b37090dbd86368a4145bc067582552b29c27377cad4e0a306c97f9bd7772" +checksum = "b4c5cc86750666a3ed20bdaf5ca2a0344f9c67674cae0515bec2da16fbaa47db" dependencies = [ "fixedbitset", "indexmap 2.11.1", ] [[package]] -name = "petgraph" -version = "0.8.2" +name = "ph" +version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "54acf3a685220b533e437e264e4d932cfbdc4cc7ec0cd232ed73c08d03b8a7ca" +checksum = "b2fbaf8da280599aae4047ea0659a1e79cf61739bce5bdc50ca88dc7e6357060" dependencies = [ - "fixedbitset", - "hashbrown 0.15.5", - "indexmap 2.11.1", - "serde", + "aligned-vec", + "binout", + "bitm", + "dyn_size_of", + "rayon", + "seedable_hash", ] [[package]] name = "phf" -version = "0.12.1" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c1562dc717473dbaa4c1f85a36410e03c047b2e7df7f45ee938fbef64ae7fadf" +dependencies = [ + "phf_shared", + "serde", +] + +[[package]] +name = "phf_codegen" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49aa7f9d80421bca176ca8dbfebe668cc7a2684708594ec9f3c0db0805d5d6e1" +dependencies = [ + "phf_generator", + "phf_shared", +] + +[[package]] +name = "phf_generator" +version = "0.13.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "913273894cec178f401a31ec4b656318d95473527be05c0752cc41cdc32be8b7" +checksum = "135ace3a761e564ec88c03a77317a7c6b80bb7f7135ef2544dbe054243b89737" dependencies = [ + "fastrand", "phf_shared", ] [[package]] name = "phf_shared" -version = "0.12.1" +version = "0.13.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "06005508882fb681fd97892ecff4b7fd0fee13ef1aa569f8695dae7ab9099981" +checksum = "e57fef6bc5981e38c2ce2d63bfa546861309f875b8a75f092d1d54ae2d64f266" dependencies = [ "siphasher", ] @@ -4554,6 +3059,16 @@ dependencies = [ "portable-atomic", ] +[[package]] +name = "posting_list" +version = "0.0.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" +dependencies = [ + "bitpacking", + "common", + "zerocopy", +] + [[package]] name = "potential_utf" version = "0.1.2" @@ -4563,12 +3078,6 @@ dependencies = [ "zerovec", ] -[[package]] -name = "powerfmt" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "439ee305def115ba05938db6eb1644ff94165c5ab5e9420d1c1bcedbba909391" - [[package]] name = "ppv-lite86" version = "0.2.21" @@ -4589,12 +3098,49 @@ dependencies = [ ] [[package]] -name = "proc-macro-crate" -version = "3.4.0" +name = "proc-macro-error" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "da25490ff9892aab3fcf7c36f08cfb902dd3e71ca0f9f9517bea02a73a5ce38c" +dependencies = [ + "proc-macro-error-attr", + "proc-macro2", + "quote", + "syn 1.0.109", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a1be40180e52ecc98ad80b184934baf3d0d29f979574e439af5a55274b35f869" +dependencies = [ + "proc-macro2", + "quote", + "version_check", +] + +[[package]] +name = "proc-macro-error-attr2" +version = "2.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "96de42df36bb9bba5542fe9f1a054b8cc87e172759a1868aa05c1f3acc89dfc5" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "proc-macro-error2" +version = "2.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"219cb19e96be00ab2e37d6e299658a0cfa83e52429179969b0f0121b4ac46983" +checksum = "11ec05c52be0a07b08061f7dd003e7d7092e0472bc731b4af7bb1ef876109802" dependencies = [ - "toml_edit", + "proc-macro-error-attr2", + "proc-macro2", + "quote", + "syn 2.0.106", ] [[package]] @@ -4606,11 +3152,32 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "procfs" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "25485360a54d6861439d60facef26de713b1e126bf015ec8f98239467a2b82f7" +dependencies = [ + "bitflags 2.9.3", + "procfs-core", + "rustix 1.1.3", +] + +[[package]] +name = "procfs-core" +version = "0.18.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6401bf7b6af22f78b563665d15a22e9aef27775b79b149a66ca022468a4e405" +dependencies = [ + "bitflags 2.9.3", + "hex", +] + [[package]] name = "prost" -version = "0.13.5" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2796faa41db3ec313a31f7624d9286acf277b52de526150b7e69f3debf891ee5" +checksum = "deb1435c188b76130da55f17a466d252ff7b1418b2ad3e037d127b94e3411f29" dependencies = [ "bytes", "prost-derive", @@ -4618,19 +3185,22 @@ dependencies = [ [[package]] name = "prost-build" -version = "0.13.5" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "be769465445e8c1474e9c5dac2018218498557af32d9ed057325ec9a41ae81bf" +checksum = "22505a5c94da8e3b7c2996394d1c933236c4d743e81a410bcca4e6989fc066a4" dependencies = [ + "bytes", "heck", - "itertools 0.14.0", + "itertools 0.11.0", "log", "multimap", "once_cell", - "petgraph 0.7.1", + "petgraph", "prettyplease", "prost", "prost-types", + "pulldown-cmark", + "pulldown-cmark-to-cmark", "regex", "syn 2.0.106", "tempfile", @@ -4638,12 +3208,12 @@ dependencies = [ [[package]] name = "prost-derive" -version = "0.13.5" +version = "0.12.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"8a56d757972c98b346a9b766e3f02746cde6dd1cd1d1d563472929fdd74bec4d" +checksum = "81bddcdb20abf9501610992b6759a4c888aef7d1a7247ef75e2404275ac24af1" dependencies = [ "anyhow", - "itertools 0.14.0", + "itertools 0.11.0", "proc-macro2", "quote", "syn 2.0.106", @@ -4651,21 +3221,100 @@ dependencies = [ [[package]] name = "prost-types" -version = "0.13.5" +version = "0.12.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "9091c90b0a32608e984ff2fa4091273cbdd755d54935c51d520887f4a1dbd5b0" +dependencies = [ + "prost", +] + +[[package]] +name = "prost-wkt" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fb7ec2850c138ebaa7ab682503b5d08c3cb330343e9c94776612928b6ddb53f" +dependencies = [ + "chrono", + "inventory", + "prost", + "serde", + "serde_derive", + "serde_json", + "typetag", +] + +[[package]] +name = "prost-wkt-build" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "598b7365952c2ed4e32902de0533653aafbe5ae3da436e8e2335c7d375a1cef3" +dependencies = [ + "heck", + "prost", + "prost-build", + "prost-types", + "quote", +] + +[[package]] +name = "prost-wkt-types" +version = "0.5.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "52c2c1bf36ddb1a1c396b3601a3cec27c2462e45f07c386894ec3ccf5332bd16" +checksum = "1a8eadc2381640a49c1fbfb9f4a857794b4e5bf5a2cbc2d858cfdb74f64dcd22" dependencies = [ + "chrono", "prost", + "prost-build", + "prost-types", + "prost-wkt", + "prost-wkt-build", + "regex", + "serde", + "serde_derive", + "serde_json", +] + +[[package]] +name = "pulldown-cmark" +version = "0.9.6" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "57206b407293d2bcd3af849ce869d52068623f19e1b5ff8e8778e3309439682b" +dependencies = [ + "bitflags 2.9.3", + "memchr", + "unicase", ] [[package]] -name = "psm" -version = "0.1.29" +name = "pulldown-cmark-to-cmark" +version = "10.0.4" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1fa96cb91275ed31d6da3e983447320c4eb219ac180fa1679a0889ff32861e2d" +checksum = "0194e6e1966c23cc5fd988714f85b18d548d773e81965413555d96569931833d" +dependencies = [ + "pulldown-cmark", +] + +[[package]] +name = "quantization" +version = "0.1.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" dependencies = [ - "ar_archive_writer", + "arrayvec 0.7.6", + "bytemuck", "cc", + "common", + "fs-err", + "io", + "memory", + "num-traits", + "ordered-float 5.1.0", + "parking_lot", + "permutation_iterator", + "rand 0.9.2", + "rayon", + "serde", + "serde_json", + "strum", ] [[package]] @@ -4680,9 +3329,9 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash", - "rustls", + "rustls 0.23.31", "socket2 0.5.10", - "thiserror 2.0.16", + "thiserror 2.0.18", "tokio", "tracing", "web-time", @@ -4700,10 +3349,10 @@ dependencies = [ "rand 0.9.2", "ring", "rustc-hash", - "rustls", + "rustls 0.23.31", "rustls-pki-types", "slab", - "thiserror 2.0.16", + "thiserror 2.0.18", "tinyvec", "tracing", "web-time", @@ -4744,6 +3393,19 @@ version = "0.7.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "dc33ff2d4973d518d823d61aa239014831e521c75da58e3df4840d3f47749d09" +[[package]] +name = "rand" +version = "0.7.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a6b1679d49b24bbfe0c803429aa1874472f50d9b363131f0e89fc356b544d03" +dependencies = [ + "getrandom 0.1.16", + "libc", + "rand_chacha 0.2.2", + "rand_core 0.5.1", + "rand_hc", +] + [[package]] name = "rand" version = "0.8.5" @@ -4753,6 +3415,7 @@ dependencies = [ "libc", "rand_chacha 0.3.1", "rand_core 0.6.4", + "serde", ] [[package]] @@ -4765,6 +3428,16 @@ dependencies = [ "rand_core 0.9.3", ] +[[package]] +name = "rand_chacha" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"f4c8ed856279c9737206bf725bf36935d8666ead7aa69b52be55af369d193402" +dependencies = [ + "ppv-lite86", + "rand_core 0.5.1", +] + [[package]] name = "rand_chacha" version = "0.3.1" @@ -4779,10 +3452,19 @@ dependencies = [ name = "rand_chacha" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb" +dependencies = [ + "ppv-lite86", + "rand_core 0.9.3", +] + +[[package]] +name = "rand_core" +version = "0.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "90bde5296fc891b0cef12a6d03ddccc162ce7b2aff54160af9338f8d40df6d19" dependencies = [ - "ppv-lite86", - "rand_core 0.9.3", + "getrandom 0.1.16", ] [[package]] @@ -4792,6 +3474,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c" dependencies = [ "getrandom 0.2.16", + "serde", ] [[package]] @@ -4803,16 +3486,6 @@ dependencies = [ "getrandom 0.3.3", ] -[[package]] -name = "rand_distr" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "32cb0b9bc82b0a0876c2dd994a7e7a2683d3e7390ca40e6886785ef0c7e3ee31" -dependencies = [ - "num-traits", - "rand 0.8.5", -] - [[package]] name = "rand_distr" version = "0.5.1" @@ -4824,33 +3497,14 @@ dependencies = [ ] [[package]] -name = "rand_xoshiro" -version = "0.7.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f703f4665700daf5512dcca5f43afa6af89f09db47fb56be587f80636bda2d41" -dependencies = [ - "rand_core 0.9.3", -] - -[[package]] -name = "random_word" -version = "0.5.2" +name = "rand_hc" +version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e47a395bdb55442b883c89062d6bcff25dc90fa5f8369af81e0ac6d49d78cf81" +checksum = 
"ca3129af7b92a17112d59ad498c6f81eaf463253766b90396d39ea7a39d6613c" dependencies = [ - "ahash", - "brotli", - "paste", - "rand 0.9.2", - "unicase", + "rand_core 0.5.1", ] -[[package]] -name = "rangemap" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f93e7e49bb0bf967717f7bd674458b3d6b0c5f48ec7e3038166026a69fc22223" - [[package]] name = "rawpointer" version = "0.2.1" @@ -4888,33 +3542,13 @@ dependencies = [ "crossbeam-utils", ] -[[package]] -name = "recursive" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0786a43debb760f491b1bc0269fe5e84155353c67482b9e60d0cfb596054b43e" -dependencies = [ - "recursive-proc-macro-impl", - "stacker", -] - -[[package]] -name = "recursive-proc-macro-impl" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "76009fbe0614077fc1a2ce255e3a1881a2e3a3527097d5dc6d8212c585e7e38b" -dependencies = [ - "quote", - "syn 2.0.106", -] - [[package]] name = "redox_syscall" version = "0.5.17" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5407465600fb0548f1442edf71dd20683c6ed326200ace4b1ef0763521bb3b77" dependencies = [ - "bitflags", + "bitflags 2.9.3", ] [[package]] @@ -4936,27 +3570,7 @@ checksum = "a4e608c6638b9c18977b00b475ac1f28d14e84b27d8d42f70e0bf1e3dec127ac" dependencies = [ "getrandom 0.2.16", "libredox", - "thiserror 2.0.16", -] - -[[package]] -name = "ref-cast" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4a0ae411dbe946a674d89546582cea4ba2bb8defac896622d6496f14c23ba5cf" -dependencies = [ - "ref-cast-impl", -] - -[[package]] -name = "ref-cast-impl" -version = "1.0.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1165225c21bff1f3bbce98f5a1f889949bc902d3575308cc7b0de30b4f6d27c7" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", + "thiserror 2.0.18", ] [[package]] @@ -4990,41 
+3604,39 @@ checksum = "7a2d987857b319362043e95f5353c0535c1f58eec5336fdfcf626430af7def58" [[package]] name = "reqwest" -version = "0.12.23" +version = "0.12.28" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d429f34c8092b2d42c7c93cec323bb4adeb7c67698f70839adec842ec10c7ceb" +checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147" dependencies = [ - "async-compression", "base64 0.22.1", "bytes", - "encoding_rs", + "futures-channel", "futures-core", "futures-util", - "h2", - "http", - "http-body", + "h2 0.4.12", + "http 1.3.1", + "http-body 1.0.1", "http-body-util", - "hyper", + "hyper 1.7.0", "hyper-rustls", "hyper-util", "js-sys", "log", - "mime", "mime_guess", "percent-encoding", "pin-project-lite", "quinn", - "rustls", + "rustls 0.23.31", "rustls-native-certs", "rustls-pki-types", "serde", "serde_json", "serde_urlencoded", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", - "tokio-rustls", + "tokio-rustls 0.26.2", "tokio-util", - "tower", + "tower 0.5.2", "tower-http", "tower-service", "url", @@ -5065,11 +3677,36 @@ dependencies = [ "windows-sys 0.52.0", ] +[[package]] +name = "rle-decode-fast" +version = "1.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "3582f63211428f83597b51b2ddb88e2a91a9d52d12831f9d08f5e624e8977422" + +[[package]] +name = "rmp" +version = "0.8.15" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4ba8be72d372b2c9b35542551678538b562e7cf86c3315773cae48dfbfe7790c" +dependencies = [ + "num-traits", +] + +[[package]] +name = "rmp-serde" +version = "1.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72f81bee8c8ef9b577d1681a70ebbc962c232461e397b22c208c43c04b67a155" +dependencies = [ + "rmp", + "serde", +] + [[package]] name = "roaring" -version = "0.10.12" +version = "0.11.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"19e8d2cfa184d94d0726d650a9f4a1be7f9b76ac9fdb954219878dc00c1c1e7b" +checksum = "8ba9ce64a8f45d7fc86358410bb1a82e8c987504c0d4900e9141d69a9f26c885" dependencies = [ "bytemuck", "byteorder", @@ -5094,9 +3731,8 @@ dependencies = [ [[package]] name = "rust-stemmers" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e46a2036019fdb888131db7a4c847a1063a7493f971ed94ea82c67eada63ca54" +version = "1.2.1" +source = "git+https://github.com/qdrant/rust-stemmers.git?tag=v1.2.1#aee4c73b4012230b1163bf82d086cbf4b3f1102e" dependencies = [ "serde", "serde_derive", @@ -5129,7 +3765,7 @@ version = "0.38.44" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fdb5bc1ae2baa591800df16c9ca78619bf65c0488b41b96ccec5d11220d8c154" dependencies = [ - "bitflags", + "bitflags 2.9.3", "errno", "libc", "linux-raw-sys 0.4.15", @@ -5138,17 +3774,31 @@ dependencies = [ [[package]] name = "rustix" -version = "1.1.2" +version = "1.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cd15f8a2c5551a84d56efdc1cd049089e409ac19a3072d5037a17fd70719ff3e" +checksum = "146c9e247ccc180c1f61615433868c99f3de3ae256a30a43b49f67c2d9171f34" dependencies = [ - "bitflags", + "bitflags 2.9.3", "errno", "libc", "linux-raw-sys 0.11.0", "windows-sys 0.61.0", ] +[[package]] +name = "rustls" +version = "0.22.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bf4ef73721ac7bcd79b2b315da7779d8fc09718c6b3d2d1b2d94850eb8c18432" +dependencies = [ + "log", + "ring", + "rustls-pki-types", + "rustls-webpki 0.102.8", + "subtle", + "zeroize", +] + [[package]] name = "rustls" version = "0.23.31" @@ -5159,7 +3809,7 @@ dependencies = [ "once_cell", "ring", "rustls-pki-types", - "rustls-webpki", + "rustls-webpki 0.103.4", "subtle", "zeroize", ] @@ -5176,6 +3826,15 @@ dependencies = [ "security-framework", ] +[[package]] +name = "rustls-pemfile" +version = "2.2.0" +source = 
"registry+https://github.com/rust-lang/crates.io-index" +checksum = "dce314e5fee3f39953d46bb63bb8a46d40c2f8fb7cc5a3b6cab2bde9721d6e50" +dependencies = [ + "rustls-pki-types", +] + [[package]] name = "rustls-pki-types" version = "1.12.0" @@ -5186,6 +3845,17 @@ dependencies = [ "zeroize", ] +[[package]] +name = "rustls-webpki" +version = "0.102.8" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "64ca1bc8749bd4cf37b5ce386cc146580777b4e8572c7b97baf22c83f444bee9" +dependencies = [ + "ring", + "rustls-pki-types", + "untrusted", +] + [[package]] name = "rustls-webpki" version = "0.103.4" @@ -5239,40 +3909,45 @@ dependencies = [ [[package]] name = "schemars" -version = "0.9.0" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4cd191f9397d57d581cddd31014772520aa448f65ef991055d7f61582c65165f" +checksum = "3fbf2ae1b8bc8e02df939598064d22402220cd5bbcca1c76f7d6a310974d5615" dependencies = [ + "chrono", "dyn-clone", - "ref-cast", + "indexmap 1.9.3", + "indexmap 2.11.1", + "schemars_derive", "serde", "serde_json", + "url", + "uuid", ] [[package]] -name = "schemars" -version = "1.0.4" +name = "schemars_derive" +version = "0.8.22" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82d20c4491bc164fa2f6c5d44565947a52ad80b9505d8e36f8d54c27c739fcd0" +checksum = "32e265784ad618884abaea0600a9adf15393368d840e0222d101a072f3f7534d" dependencies = [ - "dyn-clone", - "ref-cast", - "serde", - "serde_json", + "proc-macro2", + "quote", + "serde_derive_internals", + "syn 2.0.106", ] -[[package]] -name = "scoped-tls" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1cf6437eb19a8f4a6cc0f7dca544973b0b78843adbfeb3683d1a94a0024a294" - [[package]] name = "scopeguard" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" +[[package]] +name = 
"seahash" +version = "4.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1c107b6f4780854c8b126e228ea8869f4d7b71260f962fefb57b996b8959ba6b" + [[package]] name = "secrecy" version = "0.10.3" @@ -5289,7 +3964,7 @@ version = "3.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "60b369d18893388b345804dc0007963c99b7d665ae71d275812d828c6f089640" dependencies = [ - "bitflags", + "bitflags 2.9.3", "core-foundation", "core-foundation-sys", "libc", @@ -5306,32 +3981,127 @@ dependencies = [ "libc", ] +[[package]] +name = "seedable_hash" +version = "0.1.7" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47190540123956611cf01db81ad6dee21ca70e1d94a8ff5a962cf6d93b217c7c" +dependencies = [ + "wyhash", + "xxhash-rust", +] + +[[package]] +name = "segment" +version = "0.6.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" +dependencies = [ + "ahash", + "atomic_refcell", + "atomicwrites", + "bincode 1.3.3", + "bitvec", + "bytemuck", + "byteorder", + "cc", + "cgroups-rs", + "charabia", + "chrono", + "common", + "data-encoding", + "delegate", + "ecow", + "fnv", + "fs-err", + "fs_extra", + "geo", + "geohash", + "gpu", + "gridstore", + "half 2.7.1", + "indexmap 2.11.1", + "integer-encoding", + "io", + "io-uring", + "is_sorted", + "itertools 0.14.0", + "log", + "macro_rules_attribute", + "macros", + "memmap2", + "memory", + "nom 8.0.0", + "num-cmp", + "num-derive", + "num-traits", + "ordered-float 5.1.0", + "parking_lot", + "posting_list", + "procfs", + "quantization", + "rand 0.9.2", + "rayon", + "roaring", + "rust-stemmers", + "schemars", + "seahash", + "self_cell", + "semver", + "serde", + "serde-untagged", + "serde-value", + "serde_cbor", + "serde_json", + "serde_variant", + "sha2", + "smallvec", + "sparse", + "strum", + "sysinfo", + "tap", + "tar", + "tempfile", + "thiserror 2.0.18", + "tinyvec", + "uuid", + "validator", + 
"vaporetto", + "zerocopy", +] + +[[package]] +name = "self_cell" +version = "1.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" + [[package]] name = "semtools" version = "2.0.0" dependencies = [ "anyhow", - "arrow-array", - "arrow-schema", "async-openai", "bytes", "clap", "dirs 5.0.1", + "edge", "futures", "grep", "grep-matcher", "grep-regex", "grep-searcher", "hex", - "lancedb", "mime_guess", "model2vec-rs", + "ordered-float 5.1.0", "rand 0.8.5", "regex", "reqwest", + "segment", "serde", "serde_json", "sha2", + "shard", "simsimd", "tempfile", "tokio", @@ -5342,12 +4112,9 @@ name = "semver" version = "1.0.26" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56e6fa9c48d24d85fb3de5ad847117517440f6beceb7798af16b4a87d616b8d0" - -[[package]] -name = "seq-macro" -version = "0.3.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1bc711410fbe7399f390ca1c3b60ad0f53f80e95c5eb935e52268a0e2cd49acc" +dependencies = [ + "serde", +] [[package]] name = "serde" @@ -5359,6 +4126,38 @@ dependencies = [ "serde_derive", ] +[[package]] +name = "serde-untagged" +version = "0.1.9" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f9faf48a4a2d2693be24c6289dbe26552776eb7737074e6722891fadbe6c5058" +dependencies = [ + "erased-serde", + "serde", + "serde_core", + "typeid", +] + +[[package]] +name = "serde-value" +version = "0.7.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f3a1a3341211875ef120e117ea7fd5228530ae7e7036a779fdc9117be6b3282c" +dependencies = [ + "ordered-float 2.10.1", + "serde", +] + +[[package]] +name = "serde_cbor" +version = "0.11.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2bef2ebfde456fb76bbcf9f59315333decc4fda0b2b44b420243c11e0f5ec1f5" +dependencies = [ + "half 1.8.3", + "serde", +] + [[package]] name = "serde_core" 
version = "1.0.228" @@ -5379,29 +4178,30 @@ dependencies = [ "syn 2.0.106", ] +[[package]] +name = "serde_derive_internals" +version = "0.29.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "18d26a20a969b9e3fdf2fc2d9f21eda6c40e2de84c9408bb5d3b05d499aae711" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] + [[package]] name = "serde_json" version = "1.0.143" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d401abef1d108fbd9cbaebc3e46611f4b1021f714a0597a71f41ee463f5f4a5a" dependencies = [ + "indexmap 2.11.1", "itoa", "memchr", "ryu", "serde", ] -[[package]] -name = "serde_repr" -version = "0.1.20" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "175ee3e80ae9982737ca543e96133087cbd9a485eecc3bc4de9c1a37b47ea59c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", -] - [[package]] name = "serde_urlencoded" version = "0.7.1" @@ -5415,64 +4215,57 @@ dependencies = [ ] [[package]] -name = "serde_with" -version = "3.14.0" +name = "serde_variant" +version = "0.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f2c45cd61fefa9db6f254525d46e392b852e0e61d9a1fd36e5bd183450a556d5" +checksum = "0a0068df419f9d9b6488fdded3f1c818522cdea328e02ce9d9f147380265a432" dependencies = [ - "base64 0.22.1", - "chrono", - "hex", - "indexmap 1.9.3", - "indexmap 2.11.1", - "schemars 0.9.0", - "schemars 1.0.4", "serde", - "serde_derive", - "serde_json", - "serde_with_macros", - "time", -] - -[[package]] -name = "serde_with_macros" -version = "3.14.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "de90945e6565ce0d9a25098082ed4ee4002e047cb59892c318d66821e14bb30f" -dependencies = [ - "darling 0.20.11", - "proc-macro2", - "quote", - "syn 2.0.106", ] [[package]] name = "sha2" -version = "0.10.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" -dependencies = [ - "cfg-if", - "cpufeatures", - "digest", -] - -[[package]] -name = "sharded-slab" -version = "0.1.7" +version = "0.10.9" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f40ca3c46823713e0d4209592e8d6e826aa57e928f09752619fc696c499637f6" +checksum = "a7507d819769d01a365ab707794a4084392c824f54a7a6a7862f8c3d0892b283" dependencies = [ - "lazy_static", + "cfg-if", + "cpufeatures", + "digest", ] [[package]] -name = "shellexpand" -version = "3.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8b1fdf65dd6331831494dd616b30351c38e96e45921a27745cf98490458b90bb" +name = "shard" +version = "0.1.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" dependencies = [ - "dirs 6.0.0", + "ahash", + "api", + "bitvec", + "common", + "fs-err", + "indexmap 2.11.1", + "io", + "itertools 0.14.0", + "log", + "ordered-float 5.1.0", + "parking_lot", + "rand 0.9.2", + "rmp-serde", + "schemars", + "segment", + "serde", + "serde_cbor", + "serde_json", + "smallvec", + "sparse", + "strum", + "tempfile", + "thiserror 2.0.18", + "tonic", + "uuid", + "validator", + "wal", ] [[package]] @@ -5481,6 +4274,12 @@ version = "1.3.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64" +[[package]] +name = "sif-itree" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "142099cd6db3c4fab61e5133c62ff80b26674391e195860791fda0b1be3e5080" + [[package]] name = "signal-hook-registry" version = "1.4.6" @@ -5491,10 +4290,10 @@ dependencies = [ ] [[package]] -name = "simdutf8" -version = "0.1.5" +name = "simd-adler32" +version = "0.3.8" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e3a9fe34e3e7a50316060351f37187a3f546bce95496156754b601a5fa71b76e" +checksum 
= "e320a6c5ad31d271ad523dcf3ad13e2767ad8b1cb8f047f75a8aeaf8da139da2" [[package]] name = "simsimd" @@ -5511,15 +4310,6 @@ version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "56199f7ddabf13fe5074ce809e7d3f42b42ae711800501b5b16ea82ad029c39d" -[[package]] -name = "sketches-ddsketch" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1e9a774a6c28142ac54bb25d25562e6bcf957493a184f15ad4eebccb23e410a" -dependencies = [ - "serde", -] - [[package]] name = "slab" version = "0.4.11" @@ -5527,37 +4317,16 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "7a2ae44ef20feb57a68b23d846850f861394c2e02dc425a50098ae8c90267589" [[package]] -name = "smallvec" -version = "1.15.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" - -[[package]] -name = "snafu" -version = "0.8.9" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e84b3f4eacbf3a1ce05eac6763b4d629d60cbc94d632e4092c54ade71f1e1a2" -dependencies = [ - "snafu-derive", -] - -[[package]] -name = "snafu-derive" -version = "0.8.9" +name = "slice-group-by" +version = "0.3.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c1c97747dbf44bb1ca44a561ece23508e99cb592e862f22222dcf42f51d1e451" -dependencies = [ - "heck", - "proc-macro2", - "quote", - "syn 2.0.106", -] +checksum = "826167069c09b99d56f31e9ae5c99049e932a98c9dc2dac47645b08dbbf76ba7" [[package]] -name = "snap" -version = "1.1.1" +name = "smallvec" +version = "1.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1b6b67fb9a61334225b5b790716f609cd58395f895b3fe8b328786812a40bc3b" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" [[package]] name = "socket2" @@ -5602,6 +4371,32 @@ dependencies = [ "smallvec", ] +[[package]] +name = "sparse" +version = 
"0.1.0" +source = "git+https://github.com/qdrant/qdrant.git?branch=dev#c033e26cf45071bdb018d9345db8edcb1c901f30" +dependencies = [ + "bincode 1.3.3", + "bitpacking", + "common", + "fs-err", + "gridstore", + "half 2.7.1", + "io", + "itertools 0.14.0", + "log", + "memmap2", + "memory", + "ordered-float 5.1.0", + "parking_lot", + "rand 0.9.2", + "schemars", + "serde", + "serde_json", + "tempfile", + "validator", +] + [[package]] name = "spm_precompiled" version = "0.1.4" @@ -5614,47 +4409,12 @@ dependencies = [ "unicode-segmentation", ] -[[package]] -name = "sqlparser" -version = "0.58.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ec4b661c54b1e4b603b37873a18c59920e4c51ea8ea2cf527d925424dbd4437c" -dependencies = [ - "log", - "recursive", - "sqlparser_derive", -] - -[[package]] -name = "sqlparser_derive" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "da5fc6819faabb412da764b99d3b713bb55083c11e7e0c00144d386cd6a1939c" -dependencies = [ - "proc-macro2", - "quote", - "syn 2.0.106", -] - [[package]] name = "stable_deref_trait" version = "1.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" -[[package]] -name = "stacker" -version = "0.1.22" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1f8b29fb42aafcea4edeeb6b2f2d7ecd0d969c48b4cf0d2e64aafc471dd6e59" -dependencies = [ - "cc", - "cfg-if", - "libc", - "psm", - "windows-sys 0.59.0", -] - [[package]] name = "static_assertions" version = "1.1.0" @@ -5662,16 +4422,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" [[package]] -name = "std_prelude" -version = "0.2.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8207e78455ffdf55661170876f88daf85356e4edd54e0a3dbc79586ca1e50cbe" - -[[package]] 
-name = "stfu8" -version = "0.2.7" +name = "strsim" +version = "0.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e51f1e89f093f99e7432c491c382b88a6860a5adbe6bf02574bf0a08efff1978" +checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" [[package]] name = "strsim" @@ -5681,23 +4435,22 @@ checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" [[package]] name = "strum" -version = "0.26.3" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fec0f0aef304996cf250b31b5a10dee7980c85da9d759361292b8bca5a18f06" +checksum = "af23d6f6c1a224baef9d3f61e287d2761385a5b88fdab4eb4c6f11aeb54c4bcf" dependencies = [ "strum_macros", ] [[package]] name = "strum_macros" -version = "0.26.4" +version = "0.27.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4c6bee85a5a24955dc440386795aa378cd9cf82acd5f764469152d2270e581be" +checksum = "7695ce3845ea4b33927c055a39dc438a45b059f7c1b3d91d38d10355fb8cbca7" dependencies = [ "heck", "proc-macro2", "quote", - "rustversion", "syn 2.0.106", ] @@ -5714,7 +4467,6 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" dependencies = [ "proc-macro2", - "quote", "unicode-ident", ] @@ -5729,6 +4481,12 @@ dependencies = [ "unicode-ident", ] +[[package]] +name = "sync_wrapper" +version = "0.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2047c6ded9c721764247e62cd3b03c09ffc529b2ba5b10ec482ae507a4a70160" + [[package]] name = "sync_wrapper" version = "1.0.2" @@ -5750,173 +4508,46 @@ dependencies = [ ] [[package]] -name = "tagptr" -version = "0.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7b2093cf4c8eb1e67749a6762251bc9cd836b6fc171623bd0a9d324d37af2417" - -[[package]] -name = "tantivy" -version = "0.24.2" -source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "64a966cb0e76e311f09cf18507c9af192f15d34886ee43d7ba7c7e3803660c43" -dependencies = [ - "aho-corasick", - "arc-swap", - "base64 0.22.1", - "bitpacking", - "bon", - "byteorder", - "census", - "crc32fast", - "crossbeam-channel", - "downcast-rs", - "fastdivide", - "fnv", - "fs4", - "htmlescape", - "hyperloglogplus", - "itertools 0.14.0", - "levenshtein_automata", - "log", - "lru", - "lz4_flex", - "measure_time", - "memmap2", - "once_cell", - "oneshot", - "rayon", - "regex", - "rust-stemmers", - "rustc-hash", - "serde", - "serde_json", - "sketches-ddsketch", - "smallvec", - "tantivy-bitpacker", - "tantivy-columnar", - "tantivy-common", - "tantivy-fst", - "tantivy-query-grammar", - "tantivy-stacker", - "tantivy-tokenizer-api", - "tempfile", - "thiserror 2.0.16", - "time", - "uuid", - "winapi", -] - -[[package]] -name = "tantivy-bitpacker" -version = "0.8.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1adc286a39e089ae9938935cd488d7d34f14502544a36607effd2239ff0e2494" -dependencies = [ - "bitpacking", -] - -[[package]] -name = "tantivy-columnar" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6300428e0c104c4f7db6f95b466a6f5c1b9aece094ec57cdd365337908dc7344" -dependencies = [ - "downcast-rs", - "fastdivide", - "itertools 0.14.0", - "serde", - "tantivy-bitpacker", - "tantivy-common", - "tantivy-sstable", - "tantivy-stacker", -] - -[[package]] -name = "tantivy-common" -version = "0.9.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e91b6ea6090ce03dc72c27d0619e77185d26cc3b20775966c346c6d4f7e99d7f" -dependencies = [ - "async-trait", - "byteorder", - "ownedbytes", - "serde", - "time", -] - -[[package]] -name = "tantivy-fst" -version = "0.5.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d60769b80ad7953d8a7b2c70cdfe722bbcdcac6bccc8ac934c40c034d866fc18" 
-dependencies = [ - "byteorder", - "regex-syntax", - "utf8-ranges", -] - -[[package]] -name = "tantivy-query-grammar" -version = "0.24.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e810cdeeebca57fc3f7bfec5f85fdbea9031b2ac9b990eb5ff49b371d52bbe6a" -dependencies = [ - "nom 7.1.3", - "serde", - "serde_json", -] - -[[package]] -name = "tantivy-sstable" -version = "0.5.0" +name = "sysinfo" +version = "0.38.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "709f22c08a4c90e1b36711c1c6cad5ae21b20b093e535b69b18783dd2cb99416" +checksum = "fe840c5b1afe259a5657392a4dbb74473a14c8db999c3ec2f4ae812e028a94da" dependencies = [ - "futures-util", - "itertools 0.14.0", - "tantivy-bitpacker", - "tantivy-common", - "tantivy-fst", - "zstd", + "libc", + "memchr", + "ntapi", + "objc2-core-foundation", + "objc2-io-kit", + "windows 0.62.1", ] [[package]] -name = "tantivy-stacker" -version = "0.5.0" +name = "tap" +version = "1.0.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2bcdebb267671311d1e8891fd9d1301803fdb8ad21ba22e0a30d0cab49ba59c1" -dependencies = [ - "murmurhash32", - "rand_distr 0.4.3", - "tantivy-common", -] +checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" [[package]] -name = "tantivy-tokenizer-api" -version = "0.5.0" +name = "tar" +version = "0.4.44" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "dfa942fcee81e213e09715bbce8734ae2180070b97b33839a795ba1de201547d" +checksum = "1d863878d212c87a19c1a610eb53bb01fe12951c0501cf5a0d65f724914a667a" dependencies = [ - "serde", + "filetime", + "libc", + "xattr", ] -[[package]] -name = "tap" -version = "1.0.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "55937e1799185b12863d447f42597ed69d9928686b8d88a1df17376a097d8369" - [[package]] name = "tempfile" -version = "3.22.0" +version = "3.24.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "84fa4d11fadde498443cca10fd3ac23c951f0dc59e080e9f4b93d4df4e4eea53" +checksum = "655da9c7eb6305c55742045d5a8d2037996d61d8de95806335c7c86ce0f82e9c" dependencies = [ "fastrand", "getrandom 0.3.3", "once_cell", - "rustix 1.1.2", + "rustix 1.1.3", "windows-sys 0.61.0", ] @@ -5940,11 +4571,11 @@ dependencies = [ [[package]] name = "thiserror" -version = "2.0.16" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3467d614147380f2e4e374161426ff399c91084acd2363eaf549172b3d5e60c0" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" dependencies = [ - "thiserror-impl 2.0.16", + "thiserror-impl 2.0.18", ] [[package]] @@ -5960,9 +4591,9 @@ dependencies = [ [[package]] name = "thiserror-impl" -version = "2.0.16" +version = "2.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6c5e1be1c48b9172ee610da68fd9cd2770e7a4056cb3fc98710ee6906f0c7960" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" dependencies = [ "proc-macro2", "quote", @@ -5970,71 +4601,17 @@ dependencies = [ ] [[package]] -name = "thread-tree" -version = "0.3.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ffbd370cb847953a25954d9f63e14824a36113f8c72eecf6eccef5dc4b45d630" -dependencies = [ - "crossbeam-channel", -] - -[[package]] -name = "thread_local" -version = "1.1.9" +name = "thread-priority" +version = "3.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f60246a4944f24f6e018aa17cdeffb7818b76356965d03b07d6a9886e8962185" +checksum = "2210811179577da3d54eb69ab0b50490ee40491a25d95b8c6011ba40771cb721" dependencies = [ + "bitflags 2.9.3", "cfg-if", -] - -[[package]] -name = "thrift" -version = "0.17.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7e54bc85fc7faa8bc175c4bab5b92ba8d9a3ce893d0e9f42cc455c8ab16a9e09" 
-dependencies = [ - "byteorder", - "integer-encoding", - "ordered-float 2.10.1", -] - -[[package]] -name = "time" -version = "0.3.43" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "83bde6f1ec10e72d583d91623c939f623002284ef622b87de38cfd546cbf2031" -dependencies = [ - "deranged", - "num-conv", - "powerfmt", - "serde", - "time-core", - "time-macros", -] - -[[package]] -name = "time-core" -version = "0.1.6" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40868e7c1d2f0b8d73e4a8c7f0ff63af4f6d19be117e90bd73eb1d62cf831c6b" - -[[package]] -name = "time-macros" -version = "0.2.24" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "30cfb0125f12d9c277f35663a0a33f8c30190f4e4574868a330595412d34ebf3" -dependencies = [ - "num-conv", - "time-core", -] - -[[package]] -name = "tiny-keccak" -version = "2.0.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2c9d3793400a45f954c52e73d068316d76b6f4e36977e3fcebb13a2721e80237" -dependencies = [ - "crunchy", + "libc", + "log", + "rustversion", + "windows 0.61.3", ] [[package]] @@ -6049,9 +4626,9 @@ dependencies = [ [[package]] name = "tinyvec" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09b3661f17e86524eccd4371ab0429194e0d7c008abb45f7a7495b1719463c71" +checksum = "bfa5fdc3bce6191a1dbc8c02d5c8bffcf557bafa17c124c5264a458f1b0613fa" dependencies = [ "tinyvec_macros", ] @@ -6076,7 +4653,7 @@ dependencies = [ "esaxx-rs", "getrandom 0.3.3", "hf-hub", - "indicatif", + "indicatif 0.17.11", "itertools 0.14.0", "log", "macro_rules_attribute", @@ -6091,7 +4668,7 @@ dependencies = [ "serde", "serde_json", "spm_precompiled", - "thiserror 2.0.16", + "thiserror 2.0.18", "unicode-normalization-alignments", "unicode-segmentation", "unicode_categories", @@ -6099,42 +4676,60 @@ dependencies = [ [[package]] name = "tokio" -version = "1.47.1" +version = "1.49.0" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "89e49afdadebb872d3145a5638b59eb0691ea23e46ca484037cfab3b76b95038" +checksum = "72a2903cd7736441aac9df9d7688bd0ce48edccaadf181c3b90be801e81d3d86" dependencies = [ - "backtrace", "bytes", - "io-uring", "libc", "mio", "parking_lot", "pin-project-lite", "signal-hook-registry", - "slab", "socket2 0.6.0", "tokio-macros", - "windows-sys 0.59.0", + "windows-sys 0.61.0", +] + +[[package]] +name = "tokio-io-timeout" +version = "1.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0bd86198d9ee903fedd2f9a2e72014287c0d9167e4ae43b5853007205dda1b76" +dependencies = [ + "pin-project-lite", + "tokio", ] [[package]] name = "tokio-macros" -version = "2.5.0" +version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "6e06d43f1345a3bcd39f6a56dbb7dcab2ba47e68e8ac134855e7e2bdbaf8cab8" +checksum = "af407857209536a95c8e56f8231ef2c2e2aff839b22e07a1ffcbc617e9db9fa5" dependencies = [ "proc-macro2", "quote", "syn 2.0.106", ] +[[package]] +name = "tokio-rustls" +version = "0.25.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "775e0c0f0adb3a2f22a00c4745d728b479985fc15ee7ca6a2608388c5569860f" +dependencies = [ + "rustls 0.22.4", + "rustls-pki-types", + "tokio", +] + [[package]] name = "tokio-rustls" version = "0.26.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8e727b36a1a0e8b74c376ac2211e40c2c8af09fb4013c60d910495810f008e9b" dependencies = [ - "rustls", + "rustls 0.23.31", "tokio", ] @@ -6163,33 +4758,67 @@ dependencies = [ ] [[package]] -name = "toml_datetime" -version = "0.7.5+spec-1.1.0" +name = "tonic" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "92e1cfed4a3038bc5a127e35a2d360f145e1f4b971b551a2ba5fd7aedf7e1347" +checksum = "76c4eb7a4e9ef9d4763600161f12f5070b92a578e1b634db88a6887844c91a13" dependencies = [ - "serde_core", + "async-stream", 
+ "async-trait", + "axum", + "base64 0.21.7", + "bytes", + "flate2", + "h2 0.3.27", + "http 0.2.12", + "http-body 0.4.6", + "hyper 0.14.32", + "hyper-timeout", + "percent-encoding", + "pin-project", + "prost", + "rustls-pemfile", + "rustls-pki-types", + "tokio", + "tokio-rustls 0.25.0", + "tokio-stream", + "tower 0.4.13", + "tower-layer", + "tower-service", + "tracing", ] [[package]] -name = "toml_edit" -version = "0.23.5" +name = "tonic-build" +version = "0.11.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c2ad0b7ae9cfeef5605163839cb9221f453399f15cfb5c10be9885fcf56611f9" +checksum = "be4ef6dd70a610078cb4e338a0f79d06bc759ff1b22d2120c2ff02ae264ba9c2" dependencies = [ - "indexmap 2.11.1", - "toml_datetime", - "toml_parser", - "winnow", + "prettyplease", + "proc-macro2", + "prost-build", + "quote", + "syn 2.0.106", ] [[package]] -name = "toml_parser" -version = "1.0.6+spec-1.1.0" +name = "tower" +version = "0.4.13" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a3198b4b0a8e11f09dd03e133c0280504d0801269e9afa46362ffde1cbeebf44" +checksum = "b8fa9be0de6cf49e536ce1851f987bd21a43b771b09473c3549a6c853db37c1c" dependencies = [ - "winnow", + "futures-core", + "futures-util", + "indexmap 1.9.3", + "pin-project", + "pin-project-lite", + "rand 0.8.5", + "slab", + "tokio", + "tokio-util", + "tower-layer", + "tower-service", + "tracing", ] [[package]] @@ -6201,7 +4830,7 @@ dependencies = [ "futures-core", "futures-util", "pin-project-lite", - "sync_wrapper", + "sync_wrapper 1.0.2", "tokio", "tower-layer", "tower-service", @@ -6213,14 +4842,14 @@ version = "0.6.8" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8" dependencies = [ - "bitflags", + "bitflags 2.9.3", "bytes", "futures-util", - "http", - "http-body", + "http 1.3.1", + "http-body 1.0.1", "iri-string", "pin-project-lite", - "tower", + "tower 0.5.2", "tower-layer", 
"tower-service", ] @@ -6266,58 +4895,49 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b9d12581f227e93f094d3af2ae690a574abb8a2b9b7a96e7cfe9647b2b617678" dependencies = [ "once_cell", - "valuable", ] [[package]] -name = "tracing-log" -version = "0.2.0" +name = "try-lock" +version = "0.2.5" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee855f1f400bd0e5c02d150ae5de3840039a3f54b025156404e34c23c03f47c3" -dependencies = [ - "log", - "once_cell", - "tracing-core", -] +checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" [[package]] -name = "tracing-subscriber" -version = "0.3.20" +name = "typeid" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2054a14f5307d601f88daf0553e1cbf472acc4f2c51afab632431cdcd72124d5" -dependencies = [ - "matchers", - "nu-ansi-term", - "once_cell", - "regex-automata", - "sharded-slab", - "smallvec", - "thread_local", - "tracing", - "tracing-core", - "tracing-log", -] +checksum = "bc7d623258602320d5c55d1bc22793b57daff0ec7efc270ea7d55ce1d5f5471c" [[package]] -name = "try-lock" -version = "0.2.5" +name = "typenum" +version = "1.18.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b" +checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" [[package]] -name = "twox-hash" -version = "2.1.2" +name = "typetag" +version = "0.2.21" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ea3136b675547379c4bd395ca6b938e5ad3c3d20fad76e7fe85f9e0d011419c" +checksum = "be2212c8a9b9bcfca32024de14998494cf9a5dfa59ea1b829de98bac374b86bf" dependencies = [ - "rand 0.9.2", + "erased-serde", + "inventory", + "once_cell", + "serde", + "typetag-impl", ] [[package]] -name = "typenum" -version = "1.18.0" +name = "typetag-impl" +version = "0.2.21" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "1dccffe3ce07af9386bfd29e80c0ab1a8205a2fc34e4bcd40364df902cfa8f3f" +checksum = "27a7a9b72ba121f6f1f6c3632b85604cac41aedb5ddc70accbebb6cac83de846" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.106", +] [[package]] name = "unicase" @@ -6331,6 +4951,15 @@ version = "1.0.18" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "5a5f39404a5da50712a4c1eecf25e90dd62b613502b7e925fd4e4d19b5c96512" +[[package]] +name = "unicode-normalization" +version = "0.1.25" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "5fd4f6878c9cb28d874b009da9e8d183b5abc80117c40bbd187a1fde336be6e8" +dependencies = [ + "tinyvec", +] + [[package]] name = "unicode-normalization-alignments" version = "0.1.12" @@ -6358,12 +4987,24 @@ version = "0.1.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "39ec24b3121d976906ece63c9daad25b85969647682eee313cb5779fdd69e14e" +[[package]] +name = "unit-prefix" +version = "0.5.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "81e544489bf3d8ef66c953931f56617f423cd4b5494be343d9b9d3dda037b9a3" + [[package]] name = "untrusted" version = "0.9.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1" +[[package]] +name = "unty" +version = "0.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6d49784317cd0d1ee7ec5c716dd598ec5b4483ea832a2dced265471cc0f690ae" + [[package]] name = "ureq" version = "2.12.1" @@ -6374,7 +5015,7 @@ dependencies = [ "flate2", "log", "once_cell", - "rustls", + "rustls 0.23.31", "rustls-pki-types", "serde", "serde_json", @@ -6395,12 +5036,6 @@ dependencies = [ "serde", ] -[[package]] -name = "utf8-ranges" -version = "1.0.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = 
"7fcfc827f90e53a02eaef5e535ee14266c1d569214c6aa70133a624d8a3164ba" - [[package]] name = "utf8_iter" version = "1.0.4" @@ -6415,21 +5050,56 @@ checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" [[package]] name = "uuid" -version = "1.18.1" +version = "1.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "2f87b8aa10b915a06587d0dec516c282ff295b475d94abf425d62b57710070a2" +checksum = "ee48d38b119b0cd71fe4141b30f5ba9c7c5d9f4e7a3a8b4a674e4b6ef789976f" dependencies = [ "getrandom 0.3.3", "js-sys", - "serde", + "serde_core", "wasm-bindgen", ] [[package]] -name = "valuable" -version = "0.1.1" +name = "validator" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "43fb22e1a008ece370ce08a3e9e4447a910e92621bb49b85d6e48a45397e7cfa" +dependencies = [ + "idna", + "once_cell", + "regex", + "serde", + "serde_derive", + "serde_json", + "url", + "validator_derive", +] + +[[package]] +name = "validator_derive" +version = "0.20.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ba73ea9cf16a25df0c8caa16c51acb937d5712a8429db78a3ee29d5dcacd3a65" +checksum = "b7df16e474ef958526d1205f6dda359fdfab79d9aa6d54bafcb92dcd07673dca" +dependencies = [ + "darling", + "once_cell", + "proc-macro-error2", + "proc-macro2", + "quote", + "syn 2.0.106", +] + +[[package]] +name = "vaporetto" +version = "0.6.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2d7437bd3d45100e1ed1a284187ce4e9ee863f1fdac97b7eaa614623741464c6" +dependencies = [ + "bincode 2.0.1", + "daachorse", + "hashbrown 0.15.5", +] [[package]] name = "version_check" @@ -6437,6 +5107,31 @@ version = "0.9.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0b928f33d975fc6ad9f86c8f283853ad26bdd5b10b7f1542aa2fa15e2289105a" +[[package]] +name = "virtue" +version = "0.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"051eb1abcf10076295e815102942cc58f9d5e3b4560e46e53c21e8ff6f3af7b1" + +[[package]] +name = "wal" +version = "0.1.4" +source = "git+https://github.com/qdrant/wal.git?rev=2209eb2c4a359d9c32f63d868c580df360efa4a9#2209eb2c4a359d9c32f63d868c580df360efa4a9" +dependencies = [ + "byteorder", + "crc32c", + "crossbeam-channel", + "docopt", + "env_logger", + "fs4", + "log", + "memmap2", + "rand 0.9.2", + "rand_distr", + "rustix 1.1.3", + "serde", +] + [[package]] name = "walkdir" version = "2.5.0" @@ -6456,6 +5151,12 @@ dependencies = [ "try-lock", ] +[[package]] +name = "wasi" +version = "0.9.0+wasi-snapshot-preview1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cccddf32554fecc6acb585f82a32a72e28b48f8c4c1883ddfeeeaa96f7d8e519" + [[package]] name = "wasi" version = "0.11.1+wasi-snapshot-preview1" @@ -6593,6 +5294,16 @@ dependencies = [ "rustls-pki-types", ] +[[package]] +name = "whatlang" +version = "0.16.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "471d1c1645d361eb782a1650b1786a8fb58dd625e681a04c09f5ff7c8764a7b0" +dependencies = [ + "hashbrown 0.14.5", + "once_cell", +] + [[package]] name = "winapi" version = "0.3.9" @@ -6630,11 +5341,23 @@ version = "0.61.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9babd3a767a4c1aef6900409f85f5d53ce2544ccdfaa86dad48c91782c6d6893" dependencies = [ - "windows-collections", - "windows-core", - "windows-future", + "windows-collections 0.2.0", + "windows-core 0.61.2", + "windows-future 0.2.1", "windows-link 0.1.3", - "windows-numerics", + "windows-numerics 0.2.0", +] + +[[package]] +name = "windows" +version = "0.62.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "49e6c4a1f363c8210c6f77ba24f645c61c6fb941eccf013da691f7e09515b8ac" +dependencies = [ + "windows-collections 0.3.1", + "windows-core 0.62.1", + "windows-future 0.3.1", + "windows-numerics 0.3.0", ] [[package]] @@ -6643,7 +5366,16 @@ version = "0.2.0" 
source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3beeceb5e5cfd9eb1d76b381630e82c4241ccd0d27f1a39ed41b2760b255c5e8" dependencies = [ - "windows-core", + "windows-core 0.61.2", +] + +[[package]] +name = "windows-collections" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "123e712f464a8a60ce1a13f4c446d2d43ab06464cb5842ff68f5c71b6fb7852e" +dependencies = [ + "windows-core 0.62.1", ] [[package]] @@ -6655,8 +5387,21 @@ dependencies = [ "windows-implement", "windows-interface", "windows-link 0.1.3", - "windows-result", - "windows-strings", + "windows-result 0.3.4", + "windows-strings 0.4.2", +] + +[[package]] +name = "windows-core" +version = "0.62.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6844ee5416b285084d3d3fffd743b925a6c9385455f64f6d4fa3031c4c2749a9" +dependencies = [ + "windows-implement", + "windows-interface", + "windows-link 0.2.0", + "windows-result 0.4.0", + "windows-strings 0.5.0", ] [[package]] @@ -6665,16 +5410,27 @@ version = "0.2.1" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "fc6a41e98427b19fe4b73c550f060b59fa592d7d686537eebf9385621bfbad8e" dependencies = [ - "windows-core", + "windows-core 0.61.2", "windows-link 0.1.3", - "windows-threading", + "windows-threading 0.1.0", +] + +[[package]] +name = "windows-future" +version = "0.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "68f3db6b24b120200d649cd4811b4947188ed3a8d2626f7075146c5d178a9a4a" +dependencies = [ + "windows-core 0.62.1", + "windows-link 0.2.0", + "windows-threading 0.2.0", ] [[package]] name = "windows-implement" -version = "0.60.0" +version = "0.60.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a47fddd13af08290e67f4acabf4b459f647552718f683a7b415d290ac744a836" +checksum = "053e2e040ab57b9dc951b72c264860db7eb3b0200ba345b4e4c3b14f67855ddf" dependencies = [ "proc-macro2", "quote", 
@@ -6683,9 +5439,9 @@ dependencies = [ [[package]] name = "windows-interface" -version = "0.59.1" +version = "0.59.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bd9211b69f8dcdfa817bfd14bf1c97c9188afa36f4750130fcdf3f400eca9fa8" +checksum = "3f316c4a2570ba26bbec722032c4099d8c8bc095efccdc15688708623367e358" dependencies = [ "proc-macro2", "quote", @@ -6710,10 +5466,20 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "9150af68066c4c5c07ddc0ce30421554771e528bde427614c61038bc2c92c2b1" dependencies = [ - "windows-core", + "windows-core 0.61.2", "windows-link 0.1.3", ] +[[package]] +name = "windows-numerics" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2ce3498fe0aba81e62e477408383196b4b0363db5e0c27646f932676283b43d8" +dependencies = [ + "windows-core 0.62.1", + "windows-link 0.2.0", +] + [[package]] name = "windows-result" version = "0.3.4" @@ -6723,6 +5489,15 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-result" +version = "0.4.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7084dcc306f89883455a206237404d3eaf961e5bd7e0f312f7c91f57eb44167f" +dependencies = [ + "windows-link 0.2.0", +] + [[package]] name = "windows-strings" version = "0.4.2" @@ -6732,6 +5507,15 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-strings" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7218c655a553b0bed4426cf54b20d7ba363ef543b52d515b3e48d7fd55318dda" +dependencies = [ + "windows-link 0.2.0", +] + [[package]] name = "windows-sys" version = "0.48.0" @@ -6834,6 +5618,15 @@ dependencies = [ "windows-link 0.1.3", ] +[[package]] +name = "windows-threading" +version = "0.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ab47f085ad6932defa48855254c758cdd0e2f2d48e62a34118a268d8f345e118" +dependencies = 
[ + "windows-link 0.2.0", +] + [[package]] name = "windows_aarch64_gnullvm" version = "0.48.5" @@ -6972,55 +5765,30 @@ version = "0.53.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "271414315aff87387382ec3d271b52d7ae78726f5d44ac98b4f4030c91880486" -[[package]] -name = "winnow" -version = "0.7.14" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5a5364e9d77fcdeeaa6062ced926ee3381faa2ee02d3eb83a5c27a8825540829" -dependencies = [ - "memchr", -] - [[package]] name = "wit-bindgen-rt" version = "0.39.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "6f42320e61fe2cfd34354ecb597f86f413484a798ba44a8ca1165c58d42da6c1" dependencies = [ - "bitflags", + "bitflags 2.9.3", ] [[package]] -name = "wkb" -version = "0.9.2" +name = "writeable" +version = "0.6.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a120b336c7ad17749026d50427c23d838ecb50cd64aaea6254b5030152f890a9" -dependencies = [ - "byteorder", - "geo-traits", - "num_enum", - "thiserror 1.0.69", -] +checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" [[package]] -name = "wkt" -version = "0.14.0" +name = "wyhash" +version = "0.5.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "efb2b923ccc882312e559ffaa832a055ba9d1ac0cc8e86b3e25453247e4b81d7" +checksum = "baf6e163c25e3fac820b4b453185ea2dea3b6a3e0a721d4d23d75bd33734c295" dependencies = [ - "geo-traits", - "geo-types", - "log", - "num-traits", - "thiserror 1.0.69", + "rand_core 0.6.4", ] -[[package]] -name = "writeable" -version = "0.6.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ea2f10b9bb0928dfb1b42b65e1f9e36f7f54dbdf08457afefb38afcdec4fa2bb" - [[package]] name = "wyz" version = "0.5.1" @@ -7031,19 +5799,20 @@ dependencies = [ ] [[package]] -name = "xxhash-rust" -version = "0.8.15" +name = "xattr" +version = "1.6.1" source = 
"registry+https://github.com/rust-lang/crates.io-index" -checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" +checksum = "32e45ad4206f6d2479085147f02bc2ef834ac85886624a23575ae137c8aa8156" +dependencies = [ + "libc", + "rustix 1.1.3", +] [[package]] -name = "xz2" -version = "0.1.7" +name = "xxhash-rust" +version = "0.8.15" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "388c44dc09d76f1536602ead6d325eb532f5c122f17782bd57fb47baeeb767e2" -dependencies = [ - "lzma-sys", -] +checksum = "fdd20c5420375476fbd4394763288da7eb0cc0b8c11deed431a91562af7335d3" [[package]] name = "yoke" @@ -7071,18 +5840,18 @@ dependencies = [ [[package]] name = "zerocopy" -version = "0.8.26" +version = "0.8.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1039dd0d3c310cf05de012d8a39ff557cb0d23087fd44cad61df08fc31907a2f" +checksum = "57cf3aa6855b23711ee9852dfc97dfaa51c45feaba5b645d0c777414d494a961" dependencies = [ "zerocopy-derive", ] [[package]] name = "zerocopy-derive" -version = "0.8.26" +version = "0.8.38" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ecf5b4cc5364572d7f4c329661bcc82724222973f2cab6f050a4e5c22f75181" +checksum = "8a616990af1a287837c4fe6596ad77ef57948f787e46ce28e166facc0cc1cb75" dependencies = [ "proc-macro2", "quote", @@ -7149,12 +5918,6 @@ dependencies = [ "syn 2.0.106", ] -[[package]] -name = "zlib-rs" -version = "0.5.5" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "40990edd51aae2c2b6907af74ffb635029d5788228222c4bb811e9351c0caad3" - [[package]] name = "zstd" version = "0.13.3" diff --git a/Cargo.toml b/Cargo.toml index 049904c4..248db743 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -52,10 +52,11 @@ model2vec-rs = { version = "0.1.3", optional = true } simsimd = { version = "6.5.1", optional = true } # Workspace-specific dependencies -lancedb = { version = "0.23.1", default-features = false, optional = true } -arrow-schema = 
{ version = "56.2.0", optional = true } -arrow-array = { version = "56.2.0", optional = true } rand = { version = "0.8.5", optional = true } +edge = { git = "https://github.com/qdrant/qdrant.git", branch = "dev", package = "edge", optional = true } +segment = { git = "https://github.com/qdrant/qdrant.git", branch = "dev", package = "segment", optional = true } +shard = { git = "https://github.com/qdrant/qdrant.git", branch = "dev", package = "shard", optional = true } +ordered-float = { version = "5.1.0", optional = true } # Ask dependencies async-openai = { version = "0.31.0-beta.1", features = ["chat-completion", "responses"], optional = true } @@ -66,11 +67,11 @@ grep-regex = { version = "0.1", optional = true } regex = { version = "1.11", optional = true } [dev-dependencies] -tempfile = "3.14.0" +tempfile = "3.24.0" [features] default = ["parse", "search", "workspace", "ask"] parse = ["bytes", "reqwest", "tokio", "sha2", "hex", "mime_guess"] search = ["model2vec-rs", "simsimd"] -workspace = ["tokio", "lancedb", "arrow-schema", "arrow-array", "rand"] +workspace = ["tokio", "rand", "edge", "segment", "shard", "ordered-float"] ask = ["async-openai", "model2vec-rs", "simsimd", "tokio", "grep", "grep-searcher", "grep-matcher", "grep-regex", "regex"] diff --git a/src/bin/workspace.rs b/src/bin/workspace.rs index 4946ca19..b57a1382 100644 --- a/src/bin/workspace.rs +++ b/src/bin/workspace.rs @@ -50,9 +50,8 @@ async fn main() -> Result<()> { if args.json { // Try to get document count from store, or use 0 for new workspace - let total_documents = if let Ok(store) = Store::open(&ws.config.root_dir).await - { - if let Ok(stats) = store.get_stats().await { + let total_documents = if let Ok(store) = Store::open(&ws.config.root_dir) { + if let Ok(stats) = store.get_stats() { stats.total_documents } else { 0 @@ -97,8 +96,8 @@ async fn main() -> Result<()> { let ws = Workspace::open()?; // Open store and get stats - let store = Store::open(&ws.config.root_dir).await?; - let 
stats = store.get_stats().await?; + let store = Store::open(&ws.config.root_dir)?; + let stats = store.get_stats()?; if args.json { let output = WorkspaceOutput { @@ -139,10 +138,10 @@ async fn main() -> Result<()> { { let _name = Workspace::active().context("No active workspace")?; let ws = Workspace::open()?; - let store = Store::open(&ws.config.root_dir).await?; + let store = Store::open(&ws.config.root_dir)?; // Get all document paths from the workspace - let all_paths = store.get_all_document_paths().await?; + let all_paths = store.get_all_document_paths()?; let total_before = all_paths.len(); // Check which files no longer exist @@ -158,7 +157,7 @@ async fn main() -> Result<()> { if !missing_paths.is_empty() { // Remove stale documents - store.delete_documents(&missing_paths).await?; + store.delete_documents(&missing_paths)?; } if args.json { diff --git a/src/search/mod.rs b/src/search/mod.rs index 070db7b1..eb3e6d85 100644 --- a/src/search/mod.rs +++ b/src/search/mod.rs @@ -151,10 +151,10 @@ pub async fn search_with_workspace( ) -> Result> { let query_embedding = model.encode_single(query); let ws = Workspace::open()?; - let store = Store::open(&ws.config.root_dir).await?; + let store = Store::open(&ws.config.root_dir)?; // Step 1: Analyze document states (changed/new/unchanged) - let doc_states = store.analyze_document_states(files).await?; + let doc_states = store.analyze_document_states(files)?; // Step 2: Process documents that need embedding updates let mut line_embeddings_to_upsert = Vec::new(); @@ -190,21 +190,18 @@ pub async fn search_with_workspace( // Step 3: Update workspace with new/changed line embeddings if !line_embeddings_to_upsert.is_empty() { - store - .upsert_line_embeddings(&line_embeddings_to_upsert) - .await?; + store.upsert_line_embeddings(&line_embeddings_to_upsert)?; } // Also update document metadata for tracking changes if !docs_to_upsert.is_empty() { - store.upsert_document_metadata(&docs_to_upsert).await?; + 
store.upsert_document_metadata(&docs_to_upsert)?; } // Step 4: Search line embeddings directly from the workspace let max_distance = config.max_distance.map(|d| d as f32); - let ranked_lines = store - .search_line_embeddings(&query_embedding, files, config.top_k, max_distance) - .await?; + let ranked_lines = + store.search_line_embeddings(&query_embedding, files, config.top_k, max_distance)?; Ok(ranked_lines) } diff --git a/src/workspace/store.rs b/src/workspace/store.rs index 49964ba4..75cf169e 100644 --- a/src/workspace/store.rs +++ b/src/workspace/store.rs @@ -1,18 +1,29 @@ -use anyhow::{Context, Result, anyhow, bail}; -use arrow_array::types::Float32Type; -use arrow_array::{ - FixedSizeListArray, Float32Array, Float64Array, Int32Array, Int64Array, RecordBatch, - RecordBatchIterator, StringArray, UInt64Array, +//! Qdrant Edge storage wrapper +use anyhow::{Result, anyhow}; + +use edge::EdgeShard; +use ordered_float::OrderedFloat; +use segment::data_types::vectors::{NamedQuery, VectorInternal, VectorStructInternal}; +use segment::json_path::JsonPath; +use segment::types::{ + AnyVariants, Condition, Distance, ExtendedPointId, FieldCondition, Filter, Match, Payload, + PayloadStorageType, SegmentConfig, ValueVariants, VectorDataConfig, VectorStorageType, + WithPayloadInterface, WithVector, }; -use arrow_schema::{DataType, Field, Schema}; -use futures::TryStreamExt; -use lancedb::index::Index; -use lancedb::query::{ExecutableQuery, QueryBase}; +use serde::{Deserialize, Serialize}; +use serde_json::Value; +use shard::count::CountRequestInternal; +use shard::operations::CollectionUpdateOperations; +use shard::operations::point_ops::{ + PointInsertOperationsInternal, PointOperations, PointStructPersisted, +}; +use shard::query::query_enum::QueryEnum; +use shard::query::{ScoringQuery, ShardQueryRequest}; +use shard::scroll::ScrollRequestInternal; use std::collections::HashMap; use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; use 
std::path::Path; -use std::sync::Arc; use crate::search::DocumentInfo; @@ -23,7 +34,16 @@ use crate::search::DocumentInfo; /// we treat all existing documents as version 1. pub const CURRENT_EMBEDDING_VERSION: u32 = 2; -#[derive(Debug, Clone)] +/// Embedding size (needed to inform Qdrant collection when it is instantiated) +pub const EMBEDDING_SIZE: usize = 256; + +/// Vector name used in the documents shard +const DOCUMENTS_VECTOR_NAME: &str = "documents"; + +/// Vector name used in the line embeddings shard +const LINE_EMBEDDINGS_VECTOR_NAME: &str = "line_embeddings"; + +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct DocMeta { pub path: String, pub size_bytes: u64, @@ -38,10 +58,11 @@ pub enum DocumentState { New(DocumentInfo), // Full document info for processing } -#[derive(Debug, Clone)] +#[derive(Debug, Clone, Serialize, Deserialize)] pub struct LineEmbedding { pub path: String, pub line_number: i32, + #[serde(skip)] pub embedding: Vec, } @@ -80,122 +101,153 @@ pub struct WorkspaceStats { pub index_type: Option, } +/// Metadata stored with each vector. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ChunkPayload { + /// Relative file path from repo root + pub path: String, + /// Chunk index within the file + pub chunk_index: usize, + /// Starting line number + pub start_line: usize, + /// Ending line number + pub end_line: usize, + /// The actual text content + pub text: String, + /// File hash for change detection + pub file_hash: String, +} + +/// A search result. +#[derive(Debug, Clone)] +pub struct SearchResult { + /// Score (similarity) + pub score: f32, + /// The payload + pub payload: ChunkPayload, +} + +/// Storage wrapper around Qdrant Edge. 
pub struct Store { - db: lancedb::Connection, + documents_shard: EdgeShard, + line_embeddings_shard: EdgeShard, } impl Store { - pub async fn open(workspace_dir: &str) -> Result { - let db_path = Path::new(workspace_dir) - .join("documents.lance") - .to_string_lossy() - .to_string(); - let db = lancedb::connect(&db_path) - .execute() - .await - .with_context(|| format!("failed to open LanceDB connection at {db_path}"))?; - - Ok(Self { db }) - } + /// Initialize or load storage for a workspace directory + pub fn open(workspace_dir: &str) -> Result { + let document_shard_path = Path::new(workspace_dir).join("documents"); + + let line_embeddings_shard_path = Path::new(workspace_dir).join("line_embeddings"); + + // Create shard directories + std::fs::create_dir_all(&document_shard_path)?; + std::fs::create_dir_all(&line_embeddings_shard_path)?; + + // Create segment config for the shard + let mut vector_data_document_shard = HashMap::new(); + vector_data_document_shard.insert( + DOCUMENTS_VECTOR_NAME.to_string(), + VectorDataConfig { + size: EMBEDDING_SIZE, + distance: Distance::Cosine, + storage_type: VectorStorageType::ChunkedMmap, + index: Default::default(), + quantization_config: None, + multivector_config: None, + datatype: None, + }, + ); - /// Get existing document metadata for the given paths - pub async fn get_existing_docs(&self, paths: &[String]) -> Result> { - let mut existing = HashMap::new(); + let segment_config_document_shard = SegmentConfig { + vector_data: vector_data_document_shard, + sparse_vector_data: HashMap::new(), + payload_storage_type: PayloadStorageType::Mmap, + }; - // Check if documents table exists - let tables = self - .db - .table_names() - .execute() - .await - .context("failed to list LanceDB tables")?; - if !tables.contains(&"documents".to_string()) { - return Ok(existing); - } + let document_shard = + EdgeShard::load(&document_shard_path, Some(segment_config_document_shard))?; + + let mut vector_data_line_embeddings_shard = 
HashMap::new(); + vector_data_line_embeddings_shard.insert( + DOCUMENTS_VECTOR_NAME.to_string(), + VectorDataConfig { + size: EMBEDDING_SIZE, + distance: Distance::Cosine, + storage_type: VectorStorageType::ChunkedMmap, + index: Default::default(), + quantization_config: None, + multivector_config: None, + datatype: None, + }, + ); + + let segment_config_line_embeddings_shard = SegmentConfig { + vector_data: vector_data_line_embeddings_shard, + sparse_vector_data: HashMap::new(), + payload_storage_type: PayloadStorageType::Mmap, + }; + + let line_embeddings_shard = EdgeShard::load( + &line_embeddings_shard_path, + Some(segment_config_line_embeddings_shard), + )?; + + Ok(Self { + documents_shard: document_shard, + line_embeddings_shard: line_embeddings_shard, + }) + } - let tbl = self - .db - .open_table("documents") - .execute() - .await - .context("failed to open 'documents' table")?; + pub fn get_existing_docs(&self, paths: &[String]) -> Result> { + let mut existing = HashMap::new(); - // Query in chunks to avoid overly long IN(...) 
filters for chunk in paths.chunks(1000) { - let filter_expr = build_in_filter(chunk); - - let stream = tbl - .query() - .only_if(filter_expr) - .execute() - .await - .context("failed to execute documents query")?; - - let batches: Vec = stream - .try_collect() - .await - .context("failed to collect query result batches")?; - - for batch in batches { - let schema = batch.schema(); - let path_idx = schema - .index_of("path") - .context("missing 'path' column in documents schema")?; - let size_idx = schema - .index_of("size_bytes") - .context("missing 'size_bytes' column in documents schema")?; - let mtime_idx = schema - .index_of("mtime") - .context("missing 'mtime' column in documents schema")?; - // Optional version column (backwards compatibility) - let version_idx = schema.index_of("_version").ok(); - - let path_array = batch - .column(path_idx) - .as_any() - .downcast_ref::() - .ok_or_else(|| anyhow!("unexpected type for 'path' column"))?; - let size_array = batch - .column(size_idx) - .as_any() - .downcast_ref::() - .ok_or_else(|| anyhow!("unexpected type for 'size_bytes' column"))?; - let mtime_array = batch - .column(mtime_idx) - .as_any() - .downcast_ref::() - .ok_or_else(|| anyhow!("unexpected type for 'mtime' column"))?; - - // Handle version column if it exists (backwards compatible) - // Prefer UInt32 but allow Int32 fallback. 
- let version_accessor: Option> = if let Some(v_idx) = version_idx { - let col = batch.column(v_idx); - if let Some(v) = col.as_any().downcast_ref::() { - Some((0..batch.num_rows()).map(|i| v.value(i)).collect()) - } else if let Some(v) = col.as_any().downcast_ref::() { - Some((0..batch.num_rows()).map(|i| v.value(i) as u32).collect()) - } else { - return Err(anyhow!("unexpected type for '_version' column")); + let scroll_result = self.documents_shard.scroll(ScrollRequestInternal { + offset: None, + order_by: None, + with_vector: WithVector::Bool(false), + with_payload: Some(WithPayloadInterface::Bool(true)), + filter: Some(Filter { + must: Some(vec![ + Condition::Field(FieldCondition::new_match( + JsonPath { + first_key: "path".to_string(), + rest: vec![], + }, + Match::from(AnyVariants::Strings(chunk.iter().cloned().collect())), + )), + Condition::Field(FieldCondition::new_match( + JsonPath { + first_key: "version".to_string(), + rest: vec![], + }, + Match::new_value(ValueVariants::Integer( + CURRENT_EMBEDDING_VERSION as i64, + )), + )), + ]), + must_not: None, + should: None, + min_should: None, + }), + limit: None, + }); + let records = match scroll_result { + Ok(r) => { + let (recs, _) = r; + recs + } + Err(e) => return Err(anyhow!(e.to_string())), + }; + for record in records { + match record.payload { + None => {} + Some(r) => { + let meta = payload_to_doc_meta(&r)?; + let path = meta.clone().path; + existing.insert(path, meta); } - } else { - None // Missing column → default later - }; - - for i in 0..batch.num_rows() { - let path = path_array.value(i).to_string(); - let size_bytes = size_array.value(i); - let mtime = mtime_array.value(i); - let version = version_accessor.as_ref().map(|v| v[i]).unwrap_or(1); // default for legacy rows - - existing.insert( - path.clone(), - DocMeta { - path, - size_bytes, - mtime, - _version: version, - }, - ); } } } @@ -203,437 +255,243 @@ impl Store { Ok(existing) } - /// Delete documents and all associated line embeddings 
by path - pub async fn delete_documents(&self, paths: &[String]) -> Result<()> { + /// Delete document metadata by path + pub fn delete_document_metadata(&self, paths: &[String]) -> Result<()> { if paths.is_empty() { return Ok(()); } - // Delete from both tables to maintain synchronization - self.delete_document_metadata(paths).await?; - self.delete_line_embeddings(paths).await?; - - Ok(()) - } + let mut point_ids: Vec = vec![]; - /// Delete only document metadata by path (internal method) - async fn delete_document_metadata(&self, paths: &[String]) -> Result<()> { - if paths.is_empty() { - return Ok(()); - } - - let tables = self - .db - .table_names() - .execute() - .await - .context("failed to list LanceDB tables")?; - if !tables.contains(&"documents".to_string()) { - return Ok(()); // Nothing to delete + // collect all point IDs to be deleted + for chunk in paths.chunks(1000) { + let scroll_result = self.documents_shard.scroll(ScrollRequestInternal { + offset: None, + order_by: None, + with_vector: WithVector::Bool(false), + with_payload: Some(WithPayloadInterface::Bool(true)), + filter: Some(Filter { + must: Some(vec![ + Condition::Field(FieldCondition::new_match( + JsonPath { + first_key: "path".to_string(), + rest: vec![], + }, + Match::from(AnyVariants::Strings(chunk.iter().cloned().collect())), + )), + Condition::Field(FieldCondition::new_match( + JsonPath { + first_key: "version".to_string(), + rest: vec![], + }, + Match::new_value(ValueVariants::Integer( + CURRENT_EMBEDDING_VERSION as i64, + )), + )), + ]), + must_not: None, + should: None, + min_should: None, + }), + limit: None, + }); + let records = match scroll_result { + Ok(r) => { + let (recs, _) = r; + recs + } + Err(e) => return Err(anyhow!(e.to_string())), + }; + for record in records { + point_ids.push(record.id); + } } - let tbl = self - .db - .open_table("documents") - .execute() - .await - .context("failed to open 'documents' table")?; + let operation = 
CollectionUpdateOperations::PointOperation(PointOperations::DeletePoints { + ids: point_ids, + }); - // Delete in chunks - for chunk in paths.chunks(1000) { - let filter_expr = build_in_filter(chunk); - tbl.delete(&filter_expr).await.with_context(|| { - format!("failed to delete documents with filter: {filter_expr}") - })?; - } + self.documents_shard + .update(operation) + .map_err(|e| anyhow!(e.to_string()))?; Ok(()) } /// Delete line embeddings by path - pub async fn delete_line_embeddings(&self, paths: &[String]) -> Result<()> { + pub fn delete_line_embeddings(&self, paths: &[String]) -> Result<()> { if paths.is_empty() { return Ok(()); } - let tables = self - .db - .table_names() - .execute() - .await - .context("failed to list LanceDB tables")?; - if !tables.contains(&"line_embeddings".to_string()) { - return Ok(()); // Nothing to delete - } - - let tbl = self - .db - .open_table("line_embeddings") - .execute() - .await - .context("failed to open 'line_embeddings' table")?; + let mut point_ids: Vec = vec![]; - // Delete in chunks + // collect all point IDs to be deleted for chunk in paths.chunks(1000) { - let filter_expr = build_in_filter(chunk); - tbl.delete(&filter_expr).await.with_context(|| { - format!("failed to delete line embeddings with filter: {filter_expr}") - })?; + let scroll_result = self.line_embeddings_shard.scroll(ScrollRequestInternal { + offset: None, + order_by: None, + with_vector: WithVector::Bool(false), + with_payload: Some(WithPayloadInterface::Bool(true)), + filter: Some(Filter::new_must(Condition::Field( + FieldCondition::new_match( + JsonPath { + first_key: "path".to_string(), + rest: vec![], + }, + Match::from(AnyVariants::Strings(chunk.iter().cloned().collect())), + ), + ))), + limit: None, + }); + let records = match scroll_result { + Ok(r) => { + let (recs, _) = r; + recs + } + Err(e) => return Err(anyhow!(e.to_string())), + }; + for record in records { + point_ids.push(record.id); + } } + let operation = 
CollectionUpdateOperations::PointOperation(PointOperations::DeletePoints { + ids: point_ids, + }); + + self.line_embeddings_shard + .update(operation) + .map_err(|e| anyhow!(e.to_string()))?; + Ok(()) } - /// Upsert document metadata for tracking file changes (no embeddings stored) - pub async fn upsert_document_metadata(&self, metas: &[DocMeta]) -> Result<()> { - if metas.is_empty() { + /// Delete documents and all associated line embeddings by path + pub fn delete_documents(&self, paths: &[String]) -> Result<()> { + if paths.is_empty() { return Ok(()); } - // First, delete any existing documents with the same paths - let paths: Vec = metas.iter().map(|m| m.path.clone()).collect(); - self.delete_document_metadata(&paths).await?; - - // Define schema for metadata only - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("path", DataType::Utf8, false), - Field::new("size_bytes", DataType::UInt64, false), - Field::new("mtime", DataType::Int64, false), - Field::new("_version", DataType::UInt32, false), - ])); - - // Build a single RecordBatch - let id_array = Int32Array::from_iter_values(metas.iter().map(|meta| meta.id())); - let path_array = - StringArray::from(metas.iter().map(|m| m.path.as_str()).collect::>()); - let size_bytes_array = UInt64Array::from_iter_values(metas.iter().map(|m| m.size_bytes)); - let mtime_array = Int64Array::from_iter_values(metas.iter().map(|m| m.mtime)); - let version_array = - arrow_array::UInt32Array::from_iter_values(metas.iter().map(|m| m._version)); - - let batch = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(id_array), - Arc::new(path_array), - Arc::new(size_bytes_array), - Arc::new(mtime_array), - Arc::new(version_array), - ], - )?; - - // Wrap into a RecordBatchReader - let batches = RecordBatchIterator::new(vec![batch].into_iter().map(Ok), schema.clone()); - - // Create table if needed, otherwise open and append - let tables = self - .db - .table_names() - .execute() - 
.await - .context("failed to list LanceDB tables")?; - let table_existed = tables.contains(&"documents".to_string()); - - if !table_existed { - // Create table with initial data - self.db - .create_table("documents", Box::new(batches)) - .execute() - .await - .context("failed to create 'documents' table")?; - } else { - let tbl = self - .db - .open_table("documents") - .execute() - .await - .context("failed to open 'documents' table")?; - tbl.add(Box::new(batches)) - .execute() - .await - .context("failed to append batches to 'documents' table")?; - } + // Delete from both tables to maintain synchronization + self.delete_document_metadata(paths)?; + self.delete_line_embeddings(paths)?; Ok(()) } - /// Upsert line-level embeddings for documents - pub async fn upsert_line_embeddings(&self, line_embeddings: &[LineEmbedding]) -> Result<()> { - if line_embeddings.is_empty() { + /// Upsert documents metadata (no embeddings stored) + pub fn upsert_document_metadata(&self, metas: &[DocMeta]) -> Result<()> { + if metas.is_empty() { return Ok(()); } - let dim = line_embeddings[0].embedding.len(); - if dim == 0 { - bail!("embeddings must be non-empty vectors"); - } - if line_embeddings.iter().any(|e| e.embedding.len() != dim) { - bail!("all embeddings must have equal length"); - } - - // First, delete any existing lines with the same paths - let paths: Vec = line_embeddings.iter().map(|le| le.path.clone()).collect(); - let unique_paths: std::collections::HashSet = paths.into_iter().collect(); - let unique_paths: Vec = unique_paths.into_iter().collect(); - self.delete_line_embeddings(&unique_paths).await?; - - // Define schema for line embeddings - let schema = Arc::new(Schema::new(vec![ - Field::new("id", DataType::Int32, false), - Field::new("path", DataType::Utf8, false), - Field::new("line_number", DataType::Int32, false), - Field::new( - "vector", - DataType::FixedSizeList( - Arc::new(Field::new("item", DataType::Float32, true)), - dim as i32, - ), - true, - ), - ])); - - 
// Build RecordBatch - let id_array = Int32Array::from_iter_values(line_embeddings.iter().map(|le| le.id())); - let path_array = StringArray::from( - line_embeddings - .iter() - .map(|le| le.path.as_str()) - .collect::>(), - ); - let line_number_array = - Int32Array::from_iter_values(line_embeddings.iter().map(|le| le.line_number)); - let vector_array = FixedSizeListArray::from_iter_primitive::( - line_embeddings - .iter() - .map(|le| Some(le.embedding.iter().cloned().map(Some))), - dim as i32, - ); + let mut point_id = self.count_documents()? as u64; - let batch = RecordBatch::try_new( - schema.clone(), - vec![ - Arc::new(id_array), - Arc::new(path_array), - Arc::new(line_number_array), - Arc::new(vector_array), - ], - )?; + for chunk in metas.chunks(1000) { + let mut points: Vec = vec![]; + for meta in chunk { + point_id += 1_u64; + let payload_json = + serde_json::to_value(&meta).map_err(|e| anyhow!(e.to_string()))?; + let vector: Vec = vec![]; + let point = make_point(point_id, vector, payload_json, DOCUMENTS_VECTOR_NAME); + points.push(point); + } + let operation = CollectionUpdateOperations::PointOperation( + PointOperations::UpsertPoints(PointInsertOperationsInternal::PointsList(points)), + ); + self.documents_shard + .update(operation) + .map_err(|e| anyhow!(e.to_string()))?; - let batches = RecordBatchIterator::new(vec![batch].into_iter().map(Ok), schema.clone()); - - // Create or append to line_embeddings table - let tables = self - .db - .table_names() - .execute() - .await - .context("failed to list LanceDB tables")?; - let table_existed = tables.contains(&"line_embeddings".to_string()); - - if !table_existed { - self.db - .create_table("line_embeddings", Box::new(batches)) - .execute() - .await - .context("failed to create 'line_embeddings' table")?; - } else { - let tbl = self - .db - .open_table("line_embeddings") - .execute() - .await - .context("failed to open 'line_embeddings' table")?; - tbl.add(Box::new(batches)) - .execute() - .await - 
.context("failed to append batches to 'line_embeddings' table")?; + // flush to disk + self.flush_documents(); } - // Ensure vector index exists - self.ensure_line_vector_index().await?; - Ok(()) } - /// Ensures vector index exists for line embeddings table - async fn ensure_line_vector_index(&self) -> Result<()> { - let tbl = self - .db - .open_table("line_embeddings") - .execute() - .await - .context("failed to open 'line_embeddings' table")?; - - // Check if vector index exists - let indices = tbl - .list_indices() - .await - .context("failed to list indices for 'line_embeddings' table")?; - let has_vector_index = indices - .iter() - .any(|idx| idx.columns.contains(&"vector".to_string())); - - if !has_vector_index { - // Create new index - handle case where there are too few rows for PQ index - match tbl.create_index(&["vector"], Index::Auto).execute().await { - Ok(_) => { - // Index created successfully - } - Err(e) => { - // Check if this is a PQ training error due to insufficient rows - let error_msg = e.to_string(); - if error_msg.contains("Not enough rows to train PQ") - || error_msg.contains("Requires 256 rows") - { - // Log a warning but continue - the database will still work without the index - // It will just use brute-force search instead of approximate search - eprintln!( - "Warning: Skipping line embeddings vector index creation due to insufficient data (need at least 256 rows for PQ index). Database will use brute-force search." - ); - } else if error_msg.contains("No space left on device") { - return Err(anyhow!( - "Insufficient disk space to create vector index. Consider freeing up space or using a different workspace location." - )); - } else if error_msg.contains("Permission denied") { - return Err(anyhow!( - "Permission denied while creating vector index. Check workspace directory permissions." 
- )); - } else { - // For other errors, we should still fail - return Err(e.into()); - } - } - } - } else { - // Optimize existing index to include new data - // This is much faster than recreating the entire index - if tbl.optimize(Default::default()).await.is_err() { - // If optimization fails, we could fall back to recreating the index - // but for now just log and continue - eprintln!("Warning: Failed to optimize line embeddings vector index"); + /// Upsert line embeddings + pub fn upsert_line_embeddings(&self, line_embeddings: &[LineEmbedding]) -> Result<()> { + if line_embeddings.is_empty() { + return Ok(()); + } + let mut point_id = self.count_line_embeddings()? as u64; + + for chunk in line_embeddings.chunks(1000) { + let mut points: Vec = vec![]; + + for line_embedding in chunk { + point_id += 1_u64; + let payload_json = + serde_json::to_value(&line_embedding).map_err(|e| anyhow!(e.to_string()))?; + let point = make_point( + point_id, + line_embedding.embedding.clone(), + payload_json, + LINE_EMBEDDINGS_VECTOR_NAME, + ); + points.push(point); } + + let operation = CollectionUpdateOperations::PointOperation( + PointOperations::UpsertPoints(PointInsertOperationsInternal::PointsList(points)), + ); + self.line_embeddings_shard + .update(operation) + .map_err(|e| anyhow!(e.to_string()))?; + + // flush to disk + self.flush_line_embeddings(); } Ok(()) } - /// Get statistics about the workspace store - pub async fn get_stats(&self) -> Result { - let tables = self - .db - .table_names() - .execute() - .await - .context("failed to list LanceDB tables")?; - - if !tables.contains(&"documents".to_string()) { - return Ok(WorkspaceStats { - total_documents: 0, - has_index: false, - index_type: None, - }); - } - - let tbl = self - .db - .open_table("documents") - .execute() - .await - .context("failed to open 'documents' table")?; - - // Get document count - let stream = tbl - .query() - .execute() - .await - .context("failed to execute count query on 'documents'")?; - 
let batches: Vec = stream - .try_collect() - .await - .context("failed to collect result batches for stats")?; - let total_documents = batches.iter().map(|batch| batch.num_rows()).sum(); - - // Check if vector index exists - let line_tbl = self - .db - .open_table("line_embeddings") - .execute() - .await - .context("failed to open 'line_embeddings' table")?; - let indices = line_tbl - .list_indices() - .await - .context("failed to list indices for 'line_embeddings' table")?; - let has_vector_index = indices - .iter() - .any(|idx| idx.columns.contains(&"vector".to_string())); - - let index_type = if has_vector_index { - // LanceDB Auto index creates IVF_PQ for vector columns by default - Some("IVF_PQ".to_string()) - } else { - None - }; + /// Get workspace statistics + pub fn get_stats(&self) -> Result { + let total_documents = self.count_documents()?; Ok(WorkspaceStats { total_documents, - has_index: has_vector_index, - index_type, + has_index: true, + index_type: Some("HNSW".to_string()), }) } - /// Get all document paths in the workspace - pub async fn get_all_document_paths(&self) -> Result> { - let tables = self - .db - .table_names() - .execute() - .await - .context("failed to list LanceDB tables")?; + /// Get paths for all stored documents + pub fn get_all_document_paths(&self) -> Result> { + let scroll_result = self + .documents_shard + .scroll(ScrollRequestInternal { + offset: None, + order_by: None, + with_vector: WithVector::Bool(false), + with_payload: Some(WithPayloadInterface::Bool(true)), + filter: None, + limit: None, + }) + .map_err(|e| anyhow!(e.to_string()))?; - if !tables.contains(&"documents".to_string()) { - return Ok(Vec::new()); - } + let (records, _) = scroll_result; + let mut paths: Vec = vec![]; - let tbl = self - .db - .open_table("documents") - .execute() - .await - .context("failed to open 'documents' table")?; - let stream = tbl - .query() - .execute() - .await - .context("failed to execute query for all document paths")?; - let 
batches: Vec = stream - .try_collect() - .await - .context("failed to collect batches for all document paths")?; - - let mut paths = Vec::new(); - for batch in batches { - let schema = batch.schema(); - let path_idx = schema - .index_of("path") - .context("missing 'path' column in documents schema")?; - let path_array = batch - .column(path_idx) - .as_any() - .downcast_ref::() - .ok_or_else(|| anyhow!("unexpected type for 'path' column"))?; - - for i in 0..batch.num_rows() { - paths.push(path_array.value(i).to_string()); + for record in records { + match record.payload { + Some(p) => { + let doc_meta = payload_to_doc_meta(&p)?; + paths.push(doc_meta.path); + } + None => {} } } Ok(paths) } - /// Search line embeddings directly for precise results - pub async fn search_line_embeddings( + /// Search within line embeddings + pub fn search_line_embeddings( &self, query_vec: &[f32], subset_paths: &[String], @@ -645,111 +503,57 @@ impl Store { return Ok(Vec::new()); } - let tables = self - .db - .table_names() - .execute() - .await - .context("failed to list LanceDB tables")?; - if !tables.contains(&"line_embeddings".to_string()) { - return Ok(Vec::new()); - } - - let tbl = self - .db - .open_table("line_embeddings") - .execute() - .await - .context("failed to open 'line_embeddings' table")?; + let mut all_results: Vec = vec![]; - let mut all_results = Vec::new(); - - // Search in chunks to avoid overly long IN(...) filters for chunk in subset_paths.chunks(1000) { - let filter_expr = build_in_filter(chunk); - - let query = tbl - .query() - .only_if(filter_expr) - .nearest_to(query_vec) - .context("failed to set nearest_to on line embeddings query")? 
- .distance_type(lancedb::DistanceType::Cosine) - .limit(top_k * 2); // Get more results per chunk to improve global ranking - - let stream = query - .execute() - .await - .context("failed to execute line embeddings search")?; - - let batches: Vec = stream - .try_collect() - .await - .context("failed to collect line embeddings search batches")?; - - for batch in batches { - let schema = batch.schema(); - - let path_idx = schema - .index_of("path") - .context("missing 'path' column in line embeddings result")?; - let line_number_idx = schema - .index_of("line_number") - .context("missing 'line_number' column in line embeddings result")?; - let distance_idx = schema - .index_of("_distance") - .or_else(|_| schema.index_of("distance")) - .context("missing 'distance' column in line embeddings result")?; - - let path_array = batch - .column(path_idx) - .as_any() - .downcast_ref::() - .ok_or_else(|| anyhow!("unexpected type for 'path' column"))?; - let line_number_array = batch - .column(line_number_idx) - .as_any() - .downcast_ref::() - .ok_or_else(|| anyhow!("unexpected type for 'line_number' column"))?; - let dist_col = batch.column(distance_idx); - - // Handle both f32 and f64 distance types - if let Some(dist_array) = dist_col.as_any().downcast_ref::() { - for i in 0..batch.num_rows() { - let distance = dist_array.value(i); - if let Some(max_dist) = max_distance - && distance > max_dist - { - continue; - } - - all_results.push(RankedLine { - path: path_array.value(i).to_string(), - line_number: line_number_array.value(i), - distance, - }); - } - } else if let Some(dist_array) = dist_col.as_any().downcast_ref::() { - for i in 0..batch.num_rows() { - let distance = dist_array.value(i) as f32; - if let Some(max_dist) = max_distance - && distance > max_dist - { - continue; - } - - all_results.push(RankedLine { - path: path_array.value(i).to_string(), - line_number: line_number_array.value(i), - distance, - }); + let query: Vec = query_vec.into(); + let vector: 
VectorInternal = query.into(); + let score_threshold: Option> = match max_distance { + Some(max_dist) => Some(OrderedFloat(1_f32 - max_dist)), + None => None, + }; + let results = self + .line_embeddings_shard + .query(ShardQueryRequest { + prefetches: vec![], + query: Some(ScoringQuery::Vector(QueryEnum::Nearest(NamedQuery { + query: vector, + using: Some(LINE_EMBEDDINGS_VECTOR_NAME.to_string()), + }))), + filter: Some(Filter::new_must(Condition::Field( + FieldCondition::new_match( + JsonPath { + first_key: "path".to_string(), + rest: vec![], + }, + Match::from(AnyVariants::Strings(chunk.iter().cloned().collect())), + ), + ))), + score_threshold: score_threshold, + limit: top_k * 2, + offset: 0, + params: None, + with_vector: WithVector::Bool(false), + with_payload: WithPayloadInterface::Bool(true), + }) + .map_err(|e| anyhow!(e.to_string()))?; + + for result in results { + match result.payload { + Some(p) => { + let line_embd = payload_to_line_embedding(&p)?; + let ranked_line = RankedLine { + line_number: line_embd.line_number, + path: line_embd.path, + distance: result.score, + }; + all_results.push(ranked_line); } - } else { - bail!("unsupported distance column type in line embeddings search"); + None => {} } } } - // Sort by distance and take global top-k all_results.sort_by(|a, b| { a.distance .partial_cmp(&b.distance) @@ -760,12 +564,10 @@ impl Store { Ok(all_results) } - pub async fn analyze_document_states( - &self, - file_paths: &[String], - ) -> Result> { + /// Analyze the state of documents within the workspace + pub fn analyze_document_states(&self, file_paths: &[String]) -> Result> { // Get existing document metadata from workspace - let existing_docs = self.get_existing_docs(file_paths).await?; + let existing_docs = self.get_existing_docs(file_paths)?; let mut states = Vec::new(); @@ -826,566 +628,94 @@ impl Store { Ok(states) } -} -pub fn build_in_filter(paths: &[String]) -> String { - let escaped: Vec = paths - .iter() - .map(|p| p.replace('\'', 
"''")) - .map(|p| format!("'{p}'")) - .collect(); - format!("path IN ({})", escaped.join(",")) -} - -#[cfg(test)] -mod tests { - use super::*; - use tempfile::TempDir; - - // Helper function to create a test store - async fn create_test_store() -> (Store, TempDir) { - let temp_dir = TempDir::new().expect("Failed to create temp dir"); - let store = Store::open(temp_dir.path().to_str().unwrap()) - .await - .expect("Failed to create store"); - (store, temp_dir) - } - - // Helper function to create test documents - fn create_test_docs() -> (Vec, Vec>) { - let docs = vec![ - DocMeta { - path: "/test/doc1.txt".to_string(), - size_bytes: 100, - mtime: 1234567890, - _version: CURRENT_EMBEDDING_VERSION, - }, - DocMeta { - path: "/test/doc2.txt".to_string(), - size_bytes: 200, - mtime: 1234567891, - _version: CURRENT_EMBEDDING_VERSION, - }, - DocMeta { - path: "/test/doc3.txt".to_string(), - size_bytes: 150, - mtime: 1234567892, - _version: CURRENT_EMBEDDING_VERSION, - }, - ]; - - let embeddings = vec![ - vec![0.1, 0.2, 0.3, 0.4], - vec![0.5, 0.6, 0.7, 0.8], - vec![0.9, 1.0, 1.1, 1.2], - ]; - - (docs, embeddings) - } - - #[tokio::test] - async fn test_store_creation_and_stats_empty() { - let (store, _temp_dir) = create_test_store().await; - - let stats = store.get_stats().await.expect("Failed to get stats"); - - assert_eq!(stats.total_documents, 0); - assert!(!stats.has_index); - assert_eq!(stats.index_type, None); - } - - #[tokio::test] - async fn test_upsert_documents_and_stats() { - let (store, _temp_dir) = create_test_store().await; - let (docs, embeddings) = create_test_docs(); - - // Insert documents - store - .upsert_document_metadata(&docs) - .await - .expect("Failed to upsert documents"); - - let line_embeddings: Vec = docs - .iter() - .enumerate() - .map(|(i, doc)| LineEmbedding { - path: doc.path.clone(), - line_number: i as i32, - embedding: embeddings[i].clone(), + /// Get the number of indexed points in the documents shard + pub fn count_documents(&self) -> 
Result { + let count = self + .documents_shard + .count(CountRequestInternal { + filter: None, + exact: true, }) - .collect(); - - store - .upsert_line_embeddings(&line_embeddings) - .await - .expect("Failed to upsert line embeddings"); - - // Check stats - let stats = store.get_stats().await.expect("Failed to get stats"); - - assert_eq!(stats.total_documents, 3); - // Index may or may not be created depending on number of documents - // (LanceDB requires 256+ rows for PQ index training) - if stats.has_index { - assert_eq!(stats.index_type, Some("IVF_PQ".to_string())); - } - } - - #[tokio::test] - async fn test_get_all_document_paths() { - let (store, _temp_dir) = create_test_store().await; - let (docs, _embeddings) = create_test_docs(); - - // Initially should be empty - let paths = store - .get_all_document_paths() - .await - .expect("Failed to get document paths"); - assert!(paths.is_empty()); - - // Insert documents - store - .upsert_document_metadata(&docs) - .await - .expect("Failed to upsert documents"); - - // Should now have paths - let paths = store - .get_all_document_paths() - .await - .expect("Failed to get document paths"); - - assert_eq!(paths.len(), 3); - assert!(paths.contains(&"/test/doc1.txt".to_string())); - assert!(paths.contains(&"/test/doc2.txt".to_string())); - assert!(paths.contains(&"/test/doc3.txt".to_string())); - } - - #[tokio::test] - async fn test_get_existing_docs() { - let (store, _temp_dir) = create_test_store().await; - let (docs, _embeddings) = create_test_docs(); - - // Insert documents - store - .upsert_document_metadata(&docs) - .await - .expect("Failed to upsert documents"); - - // Test getting existing docs - let query_paths = vec![ - "/test/doc1.txt".to_string(), - "/test/doc2.txt".to_string(), - "/test/nonexistent.txt".to_string(), - ]; - - let existing = store - .get_existing_docs(&query_paths) - .await - .expect("Failed to get existing docs"); - - assert_eq!(existing.len(), 2); - 
assert!(existing.contains_key("/test/doc1.txt")); - assert!(existing.contains_key("/test/doc2.txt")); - assert!(!existing.contains_key("/test/nonexistent.txt")); - - // Verify metadata - let doc1_meta = existing.get("/test/doc1.txt").unwrap(); - assert_eq!(doc1_meta.size_bytes, 100); - assert_eq!(doc1_meta.mtime, 1234567890); - } - - #[tokio::test] - async fn test_delete_documents() { - let (store, _temp_dir) = create_test_store().await; - let (docs, _embeddings) = create_test_docs(); - - // Insert documents - store - .upsert_document_metadata(&docs) - .await - .expect("Failed to upsert documents"); - - // Verify all documents exist - let all_paths = store - .get_all_document_paths() - .await - .expect("Failed to get document paths"); - assert_eq!(all_paths.len(), 3); - - // Delete some documents - let to_delete = vec!["/test/doc1.txt".to_string(), "/test/doc3.txt".to_string()]; - store - .delete_documents(&to_delete) - .await - .expect("Failed to delete documents"); - - // Verify only doc2 remains - let remaining_paths = store - .get_all_document_paths() - .await - .expect("Failed to get document paths"); - assert_eq!(remaining_paths.len(), 1); - assert!(remaining_paths.contains(&"/test/doc2.txt".to_string())); - } - - #[tokio::test] - async fn test_upsert_replaces_existing() { - let (store, _temp_dir) = create_test_store().await; - - // Insert initial document - let initial_doc = DocMeta { - path: "/test/doc.txt".to_string(), - size_bytes: 100, - mtime: 1000, - _version: CURRENT_EMBEDDING_VERSION, - }; - let _initial_embedding = [vec![1.0, 2.0, 3.0, 4.0]]; - - store - .upsert_document_metadata(&[initial_doc]) - .await - .expect("Failed to insert initial document"); - - // Verify document exists - let paths = store - .get_all_document_paths() - .await - .expect("Failed to get paths"); - assert_eq!(paths.len(), 1); - - // Update the same document - let updated_doc = DocMeta { - path: "/test/doc.txt".to_string(), - size_bytes: 200, - mtime: 2000, - _version: 
CURRENT_EMBEDDING_VERSION, - }; - let _updated_embedding = [vec![5.0, 6.0, 7.0, 8.0]]; - - store - .upsert_document_metadata(&[updated_doc]) - .await - .expect("Failed to update document"); - - // Should still have only one document - let paths = store - .get_all_document_paths() - .await - .expect("Failed to get paths"); - assert_eq!(paths.len(), 1); - - // Verify metadata was updated - let existing = store - .get_existing_docs(&["/test/doc.txt".to_string()]) - .await - .expect("Failed to get existing docs"); - let doc_meta = existing.get("/test/doc.txt").unwrap(); - assert_eq!(doc_meta.size_bytes, 200); - assert_eq!(doc_meta.mtime, 2000); - } - - #[test] - fn test_build_in_filter() { - let paths = vec![ - "file1.txt".to_string(), - "file2.txt".to_string(), - "file with spaces.txt".to_string(), - "file'with'quotes.txt".to_string(), - ]; - - let filter = build_in_filter(&paths); - - assert!(filter.starts_with("path IN (")); - assert!(filter.ends_with(")")); - assert!(filter.contains("'file1.txt'")); - assert!(filter.contains("'file2.txt'")); - assert!(filter.contains("'file with spaces.txt'")); - // Single quotes should be escaped - assert!(filter.contains("'file''with''quotes.txt'")); - } - - #[test] - fn test_doc_meta_id_generation() { - let doc1 = DocMeta { - path: "test1.txt".to_string(), - size_bytes: 100, - mtime: 1000, - _version: CURRENT_EMBEDDING_VERSION, - }; - let doc2 = DocMeta { - path: "test2.txt".to_string(), - size_bytes: 100, - mtime: 1000, - _version: CURRENT_EMBEDDING_VERSION, - }; - - let id1 = doc1.id(); - let id2 = doc2.id(); - - // IDs should be different (random generation) - assert_ne!(id1, id2); - // IDs should be valid i32 values - assert!(id1 >= 0); - assert!(id2 >= 0); - } - - // Helper to create test files for analyze_document_states tests - fn create_test_files(temp_dir: &tempfile::TempDir) -> Vec { - use std::fs; - - let file1_path = temp_dir.path().join("test1.txt"); - let file2_path = temp_dir.path().join("test2.txt"); - let 
file3_path = temp_dir.path().join("test3.txt"); - - fs::write(&file1_path, "This is test file 1\nWith multiple lines").unwrap(); - fs::write(&file2_path, "This is test file 2\nWith different content").unwrap(); - fs::write(&file3_path, "This is test file 3\nWith more content").unwrap(); + .map_err(|e| anyhow!(e.to_string()))?; - vec![ - file1_path.to_string_lossy().to_string(), - file2_path.to_string_lossy().to_string(), - file3_path.to_string_lossy().to_string(), - ] + Ok(count) } - #[tokio::test] - async fn test_analyze_document_states_all_new() { - use tempfile::TempDir; - - let temp_dir = TempDir::new().unwrap(); - let file_paths = create_test_files(&temp_dir); - - // Create empty store - let store = Store::open(temp_dir.path().to_str().unwrap()) - .await - .unwrap(); - - let states = store.analyze_document_states(&file_paths).await.unwrap(); - - assert_eq!(states.len(), 3); + /// Get the number of indexed points in the documents shard + pub fn count_line_embeddings(&self) -> Result { + let count = self + .line_embeddings_shard + .count(CountRequestInternal { + filter: None, + exact: true, + }) + .map_err(|e| anyhow!(e.to_string()))?; - // All should be new documents - for state in &states { - if let DocumentState::New(doc_info) = state { - assert!(file_paths.contains(&doc_info.filename)); - assert!(!doc_info.content.is_empty()); - assert!(doc_info.meta.size_bytes > 0); - assert!(doc_info.meta.mtime > 0); - } else { - panic!("Expected New document state"); - } - } + Ok(count) } - #[tokio::test] - async fn test_analyze_document_states_unchanged() { - use std::fs; - use std::time::UNIX_EPOCH; - use tempfile::TempDir; - - let temp_dir = TempDir::new().unwrap(); - let file_paths = create_test_files(&temp_dir); - - // Create store and add documents - let store = Store::open(temp_dir.path().to_str().unwrap()) - .await - .unwrap(); - - // Insert documents with current metadata - let mut docs = Vec::new(); - for path in &file_paths { - let metadata = 
fs::metadata(path).unwrap(); - let doc_meta = DocMeta { - path: path.clone(), - size_bytes: metadata.len(), - mtime: metadata - .modified() - .unwrap() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() as i64, - _version: CURRENT_EMBEDDING_VERSION, - }; - docs.push(doc_meta); - } - store.upsert_document_metadata(&docs).await.unwrap(); - - // Analyze states - should all be unchanged - let states = store.analyze_document_states(&file_paths).await.unwrap(); - - assert_eq!(states.len(), 3); - - for state in &states { - if let DocumentState::Unchanged(filename) = state { - assert!(file_paths.contains(filename)); - } else { - panic!("Expected Unchanged document state"); - } - } + /// Flush all documents data to disk. + pub fn flush_documents(&self) { + self.documents_shard.flush(); } - #[tokio::test] - async fn test_analyze_document_states_changed() { - use tempfile::TempDir; - - let temp_dir = TempDir::new().unwrap(); - let file_paths = create_test_files(&temp_dir); - - // Create store and add documents with old metadata - let store = Store::open(temp_dir.path().to_str().unwrap()) - .await - .unwrap(); - - let mut docs = Vec::new(); - for path in &file_paths { - let doc_meta = DocMeta { - path: path.clone(), - size_bytes: 10, // Different from actual size - mtime: 1000, // Old timestamp - _version: 1, // simulate old version - }; - docs.push(doc_meta); - } - store.upsert_document_metadata(&docs).await.unwrap(); - - // Analyze states - should all be changed - let states = store.analyze_document_states(&file_paths).await.unwrap(); - - assert_eq!(states.len(), 3); - - for state in &states { - if let DocumentState::Changed(doc_info) = state { - assert!(file_paths.contains(&doc_info.filename)); - assert!(!doc_info.content.is_empty()); - } else { - panic!("Expected Changed document state"); - } - } + /// Flush all line embeddings data to disk. 
+ pub fn flush_line_embeddings(&self) { + self.line_embeddings_shard.flush(); } +} - #[tokio::test] - async fn test_analyze_document_states_mixed() { - use std::fs; - use std::time::UNIX_EPOCH; - use tempfile::TempDir; - - let temp_dir = TempDir::new().unwrap(); - let file_paths = create_test_files(&temp_dir); - - // Create store and add only the first document - let store = Store::open(temp_dir.path().to_str().unwrap()) - .await - .unwrap(); - - let metadata = fs::metadata(&file_paths[0]).unwrap(); - let doc_meta = DocMeta { - path: file_paths[0].clone(), - size_bytes: metadata.len(), - mtime: metadata - .modified() - .unwrap() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() as i64, - _version: CURRENT_EMBEDDING_VERSION, - }; - store.upsert_document_metadata(&[doc_meta]).await.unwrap(); - - // Analyze states - let states = store.analyze_document_states(&file_paths).await.unwrap(); - - assert_eq!(states.len(), 3); - - // First should be unchanged, others should be new - let mut unchanged_count = 0; - let mut new_count = 0; - - for state in &states { - match state { - DocumentState::Unchanged(filename) => { - assert_eq!(filename, &file_paths[0]); - unchanged_count += 1; - } - DocumentState::New(doc_info) => { - assert!(file_paths[1..].contains(&doc_info.filename)); - new_count += 1; - } - _ => panic!("Unexpected document state"), - } - } - - assert_eq!(unchanged_count, 1); - assert_eq!(new_count, 2); +/// Create a point struct for upserting. 
+fn make_point( + id: u64, + vector: Vec, + payload: Value, + vector_name: &str, +) -> PointStructPersisted { + let mut vectors = HashMap::new(); + vectors.insert(vector_name.to_string(), VectorInternal::from(vector)); + + PointStructPersisted { + id: ExtendedPointId::NumId(id), + vector: VectorStructInternal::Named(vectors).into(), + payload: Some(json_to_payload(payload)), } +} - #[tokio::test] - async fn test_analyze_document_states_version_mismatch() { - use std::fs; - use std::time::UNIX_EPOCH; - use tempfile::TempDir; - - let temp_dir = TempDir::new().unwrap(); - let file_paths = create_test_files(&temp_dir); - - // Create store and add documents with old version but correct size/mtime - let store = Store::open(temp_dir.path().to_str().unwrap()) - .await - .unwrap(); - - let mut old_docs = Vec::new(); - for path in &file_paths { - let metadata = fs::metadata(path).unwrap(); - let doc_meta = DocMeta { - path: path.clone(), - size_bytes: metadata.len(), - mtime: metadata - .modified() - .unwrap() - .duration_since(UNIX_EPOCH) - .unwrap() - .as_secs() as i64, - _version: 1, // older version than CURRENT_EMBEDDING_VERSION (2) - }; - old_docs.push(doc_meta); - } - store.upsert_document_metadata(&old_docs).await.unwrap(); - - let states = store.analyze_document_states(&file_paths).await.unwrap(); - assert_eq!(states.len(), 3); - for state in &states { - match state { - DocumentState::Changed(info) => { - assert!(file_paths.contains(&info.filename)); - } - _ => panic!("Expected Changed state due to version mismatch"), - } +/// Convert JSON value (DocMeta or LineEmbedding struct) to Qdrant Payload. 
+fn json_to_payload(value: Value) -> Payload { + if let Value::Object(map) = value { + let mut payload = Payload::default(); + for (k, v) in map { + payload.0.insert(k, v); } + payload + } else { + Payload::default() } +} - #[tokio::test] - async fn test_analyze_document_states_nonexistent_file() { - use tempfile::TempDir; - - let temp_dir = TempDir::new().unwrap(); - let mut file_paths = create_test_files(&temp_dir); - - // Add a nonexistent file to the list - file_paths.push("/nonexistent/file.txt".to_string()); - - let store = Store::open(temp_dir.path().to_str().unwrap()) - .await - .unwrap(); +/// Convert Qdrant Payload back to DocMeta +fn payload_to_doc_meta(payload: &Payload) -> Result { + let json_map: serde_json::Map = payload + .0 + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); - let states = store.analyze_document_states(&file_paths).await.unwrap(); + let json_value = Value::Object(json_map); + serde_json::from_value(json_value).map_err(|e| anyhow!(e.to_string())) +} - // Should only have states for existing files - assert_eq!(states.len(), 3); +/// Convert Qdrant Payload back to LineEmbedding +fn payload_to_line_embedding(payload: &Payload) -> Result { + let json_map: serde_json::Map = payload + .0 + .iter() + .map(|(k, v)| (k.clone(), v.clone())) + .collect(); - for state in &states { - if let DocumentState::New(doc_info) = state { - assert_ne!(doc_info.filename, "/nonexistent/file.txt"); - } - } - } + let json_value = Value::Object(json_map); + serde_json::from_value(json_value).map_err(|e| anyhow!(e.to_string())) } From 1c66d2dc4aaf1d6fb9ddc7a649cab2b77fe6858f Mon Sep 17 00:00:00 2001 From: "Clelia (Astra) Bertelli" Date: Fri, 6 Feb 2026 16:23:43 +0100 Subject: [PATCH 2/8] chore: add flushes; ci: make clippy happy --- src/workspace/store.rs | 49 +++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 27 deletions(-) diff --git a/src/workspace/store.rs b/src/workspace/store.rs index 75cf169e..2dfa2108 100644 --- 
a/src/workspace/store.rs +++ b/src/workspace/store.rs @@ -165,7 +165,7 @@ impl Store { payload_storage_type: PayloadStorageType::Mmap, }; - let document_shard = + let documents_shard = EdgeShard::load(&document_shard_path, Some(segment_config_document_shard))?; let mut vector_data_line_embeddings_shard = HashMap::new(); @@ -194,8 +194,8 @@ impl Store { )?; Ok(Self { - documents_shard: document_shard, - line_embeddings_shard: line_embeddings_shard, + documents_shard, + line_embeddings_shard, }) } @@ -364,6 +364,9 @@ impl Store { .update(operation) .map_err(|e| anyhow!(e.to_string()))?; + // flush changes to disk + self.flush_line_embeddings(); + Ok(()) } @@ -393,7 +396,7 @@ impl Store { for meta in chunk { point_id += 1_u64; let payload_json = - serde_json::to_value(&meta).map_err(|e| anyhow!(e.to_string()))?; + serde_json::to_value(meta).map_err(|e| anyhow!(e.to_string()))?; let vector: Vec = vec![]; let point = make_point(point_id, vector, payload_json, DOCUMENTS_VECTOR_NAME); points.push(point); @@ -425,7 +428,7 @@ impl Store { for line_embedding in chunk { point_id += 1_u64; let payload_json = - serde_json::to_value(&line_embedding).map_err(|e| anyhow!(e.to_string()))?; + serde_json::to_value(line_embedding).map_err(|e| anyhow!(e.to_string()))?; let point = make_point( point_id, line_embedding.embedding.clone(), @@ -478,12 +481,9 @@ impl Store { let mut paths: Vec = vec![]; for record in records { - match record.payload { - Some(p) => { - let doc_meta = payload_to_doc_meta(&p)?; - paths.push(doc_meta.path); - } - None => {} + if let Some(p) = record.payload { + let doc_meta = payload_to_doc_meta(&p)?; + paths.push(doc_meta.path); } } @@ -508,10 +508,8 @@ impl Store { for chunk in subset_paths.chunks(1000) { let query: Vec = query_vec.into(); let vector: VectorInternal = query.into(); - let score_threshold: Option> = match max_distance { - Some(max_dist) => Some(OrderedFloat(1_f32 - max_dist)), - None => None, - }; + let score_threshold: Option> = + 
max_distance.map(|max_dist| OrderedFloat(1_f32 - max_dist)); let results = self .line_embeddings_shard .query(ShardQueryRequest { @@ -529,7 +527,7 @@ impl Store { Match::from(AnyVariants::Strings(chunk.iter().cloned().collect())), ), ))), - score_threshold: score_threshold, + score_threshold, limit: top_k * 2, offset: 0, params: None, @@ -539,17 +537,14 @@ impl Store { .map_err(|e| anyhow!(e.to_string()))?; for result in results { - match result.payload { - Some(p) => { - let line_embd = payload_to_line_embedding(&p)?; - let ranked_line = RankedLine { - line_number: line_embd.line_number, - path: line_embd.path, - distance: result.score, - }; - all_results.push(ranked_line); - } - None => {} + if let Some(p) = result.payload { + let line_embd = payload_to_line_embedding(&p)?; + let ranked_line = RankedLine { + line_number: line_embd.line_number, + path: line_embd.path, + distance: result.score, + }; + all_results.push(ranked_line); } } } From 3eafc167fedbefd04928dd61eb182b3d36e2d426 Mon Sep 17 00:00:00 2001 From: "Clelia (Astra) Bertelli" Date: Fri, 6 Feb 2026 16:59:07 +0100 Subject: [PATCH 3/8] fix: use id method to keep ids consistent with previous version --- src/workspace/store.rs | 256 ++++++++++++++++++++++++++++++++++++----- 1 file changed, 230 insertions(+), 26 deletions(-) diff --git a/src/workspace/store.rs b/src/workspace/store.rs index 2dfa2108..82e7751d 100644 --- a/src/workspace/store.rs +++ b/src/workspace/store.rs @@ -67,23 +67,21 @@ pub struct LineEmbedding { } impl DocMeta { - pub fn id(&self) -> i32 { + pub fn id(&self) -> u64 { // Generate deterministic ID based on path hash for consistent upserts let mut hasher = DefaultHasher::new(); self.path.hash(&mut hasher); - // Use absolute value to ensure positive ID, avoid i32::MIN edge case - (hasher.finish() as i32).abs().max(1) + hasher.finish() } } impl LineEmbedding { - pub fn id(&self) -> i32 { + pub fn id(&self) -> u64 { // Generate deterministic ID based on path + line number for consistent 
upserts let mut hasher = DefaultHasher::new(); self.path.hash(&mut hasher); self.line_number.hash(&mut hasher); - // Use absolute value to ensure positive ID, avoid i32::MIN edge case - (hasher.finish() as i32).abs().max(1) + hasher.finish() } } @@ -389,16 +387,13 @@ impl Store { return Ok(()); } - let mut point_id = self.count_documents()? as u64; - for chunk in metas.chunks(1000) { let mut points: Vec = vec![]; for meta in chunk { - point_id += 1_u64; let payload_json = serde_json::to_value(meta).map_err(|e| anyhow!(e.to_string()))?; let vector: Vec = vec![]; - let point = make_point(point_id, vector, payload_json, DOCUMENTS_VECTOR_NAME); + let point = make_point(meta.id(), vector, payload_json, DOCUMENTS_VECTOR_NAME); points.push(point); } let operation = CollectionUpdateOperations::PointOperation( @@ -420,17 +415,15 @@ impl Store { if line_embeddings.is_empty() { return Ok(()); } - let mut point_id = self.count_line_embeddings()? as u64; for chunk in line_embeddings.chunks(1000) { let mut points: Vec = vec![]; for line_embedding in chunk { - point_id += 1_u64; let payload_json = serde_json::to_value(line_embedding).map_err(|e| anyhow!(e.to_string()))?; let point = make_point( - point_id, + line_embedding.id(), line_embedding.embedding.clone(), payload_json, LINE_EMBEDDINGS_VECTOR_NAME, @@ -637,19 +630,6 @@ impl Store { Ok(count) } - /// Get the number of indexed points in the documents shard - pub fn count_line_embeddings(&self) -> Result { - let count = self - .line_embeddings_shard - .count(CountRequestInternal { - filter: None, - exact: true, - }) - .map_err(|e| anyhow!(e.to_string()))?; - - Ok(count) - } - /// Flush all documents data to disk. 
pub fn flush_documents(&self) { self.documents_shard.flush(); @@ -714,3 +694,227 @@ fn payload_to_line_embedding(payload: &Payload) -> Result { let json_value = Value::Object(json_map); serde_json::from_value(json_value).map_err(|e| anyhow!(e.to_string())) } + +// #[cfg(test)] +// mod tests { +// use super::*; +// use tempfile::TempDir; + +// // Helper function to create a test store +// fn create_test_store() -> (Store, TempDir) { +// let temp_dir = TempDir::new().expect("Failed to create temp dir"); +// let store = Store::open(temp_dir.path().to_str().unwrap()).expect("Failed to create store"); +// (store, temp_dir) +// } + +// // Helper function to create test documents +// fn create_test_docs() -> (Vec, Vec>) { +// let docs = vec![ +// DocMeta { +// path: "/test/doc1.txt".to_string(), +// size_bytes: 100, +// mtime: 1234567890, +// _version: CURRENT_EMBEDDING_VERSION, +// }, +// DocMeta { +// path: "/test/doc2.txt".to_string(), +// size_bytes: 200, +// mtime: 1234567891, +// _version: CURRENT_EMBEDDING_VERSION, +// }, +// DocMeta { +// path: "/test/doc3.txt".to_string(), +// size_bytes: 150, +// mtime: 1234567892, +// _version: CURRENT_EMBEDDING_VERSION, +// }, +// ]; + +// let embeddings = vec![ +// vec![0.1, 0.2, 0.3, 0.4], +// vec![0.5, 0.6, 0.7, 0.8], +// vec![0.9, 1.0, 1.1, 1.2], +// ]; + +// (docs, embeddings) +// } + +// #[test] +// fn test_store_creation_and_stats_empty() { +// let (store, _temp_dir) = create_test_store(); + +// let stats = store.get_stats().expect("Failed to get stats"); + +// assert_eq!(stats.total_documents, 0); +// assert!(stats.has_index); +// assert_eq!(stats.index_type, Some("HNSW".to_string())); +// } + +// #[test] +// fn test_upsert_documents_and_stats() { +// let (store, _temp_dir) = create_test_store(); +// let (docs, embeddings) = create_test_docs(); + +// // Insert documents +// store +// .upsert_document_metadata(&docs) +// .expect("Failed to upsert documents"); + +// let line_embeddings: Vec = docs +// .iter() +// 
.enumerate() +// .map(|(i, doc)| LineEmbedding { +// path: doc.path.clone(), +// line_number: i as i32, +// embedding: embeddings[i].clone(), +// }) +// .collect(); + +// store +// .upsert_line_embeddings(&line_embeddings) +// .expect("Failed to upsert line embeddings"); + +// // Check stats +// let stats = store.get_stats().expect("Failed to get stats"); + +// assert_eq!(stats.total_documents, 3); +// assert!(stats.has_index); +// assert_eq!(stats.index_type, Some("HNSW".to_string())); +// } + +// #[test] +// fn test_get_all_document_paths() { +// let (store, _temp_dir) = create_test_store(); +// let (docs, _embeddings) = create_test_docs(); + +// // Initially should be empty +// let paths = store +// .get_all_document_paths() +// .expect("Failed to get document paths"); +// assert!(paths.is_empty()); + +// // Insert documents +// store +// .upsert_document_metadata(&docs) +// .expect("Failed to upsert documents"); + +// // Should now have paths +// let paths = store +// .get_all_document_paths() +// .expect("Failed to get document paths"); + +// assert_eq!(paths.len(), 3); +// assert!(paths.contains(&"/test/doc1.txt".to_string())); +// assert!(paths.contains(&"/test/doc2.txt".to_string())); +// assert!(paths.contains(&"/test/doc3.txt".to_string())); +// } + +// #[test] +// fn test_get_existing_docs() { +// let (store, _temp_dir) = create_test_store(); +// let (docs, _embeddings) = create_test_docs(); + +// // Insert documents +// store +// .upsert_document_metadata(&docs) +// .expect("Failed to upsert documents"); + +// // Test getting existing docs +// let query_paths = vec![ +// "/test/doc1.txt".to_string(), +// "/test/doc2.txt".to_string(), +// "/test/nonexistent.txt".to_string(), +// ]; + +// let existing = store +// .get_existing_docs(&query_paths) +// .expect("Failed to get existing docs"); + +// assert_eq!(existing.len(), 2); +// assert!(existing.contains_key("/test/doc1.txt")); +// assert!(existing.contains_key("/test/doc2.txt")); +// 
assert!(!existing.contains_key("/test/nonexistent.txt")); + +// // Verify metadata +// let doc1_meta = existing.get("/test/doc1.txt").unwrap(); +// assert_eq!(doc1_meta.size_bytes, 100); +// assert_eq!(doc1_meta.mtime, 1234567890); +// } + +// #[test] +// fn test_delete_documents() { +// let (store, _temp_dir) = create_test_store(); +// let (docs, _embeddings) = create_test_docs(); + +// // Insert documents +// store +// .upsert_document_metadata(&docs) +// .expect("Failed to upsert documents"); + +// // Verify all documents exist +// let all_paths = store +// .get_all_document_paths() +// .expect("Failed to get document paths"); +// assert_eq!(all_paths.len(), 3); + +// // Delete some documents +// let to_delete = vec!["/test/doc1.txt".to_string(), "/test/doc3.txt".to_string()]; +// store +// .delete_documents(&to_delete) +// .expect("Failed to delete documents"); + +// // Verify only doc2 remains +// let remaining_paths = store +// .get_all_document_paths() +// .expect("Failed to get document paths"); +// assert_eq!(remaining_paths.len(), 1); +// assert!(remaining_paths.contains(&"/test/doc2.txt".to_string())); +// } + +// #[test] +// fn test_upsert_replaces_existing() { +// let (store, _temp_dir) = create_test_store(); + +// // Insert initial document +// let initial_doc = DocMeta { +// path: "/test/doc.txt".to_string(), +// size_bytes: 100, +// mtime: 1000, +// _version: CURRENT_EMBEDDING_VERSION, +// }; +// let _initial_embedding = [vec![1.0, 2.0, 3.0, 4.0]]; + +// store +// .upsert_document_metadata(&[initial_doc]) +// .expect("Failed to insert initial document"); + +// // Verify document exists +// let paths = store.get_all_document_paths().expect("Failed to get paths"); +// assert_eq!(paths.len(), 1); + +// // Update the same document +// let updated_doc = DocMeta { +// path: "/test/doc.txt".to_string(), +// size_bytes: 200, +// mtime: 2000, +// _version: CURRENT_EMBEDDING_VERSION, +// }; +// let _updated_embedding = [vec![5.0, 6.0, 7.0, 8.0]]; + +// store 
+// .upsert_document_metadata(&[updated_doc]) +// .expect("Failed to update document"); + +// // Should still have only one document +// let paths = store.get_all_document_paths().expect("Failed to get paths"); +// assert_eq!(paths.len(), 1); + +// // Verify metadata was updated +// let existing = store +// .get_existing_docs(&["/test/doc.txt".to_string()]) +// .expect("Failed to get existing docs"); +// let doc_meta = existing.get("/test/doc.txt").unwrap(); +// assert_eq!(doc_meta.size_bytes, 200); +// assert_eq!(doc_meta.mtime, 2000); +// } +// } From 6f365d698bfc92234850bb08d131bb753876cef2 Mon Sep 17 00:00:00 2001 From: "Clelia (Astra) Bertelli" Date: Fri, 6 Feb 2026 20:30:51 +0100 Subject: [PATCH 4/8] feat: tests --- src/workspace/store.rs | 982 +++++++++++++++++++++++++++++------------ 1 file changed, 690 insertions(+), 292 deletions(-) diff --git a/src/workspace/store.rs b/src/workspace/store.rs index 82e7751d..7ef954f7 100644 --- a/src/workspace/store.rs +++ b/src/workspace/store.rs @@ -1,6 +1,7 @@ //! Qdrant Edge storage wrapper use anyhow::{Result, anyhow}; +use crate::search::DocumentInfo; use edge::EdgeShard; use ordered_float::OrderedFloat; use segment::data_types::vectors::{NamedQuery, VectorInternal, VectorStructInternal}; @@ -24,8 +25,7 @@ use std::collections::HashMap; use std::collections::hash_map::DefaultHasher; use std::hash::{Hash, Hasher}; use std::path::Path; - -use crate::search::DocumentInfo; +use std::str::FromStr; /// Current embedding/version number for stored document metadata. 
/// Bump this when the embedding model or preprocessing pipeline changes in a @@ -35,7 +35,8 @@ use crate::search::DocumentInfo; pub const CURRENT_EMBEDDING_VERSION: u32 = 2; /// Embedding size (needed to inform Qdrant collection when it is instantiated) -pub const EMBEDDING_SIZE: usize = 256; +pub const LINE_EMBEDDING_SIZE: usize = 256; +pub const DOCUMENT_EMBEDDING_SIZE: usize = 1; /// Vector name used in the documents shard const DOCUMENTS_VECTOR_NAME: &str = "documents"; @@ -99,32 +100,6 @@ pub struct WorkspaceStats { pub index_type: Option, } -/// Metadata stored with each vector. -#[derive(Debug, Clone, Serialize, Deserialize)] -pub struct ChunkPayload { - /// Relative file path from repo root - pub path: String, - /// Chunk index within the file - pub chunk_index: usize, - /// Starting line number - pub start_line: usize, - /// Ending line number - pub end_line: usize, - /// The actual text content - pub text: String, - /// File hash for change detection - pub file_hash: String, -} - -/// A search result. -#[derive(Debug, Clone)] -pub struct SearchResult { - /// Score (similarity) - pub score: f32, - /// The payload - pub payload: ChunkPayload, -} - /// Storage wrapper around Qdrant Edge. 
pub struct Store { documents_shard: EdgeShard, @@ -147,7 +122,7 @@ impl Store { vector_data_document_shard.insert( DOCUMENTS_VECTOR_NAME.to_string(), VectorDataConfig { - size: EMBEDDING_SIZE, + size: DOCUMENT_EMBEDDING_SIZE, distance: Distance::Cosine, storage_type: VectorStorageType::ChunkedMmap, index: Default::default(), @@ -168,9 +143,9 @@ impl Store { let mut vector_data_line_embeddings_shard = HashMap::new(); vector_data_line_embeddings_shard.insert( - DOCUMENTS_VECTOR_NAME.to_string(), + LINE_EMBEDDINGS_VECTOR_NAME.to_string(), VectorDataConfig { - size: EMBEDDING_SIZE, + size: LINE_EMBEDDING_SIZE, distance: Distance::Cosine, storage_type: VectorStorageType::ChunkedMmap, index: Default::default(), @@ -207,24 +182,12 @@ impl Store { with_vector: WithVector::Bool(false), with_payload: Some(WithPayloadInterface::Bool(true)), filter: Some(Filter { - must: Some(vec![ - Condition::Field(FieldCondition::new_match( - JsonPath { - first_key: "path".to_string(), - rest: vec![], - }, - Match::from(AnyVariants::Strings(chunk.iter().cloned().collect())), - )), - Condition::Field(FieldCondition::new_match( - JsonPath { - first_key: "version".to_string(), - rest: vec![], - }, - Match::new_value(ValueVariants::Integer( - CURRENT_EMBEDDING_VERSION as i64, - )), - )), - ]), + must: Some(vec![Condition::Field(FieldCondition::new_match( + JsonPath::from_str("path").map_err(|_| { + anyhow!("An error occurred while creating JSONPath from 'path'") + })?, + Match::from(AnyVariants::Strings(chunk.iter().cloned().collect())), + ))]), must_not: None, should: None, min_should: None, @@ -271,17 +234,15 @@ impl Store { filter: Some(Filter { must: Some(vec![ Condition::Field(FieldCondition::new_match( - JsonPath { - first_key: "path".to_string(), - rest: vec![], - }, + JsonPath::from_str("path").map_err(|_| { + anyhow!("An error occurred while creating JSONPath from 'path'") + })?, Match::from(AnyVariants::Strings(chunk.iter().cloned().collect())), )), 
Condition::Field(FieldCondition::new_match( - JsonPath { - first_key: "version".to_string(), - rest: vec![], - }, + JsonPath::from_str("_version").map_err(|_| { + anyhow!("An error occurred while creating JSONPath from 'path'") + })?, Match::new_value(ValueVariants::Integer( CURRENT_EMBEDDING_VERSION as i64, )), @@ -313,6 +274,9 @@ impl Store { .update(operation) .map_err(|e| anyhow!(e.to_string()))?; + // Flush changes to disk + self.flush_documents(); + Ok(()) } @@ -333,10 +297,9 @@ impl Store { with_payload: Some(WithPayloadInterface::Bool(true)), filter: Some(Filter::new_must(Condition::Field( FieldCondition::new_match( - JsonPath { - first_key: "path".to_string(), - rest: vec![], - }, + JsonPath::from_str("path").map_err(|_| { + anyhow!("An error occurred while creating JSONPath from 'path'") + })?, Match::from(AnyVariants::Strings(chunk.iter().cloned().collect())), ), ))), @@ -392,7 +355,7 @@ impl Store { for meta in chunk { let payload_json = serde_json::to_value(meta).map_err(|e| anyhow!(e.to_string()))?; - let vector: Vec = vec![]; + let vector: Vec = vec![1_f32]; let point = make_point(meta.id(), vector, payload_json, DOCUMENTS_VECTOR_NAME); points.push(point); } @@ -403,7 +366,7 @@ impl Store { .update(operation) .map_err(|e| anyhow!(e.to_string()))?; - // flush to disk + // // flush to disk self.flush_documents(); } @@ -513,10 +476,9 @@ impl Store { }))), filter: Some(Filter::new_must(Condition::Field( FieldCondition::new_match( - JsonPath { - first_key: "path".to_string(), - rest: vec![], - }, + JsonPath::from_str("path").map_err(|_| { + anyhow!("An error occurred while creating JSONPath from 'path'") + })?, Match::from(AnyVariants::Strings(chunk.iter().cloned().collect())), ), ))), @@ -535,7 +497,7 @@ impl Store { let ranked_line = RankedLine { line_number: line_embd.line_number, path: line_embd.path, - distance: result.score, + distance: 1_f32 - result.score, }; all_results.push(ranked_line); } @@ -695,226 +657,662 @@ fn 
payload_to_line_embedding(payload: &Payload) -> Result { serde_json::from_value(json_value).map_err(|e| anyhow!(e.to_string())) } -// #[cfg(test)] -// mod tests { -// use super::*; -// use tempfile::TempDir; - -// // Helper function to create a test store -// fn create_test_store() -> (Store, TempDir) { -// let temp_dir = TempDir::new().expect("Failed to create temp dir"); -// let store = Store::open(temp_dir.path().to_str().unwrap()).expect("Failed to create store"); -// (store, temp_dir) -// } - -// // Helper function to create test documents -// fn create_test_docs() -> (Vec, Vec>) { -// let docs = vec![ -// DocMeta { -// path: "/test/doc1.txt".to_string(), -// size_bytes: 100, -// mtime: 1234567890, -// _version: CURRENT_EMBEDDING_VERSION, -// }, -// DocMeta { -// path: "/test/doc2.txt".to_string(), -// size_bytes: 200, -// mtime: 1234567891, -// _version: CURRENT_EMBEDDING_VERSION, -// }, -// DocMeta { -// path: "/test/doc3.txt".to_string(), -// size_bytes: 150, -// mtime: 1234567892, -// _version: CURRENT_EMBEDDING_VERSION, -// }, -// ]; - -// let embeddings = vec![ -// vec![0.1, 0.2, 0.3, 0.4], -// vec![0.5, 0.6, 0.7, 0.8], -// vec![0.9, 1.0, 1.1, 1.2], -// ]; - -// (docs, embeddings) -// } - -// #[test] -// fn test_store_creation_and_stats_empty() { -// let (store, _temp_dir) = create_test_store(); - -// let stats = store.get_stats().expect("Failed to get stats"); - -// assert_eq!(stats.total_documents, 0); -// assert!(stats.has_index); -// assert_eq!(stats.index_type, Some("HNSW".to_string())); -// } - -// #[test] -// fn test_upsert_documents_and_stats() { -// let (store, _temp_dir) = create_test_store(); -// let (docs, embeddings) = create_test_docs(); - -// // Insert documents -// store -// .upsert_document_metadata(&docs) -// .expect("Failed to upsert documents"); - -// let line_embeddings: Vec = docs -// .iter() -// .enumerate() -// .map(|(i, doc)| LineEmbedding { -// path: doc.path.clone(), -// line_number: i as i32, -// embedding: 
embeddings[i].clone(), -// }) -// .collect(); - -// store -// .upsert_line_embeddings(&line_embeddings) -// .expect("Failed to upsert line embeddings"); - -// // Check stats -// let stats = store.get_stats().expect("Failed to get stats"); - -// assert_eq!(stats.total_documents, 3); -// assert!(stats.has_index); -// assert_eq!(stats.index_type, Some("HNSW".to_string())); -// } - -// #[test] -// fn test_get_all_document_paths() { -// let (store, _temp_dir) = create_test_store(); -// let (docs, _embeddings) = create_test_docs(); - -// // Initially should be empty -// let paths = store -// .get_all_document_paths() -// .expect("Failed to get document paths"); -// assert!(paths.is_empty()); - -// // Insert documents -// store -// .upsert_document_metadata(&docs) -// .expect("Failed to upsert documents"); - -// // Should now have paths -// let paths = store -// .get_all_document_paths() -// .expect("Failed to get document paths"); - -// assert_eq!(paths.len(), 3); -// assert!(paths.contains(&"/test/doc1.txt".to_string())); -// assert!(paths.contains(&"/test/doc2.txt".to_string())); -// assert!(paths.contains(&"/test/doc3.txt".to_string())); -// } - -// #[test] -// fn test_get_existing_docs() { -// let (store, _temp_dir) = create_test_store(); -// let (docs, _embeddings) = create_test_docs(); - -// // Insert documents -// store -// .upsert_document_metadata(&docs) -// .expect("Failed to upsert documents"); - -// // Test getting existing docs -// let query_paths = vec![ -// "/test/doc1.txt".to_string(), -// "/test/doc2.txt".to_string(), -// "/test/nonexistent.txt".to_string(), -// ]; - -// let existing = store -// .get_existing_docs(&query_paths) -// .expect("Failed to get existing docs"); - -// assert_eq!(existing.len(), 2); -// assert!(existing.contains_key("/test/doc1.txt")); -// assert!(existing.contains_key("/test/doc2.txt")); -// assert!(!existing.contains_key("/test/nonexistent.txt")); - -// // Verify metadata -// let doc1_meta = 
existing.get("/test/doc1.txt").unwrap(); -// assert_eq!(doc1_meta.size_bytes, 100); -// assert_eq!(doc1_meta.mtime, 1234567890); -// } - -// #[test] -// fn test_delete_documents() { -// let (store, _temp_dir) = create_test_store(); -// let (docs, _embeddings) = create_test_docs(); - -// // Insert documents -// store -// .upsert_document_metadata(&docs) -// .expect("Failed to upsert documents"); - -// // Verify all documents exist -// let all_paths = store -// .get_all_document_paths() -// .expect("Failed to get document paths"); -// assert_eq!(all_paths.len(), 3); - -// // Delete some documents -// let to_delete = vec!["/test/doc1.txt".to_string(), "/test/doc3.txt".to_string()]; -// store -// .delete_documents(&to_delete) -// .expect("Failed to delete documents"); - -// // Verify only doc2 remains -// let remaining_paths = store -// .get_all_document_paths() -// .expect("Failed to get document paths"); -// assert_eq!(remaining_paths.len(), 1); -// assert!(remaining_paths.contains(&"/test/doc2.txt".to_string())); -// } - -// #[test] -// fn test_upsert_replaces_existing() { -// let (store, _temp_dir) = create_test_store(); - -// // Insert initial document -// let initial_doc = DocMeta { -// path: "/test/doc.txt".to_string(), -// size_bytes: 100, -// mtime: 1000, -// _version: CURRENT_EMBEDDING_VERSION, -// }; -// let _initial_embedding = [vec![1.0, 2.0, 3.0, 4.0]]; - -// store -// .upsert_document_metadata(&[initial_doc]) -// .expect("Failed to insert initial document"); - -// // Verify document exists -// let paths = store.get_all_document_paths().expect("Failed to get paths"); -// assert_eq!(paths.len(), 1); - -// // Update the same document -// let updated_doc = DocMeta { -// path: "/test/doc.txt".to_string(), -// size_bytes: 200, -// mtime: 2000, -// _version: CURRENT_EMBEDDING_VERSION, -// }; -// let _updated_embedding = [vec![5.0, 6.0, 7.0, 8.0]]; - -// store -// .upsert_document_metadata(&[updated_doc]) -// .expect("Failed to update document"); - -// // Should 
still have only one document -// let paths = store.get_all_document_paths().expect("Failed to get paths"); -// assert_eq!(paths.len(), 1); - -// // Verify metadata was updated -// let existing = store -// .get_existing_docs(&["/test/doc.txt".to_string()]) -// .expect("Failed to get existing docs"); -// let doc_meta = existing.get("/test/doc.txt").unwrap(); -// assert_eq!(doc_meta.size_bytes, 200); -// assert_eq!(doc_meta.mtime, 2000); -// } -// } +#[cfg(test)] +mod tests { + use super::*; + use serde_json::json; + use tempfile::TempDir; + + // Helper function to create a test store + fn create_test_store() -> (Store, TempDir) { + let temp_dir = TempDir::new().expect("Failed to create temp dir"); + let store = Store::open(temp_dir.path().to_str().unwrap()).expect("Failed to create store"); + (store, temp_dir) + } + + // Helper function to create test documents + fn create_test_docs() -> (Vec, Vec>) { + let docs = vec![ + DocMeta { + path: "/test/doc1.txt".to_string(), + size_bytes: 100, + mtime: 1234567890, + _version: CURRENT_EMBEDDING_VERSION, + }, + DocMeta { + path: "/test/doc2.txt".to_string(), + size_bytes: 200, + mtime: 1234567891, + _version: CURRENT_EMBEDDING_VERSION, + }, + DocMeta { + path: "/test/doc3.txt".to_string(), + size_bytes: 150, + mtime: 1234567892, + _version: CURRENT_EMBEDDING_VERSION, + }, + ]; + + let embeddings = vec![ + vec![0.1; 256], // All 0.1 + vec![0.5; 256], // All 0.5 + vec![0.75; 256], // All 0.5 + ]; + + (docs, embeddings) + } + + #[test] + fn test_store_creation_and_stats_empty() { + let (store, _temp_dir) = create_test_store(); + + let stats = store.get_stats().expect("Failed to get stats"); + + assert_eq!(stats.total_documents, 0); + assert!(stats.has_index); + assert_eq!(stats.index_type, Some("HNSW".to_string())); + + // explicitly drop store before _temp_dir to avoid + // EdgeShard panicking when trying to flush to a non-existing dir + // (caused by _temp_dir being dropped before store) + drop(store); + drop(_temp_dir); + } 
+ + #[test] + fn test_upsert_documents_and_stats() { + let (store, _temp_dir) = create_test_store(); + let (docs, embeddings) = create_test_docs(); + + // Insert documents + store + .upsert_document_metadata(&docs) + .expect("Failed to upsert documents"); + + let line_embeddings: Vec = docs + .iter() + .enumerate() + .map(|(i, doc)| LineEmbedding { + path: doc.path.clone(), + line_number: i as i32, + embedding: embeddings[i].clone(), + }) + .collect(); + + store + .upsert_line_embeddings(&line_embeddings) + .expect("Failed to upsert line embeddings"); + + // Check stats + let stats = store.get_stats().expect("Failed to get stats"); + + assert_eq!(stats.total_documents, 3); + assert!(stats.has_index); + assert_eq!(stats.index_type, Some("HNSW".to_string())); + + drop(store); + drop(_temp_dir); + } + + #[test] + fn test_search_line_embeddings() { + let (store, _temp_dir) = create_test_store(); + let (docs, embeddings) = create_test_docs(); + + let line_embeddings: Vec = docs + .iter() + .enumerate() + .map(|(i, doc)| LineEmbedding { + path: doc.path.clone(), + line_number: i as i32, + embedding: embeddings[i].clone(), + }) + .collect(); + + store + .upsert_line_embeddings(&line_embeddings) + .expect("Failed to upsert line embeddings"); + + // Perform search + let exact_match_query: Vec = vec![0.1; 256]; + let search_results = store + .search_line_embeddings( + exact_match_query.as_slice(), + &["/test/doc1.txt".to_string()], + 1, + Some(0.1_f32), + ) + .expect("Should be able to retrieve search results"); + assert_eq!(search_results.len(), 1); + assert_eq!(search_results[0].line_number, 0); + assert_eq!(search_results[0].path, docs[0].path); + assert!(search_results[0].distance < 0.1); + + drop(store); + drop(_temp_dir); + } + + #[test] + fn test_get_all_document_paths() { + let (store, _temp_dir) = create_test_store(); + let (docs, _embeddings) = create_test_docs(); + + // Initially should be empty + let paths = store + .get_all_document_paths() + .expect("Failed to 
get document paths"); + assert!(paths.is_empty()); + + // Insert documents + store + .upsert_document_metadata(&docs) + .expect("Failed to upsert documents"); + + // Should now have paths + let paths = store + .get_all_document_paths() + .expect("Failed to get document paths"); + + assert_eq!(paths.len(), 3); + assert!(paths.contains(&"/test/doc1.txt".to_string())); + assert!(paths.contains(&"/test/doc2.txt".to_string())); + assert!(paths.contains(&"/test/doc3.txt".to_string())); + + drop(store); + drop(_temp_dir); + } + + #[test] + fn test_get_existing_docs() { + let (store, _temp_dir) = create_test_store(); + let (docs, _embeddings) = create_test_docs(); + + // Insert documents + store + .upsert_document_metadata(&docs) + .expect("Failed to upsert documents"); + + // Test getting existing docs + let query_paths = vec![ + "/test/doc1.txt".to_string(), + "/test/doc2.txt".to_string(), + "/test/nonexistent.txt".to_string(), + ]; + + let existing = store + .get_existing_docs(&query_paths) + .expect("Failed to get existing docs"); + + assert_eq!(existing.len(), 2); + assert!(existing.contains_key("/test/doc1.txt")); + assert!(existing.contains_key("/test/doc2.txt")); + assert!(!existing.contains_key("/test/nonexistent.txt")); + + // Verify metadata + let doc1_meta = existing.get("/test/doc1.txt").unwrap(); + assert_eq!(doc1_meta.size_bytes, 100); + assert_eq!(doc1_meta.mtime, 1234567890); + + drop(store); + drop(_temp_dir); + } + + #[test] + fn test_delete_documents() { + let (store, _temp_dir) = create_test_store(); + let (docs, _embeddings) = create_test_docs(); + + // Insert documents + store + .upsert_document_metadata(&docs) + .expect("Failed to upsert documents"); + + // Verify all documents exist + let all_paths = store + .get_all_document_paths() + .expect("Failed to get document paths"); + assert_eq!(all_paths.len(), 3); + + // Delete some documents + let to_delete = vec!["/test/doc1.txt".to_string(), "/test/doc3.txt".to_string()]; + store + 
.delete_documents(&to_delete) + .expect("Failed to delete documents"); + + // Verify only doc2 remains + let remaining_paths = store + .get_all_document_paths() + .expect("Failed to get document paths"); + assert_eq!(remaining_paths.len(), 1); + assert!(remaining_paths.contains(&"/test/doc2.txt".to_string())); + + drop(store); + drop(_temp_dir); + } + + #[test] + fn test_upsert_replaces_existing() { + let (store, _temp_dir) = create_test_store(); + + // Insert initial document + let initial_doc = DocMeta { + path: "/test/doc.txt".to_string(), + size_bytes: 100, + mtime: 1000, + _version: CURRENT_EMBEDDING_VERSION, + }; + let _initial_embedding = [vec![1.0, 2.0, 3.0, 4.0]]; + + store + .upsert_document_metadata(&[initial_doc]) + .expect("Failed to insert initial document"); + + // Verify document exists + let paths = store.get_all_document_paths().expect("Failed to get paths"); + assert_eq!(paths.len(), 1); + + // Update the same document + // NOTE: this works because the id of the data point depends on the hashing of the path. 
+ // same path = same hash -> the update results in a replacement rather than an append + let updated_doc = DocMeta { + path: "/test/doc.txt".to_string(), + size_bytes: 200, + mtime: 2000, + _version: CURRENT_EMBEDDING_VERSION, + }; + let _updated_embedding = [vec![5.0, 6.0, 7.0, 8.0]]; + + store + .upsert_document_metadata(&[updated_doc]) + .expect("Failed to update document"); + + // Should still have only one document + let paths = store.get_all_document_paths().expect("Failed to get paths"); + assert_eq!(paths.len(), 1); + + // Verify metadata was updated + let existing = store + .get_existing_docs(&["/test/doc.txt".to_string()]) + .expect("Failed to get existing docs"); + let doc_meta = existing.get("/test/doc.txt").unwrap(); + assert_eq!(doc_meta.size_bytes, 200); + assert_eq!(doc_meta.mtime, 2000); + + drop(store); + drop(_temp_dir); + } + + #[test] + fn test_doc_meta_id_generation() { + let doc1 = DocMeta { + path: "test1.txt".to_string(), + size_bytes: 100, + mtime: 1000, + _version: CURRENT_EMBEDDING_VERSION, + }; + let doc2 = DocMeta { + path: "test2.txt".to_string(), + size_bytes: 100, + mtime: 1000, + _version: CURRENT_EMBEDDING_VERSION, + }; + + let id1 = doc1.id(); + let id2 = doc2.id(); + + // IDs should be different (random generation) + assert_ne!(id1, id2); + } + + // Helper to create test files for analyze_document_states tests + fn create_test_files(temp_dir: &tempfile::TempDir) -> Vec { + use std::fs; + + let file1_path = temp_dir.path().join("test1.txt"); + let file2_path = temp_dir.path().join("test2.txt"); + let file3_path = temp_dir.path().join("test3.txt"); + + fs::write(&file1_path, "This is test file 1\nWith multiple lines").unwrap(); + fs::write(&file2_path, "This is test file 2\nWith different content").unwrap(); + fs::write(&file3_path, "This is test file 3\nWith more content").unwrap(); + + vec![ + file1_path.to_string_lossy().to_string(), + file2_path.to_string_lossy().to_string(), + file3_path.to_string_lossy().to_string(), + ] + 
} + + #[test] + fn test_analyze_document_states_all_new() { + use tempfile::TempDir; + + let temp_dir = TempDir::new().unwrap(); + let file_paths = create_test_files(&temp_dir); + + // Create empty store + let store = Store::open(temp_dir.path().to_str().unwrap()).unwrap(); + + let states = store.analyze_document_states(&file_paths).unwrap(); + + assert_eq!(states.len(), 3); + + // All should be new documents + for state in &states { + if let DocumentState::New(doc_info) = state { + assert!(file_paths.contains(&doc_info.filename)); + assert!(!doc_info.content.is_empty()); + assert!(doc_info.meta.size_bytes > 0); + assert!(doc_info.meta.mtime > 0); + } else { + panic!("Expected New document state"); + } + } + + drop(store); + drop(temp_dir); + } + + #[test] + fn test_analyze_document_states_unchanged() { + use std::fs; + use std::time::UNIX_EPOCH; + use tempfile::TempDir; + + let temp_dir = TempDir::new().unwrap(); + let file_paths = create_test_files(&temp_dir); + + // Create store and add documents + let store = Store::open(temp_dir.path().to_str().unwrap()).unwrap(); + + // Insert documents with current metadata + let mut docs = Vec::new(); + for path in &file_paths { + let metadata = fs::metadata(path).unwrap(); + let doc_meta = DocMeta { + path: path.clone(), + size_bytes: metadata.len(), + mtime: metadata + .modified() + .unwrap() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() as i64, + _version: CURRENT_EMBEDDING_VERSION, + }; + docs.push(doc_meta); + } + store.upsert_document_metadata(&docs).unwrap(); + + // Analyze states - should all be unchanged + let states = store.analyze_document_states(&file_paths).unwrap(); + + assert_eq!(states.len(), 3); + + for state in &states { + if let DocumentState::Unchanged(filename) = state { + assert!(file_paths.contains(filename)); + } else { + panic!("Expected Unchanged document state"); + } + } + + drop(store); + drop(temp_dir); + } + + #[test] + fn test_analyze_document_states_changed() { + use 
tempfile::TempDir; + + let temp_dir = TempDir::new().unwrap(); + let file_paths = create_test_files(&temp_dir); + + // Create store and add documents with old metadata + let store = Store::open(temp_dir.path().to_str().unwrap()).unwrap(); + + let mut docs = Vec::new(); + for path in &file_paths { + let doc_meta = DocMeta { + path: path.clone(), + size_bytes: 10, // Different from actual size + mtime: 1000, // Old timestamp + _version: 1, // simulate old version + }; + docs.push(doc_meta); + } + store.upsert_document_metadata(&docs).unwrap(); + + // Analyze states - should all be changed + let states = store.analyze_document_states(&file_paths).unwrap(); + + assert_eq!(states.len(), 3); + + for state in &states { + if let DocumentState::Changed(doc_info) = state { + assert!(file_paths.contains(&doc_info.filename)); + assert!(!doc_info.content.is_empty()); + } else { + panic!("Expected Changed document state"); + } + } + + drop(store); + drop(temp_dir); + } + + #[test] + fn test_analyze_document_states_mixed() { + use std::fs; + use std::time::UNIX_EPOCH; + use tempfile::TempDir; + + let temp_dir = TempDir::new().unwrap(); + let file_paths = create_test_files(&temp_dir); + + // Create store and add only the first document + let store = Store::open(temp_dir.path().to_str().unwrap()).unwrap(); + + let metadata = fs::metadata(&file_paths[0]).unwrap(); + let doc_meta = DocMeta { + path: file_paths[0].clone(), + size_bytes: metadata.len(), + mtime: metadata + .modified() + .unwrap() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() as i64, + _version: CURRENT_EMBEDDING_VERSION, + }; + store.upsert_document_metadata(&[doc_meta]).unwrap(); + + // Analyze states + let states = store.analyze_document_states(&file_paths).unwrap(); + + assert_eq!(states.len(), 3); + + // First should be unchanged, others should be new + let mut unchanged_count = 0; + let mut new_count = 0; + + for state in &states { + match state { + DocumentState::Unchanged(filename) => { + 
assert_eq!(filename, &file_paths[0]); + unchanged_count += 1; + } + DocumentState::New(doc_info) => { + assert!(file_paths[1..].contains(&doc_info.filename)); + new_count += 1; + } + _ => panic!("Unexpected document state"), + } + } + + assert_eq!(unchanged_count, 1); + assert_eq!(new_count, 2); + + drop(store); + drop(temp_dir); + } + + #[test] + fn test_analyze_document_states_version_mismatch() { + use std::fs; + use std::time::UNIX_EPOCH; + use tempfile::TempDir; + + let temp_dir = TempDir::new().unwrap(); + let file_paths = create_test_files(&temp_dir); + + // Create store and add documents with old version but correct size/mtime + let store = Store::open(temp_dir.path().to_str().unwrap()).unwrap(); + + let mut old_docs = Vec::new(); + for path in &file_paths { + let metadata = fs::metadata(path).unwrap(); + let doc_meta = DocMeta { + path: path.clone(), + size_bytes: metadata.len(), + mtime: metadata + .modified() + .unwrap() + .duration_since(UNIX_EPOCH) + .unwrap() + .as_secs() as i64, + _version: 1, // older version than CURRENT_EMBEDDING_VERSION (2) + }; + old_docs.push(doc_meta); + } + store.upsert_document_metadata(&old_docs).unwrap(); + + let states = store.analyze_document_states(&file_paths).unwrap(); + assert_eq!(states.len(), 3); + for state in &states { + match state { + DocumentState::Changed(info) => { + assert!(file_paths.contains(&info.filename)); + } + _ => panic!("Expected Changed state due to version mismatch"), + } + } + + drop(store); + drop(temp_dir); + } + + #[test] + fn test_analyze_document_states_nonexistent_file() { + use tempfile::TempDir; + + let temp_dir = TempDir::new().unwrap(); + let mut file_paths = create_test_files(&temp_dir); + + // Add a nonexistent file to the list + file_paths.push("/nonexistent/file.txt".to_string()); + + let store = Store::open(temp_dir.path().to_str().unwrap()).unwrap(); + + let states = store.analyze_document_states(&file_paths).unwrap(); + + // Should only have states for existing files + 
assert_eq!(states.len(), 3); + + for state in &states { + if let DocumentState::New(doc_info) = state { + assert_ne!(doc_info.filename, "/nonexistent/file.txt"); + } + } + + drop(store); + drop(temp_dir); + } + + #[test] + fn test_json_to_payload_doc_meta() { + let doc_meta = DocMeta { + path: "hello.txt".to_string(), + size_bytes: 1200_u64, + mtime: 1234567890, + _version: CURRENT_EMBEDDING_VERSION, + }; + let doc_meta_json = + serde_json::to_value(doc_meta).expect("Should be able to conver DocMeta to JSON Value"); + let qdrant_payload = json_to_payload(doc_meta_json); + assert!(qdrant_payload.contains_key("path")); + assert!(qdrant_payload.contains_key("size_bytes")); + assert!(qdrant_payload.contains_key("mtime")); + assert!(qdrant_payload.contains_key("_version")); + for (k, v) in qdrant_payload.0.iter() { + match k.as_str() { + "path" => assert_eq!(v, &Value::from("hello.txt")), + "size_bytes" => assert_eq!(v, &Value::from(1200)), + "mtime" => assert_eq!(v, &Value::from(1234567890)), + "_version" => assert_eq!(v, &Value::from(CURRENT_EMBEDDING_VERSION)), + _ => panic!("Unexpected key: {}", k), + } + } + } + + #[test] + fn test_json_to_payload_line_embedding() { + let line_embedding = LineEmbedding { + path: "hello.txt".to_string(), + line_number: 12, + embedding: vec![0.1, 0.3, 0.4, 0.5], + }; + let doc_meta_json = serde_json::to_value(line_embedding) + .expect("Should be able to conver LineEmbedding to JSON Value"); + let qdrant_payload = json_to_payload(doc_meta_json); + assert!(qdrant_payload.contains_key("path")); + assert!(qdrant_payload.contains_key("line_number")); + assert!(!qdrant_payload.contains_key("embedding")); + for (k, v) in qdrant_payload.0.iter() { + match k.as_str() { + "path" => assert_eq!(v, &Value::from("hello.txt")), + "line_number" => assert_eq!(v, &Value::from(12)), + _ => panic!("Unexpected key: {}", k), + } + } + } + + #[test] + fn test_payload_to_doc_meta() { + let json_value = json!({ + "path": "hello.txt", + "size_bytes": 
1000_u64, + "mtime": 1234567890_i64, + "_version": CURRENT_EMBEDDING_VERSION, + }); + let map: serde_json::Map = json_value + .as_object() + .expect("Should be able to convert JSON value to map") + .clone(); + let payload = Payload::from(map); + let doc_meta = + payload_to_doc_meta(&payload).expect("Should be able to convert Payload to DocMeta"); + assert_eq!(doc_meta.path, "hello.txt"); + assert_eq!(doc_meta.size_bytes, 1000_u64); + assert_eq!(doc_meta.mtime, 1234567890_i64); + assert_eq!(doc_meta._version, CURRENT_EMBEDDING_VERSION); + } + + #[test] + fn test_payload_to_line_embedding() { + let json_value = json!({ + "path": "hello.txt", + "line_number": 12_i32, + }); + let map: serde_json::Map = json_value + .as_object() + .expect("Should be able to convert JSON value to map") + .clone(); + let payload = Payload::from(map); + let line_embedding = payload_to_line_embedding(&payload) + .expect("Should be able to convert Payload to DocMeta"); + assert_eq!(line_embedding.path, "hello.txt"); + assert_eq!(line_embedding.line_number, 12_i32); + assert!(line_embedding.embedding.is_empty()); + } +} From 5381a9b810ce2345491aed7a8a1c3a2127e70316 Mon Sep 17 00:00:00 2001 From: "Clelia (Astra) Bertelli" Date: Fri, 6 Feb 2026 21:08:07 +0100 Subject: [PATCH 5/8] chore: version bump --- Cargo.lock | 2 +- Cargo.toml | 2 +- package.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4a60ed6f..ab0bc45b 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4077,7 +4077,7 @@ checksum = "b12e76d157a900eb52e81bc6e9f3069344290341720e9178cde2407113ac8d89" [[package]] name = "semtools" -version = "2.0.0" +version = "3.0.0" dependencies = [ "anyhow", "async-openai", diff --git a/Cargo.toml b/Cargo.toml index 248db743..daff399a 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "semtools" -version = "2.0.0" +version = "3.0.0" edition = "2024" license = "MIT" description = "Semantic search and document parsing tools for the 
command line" diff --git a/package.json b/package.json index 8f7e0658..874177ad 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@llamaindex/semtools", - "version": "2.0.0", + "version": "3.0.0", "description": "Semantic search and document parsing tools for the command line (Rust-backed, npm-distributed)", "license": "MIT", "author": "LlamaIndex", From 7ff4daf38b2358e6a9c77f0a1208df836c65323b Mon Sep 17 00:00:00 2001 From: Logan Markewich Date: Fri, 6 Feb 2026 15:28:27 -0600 Subject: [PATCH 6/8] some minor nits, made the hashing stable --- src/ask/tool_calling.rs | 5 +++++ src/workspace/store.rs | 37 ++++++++++++++++++++++++------------- 2 files changed, 29 insertions(+), 13 deletions(-) diff --git a/src/ask/tool_calling.rs b/src/ask/tool_calling.rs index ba9b5700..b2ca94d4 100644 --- a/src/ask/tool_calling.rs +++ b/src/ask/tool_calling.rs @@ -117,6 +117,11 @@ pub async fn call_tool( println!(" start_line: {}", start_line); println!(" end_line: {}", end_line); + // Update files_searched in cur_output + if !cur_output.files_searched.contains(&path.to_string()) { + cur_output.files_searched.push(path.to_string()); + } + ReadTool::read(path, start_line, end_line).await } _ => Err(anyhow::anyhow!("Unknown tool: {}", name)), diff --git a/src/workspace/store.rs b/src/workspace/store.rs index 7ef954f7..3388f6f0 100644 --- a/src/workspace/store.rs +++ b/src/workspace/store.rs @@ -22,8 +22,6 @@ use shard::query::query_enum::QueryEnum; use shard::query::{ScoringQuery, ShardQueryRequest}; use shard::scroll::ScrollRequestInternal; use std::collections::HashMap; -use std::collections::hash_map::DefaultHasher; -use std::hash::{Hash, Hasher}; use std::path::Path; use std::str::FromStr; @@ -36,6 +34,9 @@ pub const CURRENT_EMBEDDING_VERSION: u32 = 2; /// Embedding size (needed to inform Qdrant collection when it is instantiated) pub const LINE_EMBEDDING_SIZE: usize = 256; +/// We are not actually storing document-level embeddings, +/// but Qdrant requires a 
vector size to be defined for the collection, so we use a dummy size of 1. +/// This collection is being used for document-level metadata pub const DOCUMENT_EMBEDDING_SIZE: usize = 1; /// Vector name used in the documents shard @@ -70,19 +71,16 @@ pub struct LineEmbedding { impl DocMeta { pub fn id(&self) -> u64 { // Generate deterministic ID based on path hash for consistent upserts - let mut hasher = DefaultHasher::new(); - self.path.hash(&mut hasher); - hasher.finish() + fnv1a_hash(self.path.as_bytes()) } } impl LineEmbedding { pub fn id(&self) -> u64 { // Generate deterministic ID based on path + line number for consistent upserts - let mut hasher = DefaultHasher::new(); - self.path.hash(&mut hasher); - self.line_number.hash(&mut hasher); - hasher.finish() + let mut bytes = self.path.as_bytes().to_vec(); + bytes.extend_from_slice(&self.line_number.to_le_bytes()); + fnv1a_hash(&bytes) } } @@ -109,9 +107,9 @@ pub struct Store { impl Store { /// Initialize or load storage for a workspace directory pub fn open(workspace_dir: &str) -> Result { - let document_shard_path = Path::new(workspace_dir).join("documents"); + let document_shard_path = Path::new(workspace_dir).join("documents.qdrant"); - let line_embeddings_shard_path = Path::new(workspace_dir).join("line_embeddings"); + let line_embeddings_shard_path = Path::new(workspace_dir).join("line_embeddings.qdrant"); // Create shard directories std::fs::create_dir_all(&document_shard_path)?; @@ -241,7 +239,7 @@ impl Store { )), Condition::Field(FieldCondition::new_match( JsonPath::from_str("_version").map_err(|_| { - anyhow!("An error occurred while creating JSONPath from 'path'") + anyhow!("An error occurred while creating JSONPath from '_version'") })?, Match::new_value(ValueVariants::Integer( CURRENT_EMBEDDING_VERSION as i64, @@ -366,7 +364,7 @@ impl Store { .update(operation) .map_err(|e| anyhow!(e.to_string()))?; - // // flush to disk + // flush to disk self.flush_documents(); } @@ -603,6 +601,19 @@ impl Store { 
} } +/// Generate a stable hash for a byte slice using the FNV-1a algorithm. +fn fnv1a_hash(bytes: &[u8]) -> u64 { + const FNV_OFFSET_BASIS: u64 = 0xcbf29ce484222325; + const FNV_PRIME: u64 = 0x100000001b3; + + let mut hash = FNV_OFFSET_BASIS; + for &byte in bytes { + hash ^= byte as u64; + hash = hash.wrapping_mul(FNV_PRIME); + } + hash +} + /// Create a point struct for upserting. fn make_point( id: u64, From 8b5c78cebe5a880434fe9ea69c1d511d0f7fdd2f Mon Sep 17 00:00:00 2001 From: "Clelia (Astra) Bertelli" Date: Mon, 9 Feb 2026 15:34:27 +0100 Subject: [PATCH 7/8] fix: avoid re-configuring shard on existing data folder --- src/workspace/store.rs | 96 +++++++++++++++++++++++------------------- 1 file changed, 52 insertions(+), 44 deletions(-) diff --git a/src/workspace/store.rs b/src/workspace/store.rs index 3388f6f0..7c9ddcbd 100644 --- a/src/workspace/store.rs +++ b/src/workspace/store.rs @@ -111,57 +111,65 @@ impl Store { let line_embeddings_shard_path = Path::new(workspace_dir).join("line_embeddings.qdrant"); - // Create shard directories - std::fs::create_dir_all(&document_shard_path)?; - std::fs::create_dir_all(&line_embeddings_shard_path)?; - - // Create segment config for the shard - let mut vector_data_document_shard = HashMap::new(); - vector_data_document_shard.insert( - DOCUMENTS_VECTOR_NAME.to_string(), - VectorDataConfig { - size: DOCUMENT_EMBEDDING_SIZE, - distance: Distance::Cosine, - storage_type: VectorStorageType::ChunkedMmap, - index: Default::default(), - quantization_config: None, - multivector_config: None, - datatype: None, - }, - ); + let segment_config_document_shard: Option = if !document_shard_path.exists() + { + std::fs::create_dir_all(&document_shard_path)?; + // Create segment config for the shard + let mut vector_data_document_shard = HashMap::new(); + vector_data_document_shard.insert( + DOCUMENTS_VECTOR_NAME.to_string(), + VectorDataConfig { + size: DOCUMENT_EMBEDDING_SIZE, + distance: Distance::Cosine, + storage_type: 
VectorStorageType::ChunkedMmap, + index: Default::default(), + quantization_config: None, + multivector_config: None, + datatype: None, + }, + ); - let segment_config_document_shard = SegmentConfig { - vector_data: vector_data_document_shard, - sparse_vector_data: HashMap::new(), - payload_storage_type: PayloadStorageType::Mmap, + Some(SegmentConfig { + vector_data: vector_data_document_shard, + sparse_vector_data: HashMap::new(), + payload_storage_type: PayloadStorageType::Mmap, + }) + } else { + None }; - let documents_shard = - EdgeShard::load(&document_shard_path, Some(segment_config_document_shard))?; - - let mut vector_data_line_embeddings_shard = HashMap::new(); - vector_data_line_embeddings_shard.insert( - LINE_EMBEDDINGS_VECTOR_NAME.to_string(), - VectorDataConfig { - size: LINE_EMBEDDING_SIZE, - distance: Distance::Cosine, - storage_type: VectorStorageType::ChunkedMmap, - index: Default::default(), - quantization_config: None, - multivector_config: None, - datatype: None, - }, - ); + // Create shard directories + let segment_config_line_embeddings_shard: Option = + if !line_embeddings_shard_path.exists() { + std::fs::create_dir_all(&line_embeddings_shard_path)?; + let mut vector_data_line_embeddings_shard = HashMap::new(); + vector_data_line_embeddings_shard.insert( + LINE_EMBEDDINGS_VECTOR_NAME.to_string(), + VectorDataConfig { + size: LINE_EMBEDDING_SIZE, + distance: Distance::Cosine, + storage_type: VectorStorageType::ChunkedMmap, + index: Default::default(), + quantization_config: None, + multivector_config: None, + datatype: None, + }, + ); - let segment_config_line_embeddings_shard = SegmentConfig { - vector_data: vector_data_line_embeddings_shard, - sparse_vector_data: HashMap::new(), - payload_storage_type: PayloadStorageType::Mmap, - }; + Some(SegmentConfig { + vector_data: vector_data_line_embeddings_shard, + sparse_vector_data: HashMap::new(), + payload_storage_type: PayloadStorageType::Mmap, + }) + } else { + None + }; + + let documents_shard 
= EdgeShard::load(&document_shard_path, segment_config_document_shard)?; let line_embeddings_shard = EdgeShard::load( &line_embeddings_shard_path, - Some(segment_config_line_embeddings_shard), + segment_config_line_embeddings_shard, )?; Ok(Self { From 8a53947ae24746d0689c816eae6790cb7081c9d8 Mon Sep 17 00:00:00 2001 From: "Clelia (Astra) Bertelli" Date: Mon, 9 Feb 2026 20:47:27 +0100 Subject: [PATCH 8/8] fix: retrieval limit --- .gitignore | 3 +++ src/search/mod.rs | 8 ++++++++ src/workspace/store.rs | 45 ++++++++++++++++++++++++++++++++++++++---- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git a/.gitignore b/.gitignore index 8ecea0db..00ed1790 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ target .idea legacy_store +.venv/ +arxiv_dataset_1000_papers/ +*.log diff --git a/src/search/mod.rs b/src/search/mod.rs index eb3e6d85..7d52e437 100644 --- a/src/search/mod.rs +++ b/src/search/mod.rs @@ -190,11 +190,19 @@ pub async fn search_with_workspace( // Step 3: Update workspace with new/changed line embeddings if !line_embeddings_to_upsert.is_empty() { + eprintln!( + "Updating workspace with {} lines from new/changed docs...", + line_embeddings_to_upsert.len() + ); store.upsert_line_embeddings(&line_embeddings_to_upsert)?; } // Also update document metadata for tracking changes if !docs_to_upsert.is_empty() { + eprintln!( + "Updating workspace with {} new/changed documents...", + docs_to_upsert.len() + ); store.upsert_document_metadata(&docs_to_upsert)?; } diff --git a/src/workspace/store.rs b/src/workspace/store.rs index 7c9ddcbd..91be766c 100644 --- a/src/workspace/store.rs +++ b/src/workspace/store.rs @@ -45,6 +45,9 @@ const DOCUMENTS_VECTOR_NAME: &str = "documents"; /// Vector name used in the line embeddings shard const LINE_EMBEDDINGS_VECTOR_NAME: &str = "line_embeddings"; +/// Default limit for Qdrant retrieval +const DEFAULT_RETRIEVAL_LIMIT: usize = 10000; + #[derive(Debug, Clone, Serialize, Deserialize)] pub struct DocMeta { pub path: 
String, @@ -180,6 +183,11 @@ impl Store { pub fn get_existing_docs(&self, paths: &[String]) -> Result> { let mut existing = HashMap::new(); + let docs_count = self.count_documents(); + let retrieval_limit = match docs_count { + Ok(count) => count, + Err(_) => DEFAULT_RETRIEVAL_LIMIT, + }; for chunk in paths.chunks(1000) { let scroll_result = self.documents_shard.scroll(ScrollRequestInternal { @@ -198,7 +206,7 @@ impl Store { should: None, min_should: None, }), - limit: None, + limit: Some(retrieval_limit), }); let records = match scroll_result { Ok(r) => { @@ -229,6 +237,11 @@ impl Store { } let mut point_ids: Vec = vec![]; + let docs_count = self.count_documents(); + let retrieval_limit = match docs_count { + Ok(count) => count, + Err(_) => DEFAULT_RETRIEVAL_LIMIT, + }; // collect all point IDs to be deleted for chunk in paths.chunks(1000) { @@ -258,7 +271,7 @@ impl Store { should: None, min_should: None, }), - limit: None, + limit: Some(retrieval_limit), }); let records = match scroll_result { Ok(r) => { @@ -293,6 +306,11 @@ impl Store { } let mut point_ids: Vec = vec![]; + let line_embds_count = self.count_line_embeddings(); + let retrieval_limit = match line_embds_count { + Ok(count) => count, + Err(_) => DEFAULT_RETRIEVAL_LIMIT, + }; // collect all point IDs to be deleted for chunk in paths.chunks(1000) { @@ -309,7 +327,7 @@ impl Store { Match::from(AnyVariants::Strings(chunk.iter().cloned().collect())), ), ))), - limit: None, + limit: Some(retrieval_limit), }); let records = match scroll_result { Ok(r) => { @@ -427,6 +445,12 @@ impl Store { /// Get paths for all stored documents pub fn get_all_document_paths(&self) -> Result> { + let docs_count = self.count_documents(); + let retrieval_limit = match docs_count { + Ok(count) => count, + Err(_) => DEFAULT_RETRIEVAL_LIMIT, + }; + let scroll_result = self .documents_shard .scroll(ScrollRequestInternal { @@ -435,7 +459,7 @@ impl Store { with_vector: WithVector::Bool(false), with_payload: 
Some(WithPayloadInterface::Bool(true)), filter: None, - limit: None, + limit: Some(retrieval_limit), }) .map_err(|e| anyhow!(e.to_string()))?; @@ -598,6 +622,19 @@ impl Store { Ok(count) } + /// Get the number of indexed points in the line embeddings shard + pub fn count_line_embeddings(&self) -> Result { + let count = self + .line_embeddings_shard + .count(CountRequestInternal { + filter: None, + exact: true, + }) + .map_err(|e| anyhow!(e.to_string()))?; + + Ok(count) + } + /// Flush all documents data to disk. pub fn flush_documents(&self) { self.documents_shard.flush();