From 1d7266311df9068357c159cc21efe059cea08bf7 Mon Sep 17 00:00:00 2001 From: xav-db Date: Sat, 13 Sep 2025 21:13:54 +0100 Subject: [PATCH 1/9] implementing configurable vector similarity --- helix-db/Cargo.toml | 13 ++- helix-db/benches/hnsw_benches.rs | 8 +- helix-db/src/helix_engine/bm25/bm25.rs | 48 ++++++----- helix-db/src/helix_engine/bm25/bm25_tests.rs | 53 +++++++++--- helix-db/src/helix_engine/tests/hnsw_tests.rs | 85 ++++++++++--------- .../tests/traversal_tests/drop_tests.rs | 10 ++- .../traversal_tests/edge_traversal_tests.rs | 17 +++- .../tests/traversal_tests/util_tests.rs | 60 +++++++++++-- .../traversal_tests/vector_traversal_tests.rs | 68 ++++++++++++--- .../src/helix_engine/tests/vector_tests.rs | 18 ++-- .../ops/vectors/brute_force_search.rs | 9 +- .../traversal_core/ops/vectors/insert.rs | 6 +- .../traversal_core/ops/vectors/search.rs | 18 ++-- helix-db/src/helix_engine/vector_core/hnsw.rs | 15 ++-- .../src/helix_engine/vector_core/vector.rs | 11 ++- .../helix_engine/vector_core/vector_core.rs | 22 +++-- .../vector_core/vector_distance.rs | 27 ++++-- helix-db/src/helix_gateway/mcp/tools.rs | 50 ++++++++--- helix-db/src/helix_gateway/mcp/tools_tests.rs | 21 +++-- 19 files changed, 384 insertions(+), 175 deletions(-) diff --git a/helix-db/Cargo.toml b/helix-db/Cargo.toml index 1b6fed8c..45a2d42a 100644 --- a/helix-db/Cargo.toml +++ b/helix-db/Cargo.toml @@ -15,7 +15,7 @@ helix-metrics = { path = "../metrics" } # external dependencies tokio = { version = "1.44.2", features = ["full"] } serde = { version = "1.0.217", features = ["derive"] } -bincode = "1.3.3" # TODO: Figure out bincode 2 impl with current serde impl +bincode = "1.3.3" # TODO: Figure out bincode 2 impl with current serde impl sonic-rs = "0.5.0" inventory = "0.3.16" twox-hash = "2.1.0" @@ -23,7 +23,10 @@ heed3 = "0.22.0" uuid = { version = "1.12.1", features = ["v4", "v6", "fast-rng"] } rand = "0.9.0" chrono = "0.4.39" -flume = { version = "0.11.1", default-features = false, features = ["async", "select"] } +flume = { version = "0.11.1", default-features = false, features = [ + "async", + "select", +] } itertools = "0.14.0" tempfile = "3.20.0" paste = "1.0.15" @@ -55,12 +58,8 @@ num_cpus = "1.17" # TODO: [features] debug-output = ["helix-macros/debug-output"] compiler = ["pest", "pest_derive"] - -# vector features -cosine = [] - build = ["compiler"] -vectors = ["cosine", "url"] +vectors = ["url"] server = ["build", "compiler", "vectors", "reqwest"] full = ["build", "compiler", "vectors"] dev = ["debug-output", "server"] diff --git a/helix-db/benches/hnsw_benches.rs b/helix-db/benches/hnsw_benches.rs index 355c0e27..693fca72 100644 --- a/helix-db/benches/hnsw_benches.rs +++ b/helix-db/benches/hnsw_benches.rs @@ -6,7 +6,7 @@ mod tests { helix_engine::vector_core::{ hnsw::HNSW, vector::HVector, - vector_core::{HNSWConfig, VectorCore}, + vector_core::{HNSWConfig, VectorCore}, vector_distance::SimilarityMethod, }, utils::tqdm::tqdm, }; @@ -84,7 +84,7 @@ mod tests { .iter() .filter_map(|base_vec| { query_hvector - .distance_to(base_vec) + .distance_to(base_vec, &SimilarityMethod::default()) .map(|dist| (base_vec.id.clone(), dist)) .ok() }) @@ -319,7 +319,7 @@ mod tests { let over_all_time = Instant::now(); for (i, data) in base_vectors.iter().enumerate() { let start_time = Instant::now(); - let vec = index.insert::(&mut txn, &data, None).unwrap(); + let vec = index.insert::(&mut txn, &data, None, &SimilarityMethod::default()).unwrap(); let time = start_time.elapsed(); base_all_vectors.push(vec); //println!("{} => inserting in {} ms", i, time.as_millis()); @@ -354,7 +354,7 @@ mod tests { let mut total_search_time = std::time::Duration::from_secs(0); for (qid, query) in query_vectors.iter() { let start_time = Instant::now(); - let results = index.search::(&txn, query, k, "vector", None, false).unwrap(); + let results = index.search::(&txn, query, k, "vector", None, false, &SimilarityMethod::default()).unwrap(); let search_duration = start_time.elapsed(); total_search_time += search_duration; diff --git a/helix-db/src/helix_engine/bm25/bm25.rs b/helix-db/src/helix_engine/bm25/bm25.rs index 06ea3e45..047c7488 100644 --- a/helix-db/src/helix_engine/bm25/bm25.rs +++ b/helix-db/src/helix_engine/bm25/bm25.rs @@ -1,14 +1,14 @@ use crate::{ + debug_println, helix_engine::{ storage_core::HelixGraphStorage, types::GraphError, - vector_core::{hnsw::HNSW, vector::HVector}, + vector_core::{hnsw::HNSW, vector::HVector, vector_distance::SimilarityMethod}, }, protocol::value::Value, - debug_println, }; -use heed3::{types::*, Database, Env, RoTxn, RwTxn}; +use heed3::{Database, Env, RoTxn, RwTxn, types::*}; use serde::{Deserialize, Serialize}; use std::collections::HashMap; use tokio::task; @@ -82,10 +82,10 @@ impl HBM25Config { let doc_lengths_db: Database, U32> = graph_env - .database_options() - .types::, U32>() - .name(DB_BM25_DOC_LENGTHS) - .create(wtxn)?; + .database_options() + .types::, U32>() + .name(DB_BM25_DOC_LENGTHS) + .create(wtxn)?; let term_frequencies_db: Database> = graph_env .database_options() @@ -150,7 +150,7 @@ impl BM25 for HBM25Config { let current_df = self.term_frequencies_db.get(txn, term_bytes)?.unwrap_or(0); self.term_frequencies_db .put(txn, term_bytes, &(current_df + 1))?; - } + } let mut metadata = if let Some(data) = self.metadata_db.get(txn, METADATA_KEY)? { bincode::deserialize::(data)? @@ -350,6 +350,7 @@ pub trait HybridSearch { query_vector: &[f64], alpha: f32, limit: usize, + similarity_method: SimilarityMethod, ) -> impl std::future::Future, GraphError>> + Send; } @@ -360,13 +361,13 @@ impl HybridSearch for HelixGraphStorage { query_vector: &[f64], alpha: f32, limit: usize, + similarity_method: SimilarityMethod, ) -> Result, GraphError> { let query_owned = query.to_string(); let query_vector_owned = query_vector.to_vec(); let graph_env_bm25 = self.graph_env.clone(); let graph_env_vector = self.graph_env.clone(); - let bm25_handle = task::spawn_blocking(move || -> Result, GraphError> { let txn = graph_env_bm25.read_txn()?; match self.bm25.as_ref() { @@ -375,18 +376,20 @@ impl HybridSearch for HelixGraphStorage { } }); - let vector_handle = task::spawn_blocking(move || -> Result>, GraphError> { - let txn = graph_env_vector.read_txn()?; - let results = self.vectors.search:: bool>( - &txn, - &query_vector_owned, - limit * 2, - "vector", - None, - false, - )?; - Ok(Some(results)) - }); + let vector_handle = + task::spawn_blocking(move || -> Result>, GraphError> { + let txn = graph_env_vector.read_txn()?; + let results = self.vectors.search:: bool>( + &txn, + &query_vector_owned, + limit * 2, + "vector", + None, + false, + &similarity_method, + )?; + Ok(Some(results)) + }); let (bm25_results, vector_results) = match tokio::try_join!(bm25_handle, vector_handle) { Ok((a, b)) => (a, b), @@ -409,7 +412,7 @@ impl HybridSearch for HelixGraphStorage { .entry(doc_id) .and_modify(|existing_score| *existing_score += (1.0 - alpha) * similarity) .or_insert((1.0 - alpha) * similarity); // correction made here from score as f32 to similarity - } + } } let mut results = combined_scores.into_iter().collect::>(); @@ -437,4 +440,3 @@ impl BM25Flatten for HashMap { }) } } - diff --git a/helix-db/src/helix_engine/bm25/bm25_tests.rs b/helix-db/src/helix_engine/bm25/bm25_tests.rs index 7e7b0972..80c45fef 100644 --- a/helix-db/src/helix_engine/bm25/bm25_tests.rs +++ b/helix-db/src/helix_engine/bm25/bm25_tests.rs @@ -7,7 +7,7 @@ mod tests { }, storage_core::{HelixGraphStorage, version_info::VersionInfo}, traversal_core::config::Config, - vector_core::{hnsw::HNSW, vector::HVector}, + vector_core::{hnsw::HNSW, vector::HVector, vector_distance::SimilarityMethod}, }, protocol::value::Value, }; @@ -1424,9 +1424,12 @@ mod tests { let mut wtxn = storage.graph_env.write_txn().unwrap(); let vectors = generate_random_vectors(800, 650); for vec in vectors { - let _ = storage - .vectors - .insert:: bool>(&mut wtxn, &vec, None); + let _ = storage.vectors.insert:: bool>( + &mut wtxn, + &vec, + None, + &SimilarityMethod::default(), + ); } wtxn.commit().unwrap(); @@ -1436,7 +1439,13 @@ mod tests { let limit = 10; let result = storage - .hybrid_search(query, &query_vector[0], alpha, limit) + .hybrid_search( + query, + &query_vector[0], + alpha, + limit, + SimilarityMethod::default(), + ) .await; match result { @@ -1466,9 +1475,12 @@ mod tests { let mut wtxn = storage.graph_env.write_txn().unwrap(); let vectors = generate_random_vectors(800, 650); for vec in vectors { - let _ = storage - .vectors - .insert:: bool>(&mut wtxn, &vec, None); + let _ = storage.vectors.insert:: bool>( + &mut wtxn, + &vec, + None, + &SimilarityMethod::default(), + ); } wtxn.commit().unwrap(); @@ -1477,7 +1489,13 @@ mod tests { // alpha = 0.0 (Vector only) let results_vector_only = storage - .hybrid_search(query, &query_vector[0], 0.0, 10) + .hybrid_search( + query, + &query_vector[0], + 0.0, + 10, + SimilarityMethod::default(), + ) .await; match results_vector_only { @@ -1509,9 +1527,12 @@ mod tests { let mut wtxn = storage.graph_env.write_txn().unwrap(); let vectors = generate_random_vectors(800, 650); for vec in vectors { - let _ = storage - .vectors - .insert:: bool>(&mut wtxn, &vec, None); + let _ = storage.vectors.insert:: bool>( + &mut wtxn, + &vec, + None, + &SimilarityMethod::default(), + ); } wtxn.commit().unwrap(); @@ -1520,7 +1541,13 @@ mod tests { // alpha = 1.0 (BM25 only) let results_bm25_only = storage - .hybrid_search(query, &query_vector[0], 1.0, 10) + .hybrid_search( + query, + &query_vector[0], + 1.0, + 10, + SimilarityMethod::default(), + ) .await; // all should be valid results or acceptable errors diff --git a/helix-db/src/helix_engine/tests/hnsw_tests.rs b/helix-db/src/helix_engine/tests/hnsw_tests.rs index 6c94a0c8..baa5406b 100644 --- a/helix-db/src/helix_engine/tests/hnsw_tests.rs +++ b/helix-db/src/helix_engine/tests/hnsw_tests.rs @@ -1,18 +1,14 @@ // MAKE SURE TO --release -use crate::{ - helix_engine::vector_core::{ - hnsw::HNSW, - vector::HVector, - vector_core::{HNSWConfig, VectorCore}, - }, +use crate::helix_engine::vector_core::{ + hnsw::HNSW, + vector::HVector, + vector_core::{HNSWConfig, VectorCore}, + vector_distance::SimilarityMethod, }; use heed3::{Env, EnvOpenOptions, RoTxn}; -use rand::{ - seq::SliceRandom, - Rng, -}; +use rand::{Rng, seq::SliceRandom}; use std::{ - collections::{HashSet, HashMap}, + collections::{HashMap, HashSet}, sync::{Arc, Mutex}, thread, }; @@ -59,30 +55,27 @@ fn calc_ground_truths( .iter() .filter_map(|base_vec| { query_hvector - .distance_to(base_vec) + .distance_to(base_vec, &SimilarityMethod::default()) .map(|dist| (base_vec.id.clone(), dist)) .ok() }) - .collect(); + .collect(); distances.sort_by(|a, b| { a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal) }); - let top_k_ids: Vec = distances - .into_iter() - .take(k) - .map(|(id, _)| id) - .collect(); + let top_k_ids: Vec = + distances.into_iter().take(k).map(|(id, _)| id).collect(); (query_id, top_k_ids) }) - .collect(); + .collect(); results.lock().unwrap().extend(local_results); }) }) - .collect(); + .collect(); for handle in handles { handle.join().unwrap(); @@ -116,22 +109,14 @@ fn tests_hnsw_config_build() { let env = setup_temp_env(); let mut txn = env.write_txn().unwrap(); - let config = HNSWConfig::new( - Some(32), - Some(256), - Some(256), - ); + let config = HNSWConfig::new(Some(32), Some(256), Some(256)); let index = VectorCore::new(&env, &mut txn, config).unwrap(); assert_eq!(index.config.m, 32); assert_eq!(index.config.ef_construct, 256); assert_eq!(index.config.ef, 256); - let config = HNSWConfig::new( - Some(6969), - Some(6969), - Some(6969), - ); + let config = HNSWConfig::new(Some(6969), Some(6969), Some(6969)); assert_eq!(config.m, 48); assert_eq!(config.ef_construct, 512); assert_eq!(config.ef, 512); @@ -148,7 +133,9 @@ fn test_hnsw_insert() { let vectors = gen_sim_vecs(n_base, dims, 0.8); for data in vectors { - let vec = index.insert::(&mut txn, &data, None).unwrap(); + let vec = index + .insert::(&mut txn, &data, None, &SimilarityMethod::default()) + .unwrap(); assert_eq!(vec.data, data); assert!(vec.properties.is_none()); } @@ -169,7 +156,11 @@ fn test_get_vector() { let mut all_vectors: Vec = Vec::with_capacity(n_base); for data in vectors { - all_vectors.push(index.insert::(&mut txn, &data, None).unwrap()); + all_vectors.push( + index + .insert::(&mut txn, &data, None, &SimilarityMethod::default()) + .unwrap(), + ); } for inserted_vec in all_vectors { @@ -211,7 +202,11 @@ fn test_hnsw_search() { let mut base_all_vectors: Vec = Vec::new(); for data in base_vectors.iter() { - base_all_vectors.push(index.insert::(&mut txn, &data, None).unwrap()); + base_all_vectors.push( + index + .insert::(&mut txn, &data, None, &SimilarityMethod::default()) + .unwrap(), + ); } txn.commit().unwrap(); @@ -225,7 +220,17 @@ fn test_hnsw_search() { let mut total_recall = 0.0; let mut total_precision = 0.0; for (qid, query) in query_vectors { - let results = index.search::(&txn, &query, k, "vector", None, false).unwrap(); + let results = index + .search::( + &txn, + &query, + k, + "vector", + None, + false, + &SimilarityMethod::default(), + ) + .unwrap(); let result_indices = results .into_iter() @@ -256,18 +261,14 @@ fn test_hnsw_search() { total_recall, total_precision ); assert!(total_recall >= 0.8, "recall not high enough!"); - assert!(total_precision>= 0.8, "precision not high enough!"); + assert!(total_precision >= 0.8, "precision not high enough!"); } #[test] -fn test_hnsw_search_property_ordering() { -} +fn test_hnsw_search_property_ordering() {} #[test] -fn test_hnsw_search_filter_ordering() { -} +fn test_hnsw_search_filter_ordering() {} #[test] -fn test_hnsw_delete() { -} - +fn test_hnsw_delete() {} diff --git a/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs index 2ccde09f..25db39d1 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs @@ -21,7 +21,7 @@ use crate::{ }, traversal_value::{Traversable, TraversalValue}, }, - vector_core::vector::HVector, + vector_core::{vector::HVector, vector_distance::SimilarityMethod}, }, props, utils::filterable::Filterable, @@ -362,13 +362,19 @@ fn test_vector_deletion_in_existing_graph() { &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None, + &SimilarityMethod::default(), ) .collect_to_obj(); other_vectors.push(other_vector); } let vector = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>(&[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None) + .insert_v:: bool>( + &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "vector", + None, + &SimilarityMethod::default(), + ) .collect_to_obj(); for other_vector in &other_vectors { diff --git a/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs index 80ca286e..68544f48 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs @@ -20,7 +20,7 @@ use crate::{ }, traversal_value::{Traversable, TraversalValue}, }, - vector_core::vector::HVector, + vector_core::{vector::HVector, vector_distance::SimilarityMethod}, }, props, utils::filterable::Filterable, @@ -408,7 +408,12 @@ fn test_add_e_between_node_and_vector() { .collect_to_obj(); let vector = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>(&[1.0, 2.0, 3.0], "vector", None) + .insert_v:: bool>( + &[1.0, 2.0, 3.0], + "vector", + None, + &SimilarityMethod::default(), + ) .collect_to_obj(); let _ = G::new_mut(Arc::clone(&storage), &mut txn) @@ -435,7 +440,13 @@ fn test_add_e_between_node_and_vector() { println!( "vectors: {:?}", G::new(Arc::clone(&storage), &txn) - .search_v:: bool, _>(&[1.0, 2.0, 3.0], 10, "vector", None) + .search_v:: bool, _>( + &[1.0, 2.0, 3.0], + 10, + "vector", + None, + &SimilarityMethod::default() + ) .collect_to::>() ); diff --git a/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs index 9598fc61..674e5da6 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs @@ -17,7 +17,7 @@ use crate::{ }, traversal_value::Traversable, }, - vector_core::vector::HVector, + vector_core::{vector::HVector, vector_distance::SimilarityMethod}, }, props, }; @@ -212,22 +212,43 @@ fn test_order_vector_by_asc() { type FnTy = fn(&HVector, &RoTxn) -> bool; let vector = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 30 })) + .insert_v::( + &[1.0, 2.0, 3.0], + "vector", + Some(props! { "age" => 30 }), + &SimilarityMethod::default(), + ) .collect_to_obj(); let vector2 = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 20 })) + .insert_v::( + &[1.0, 2.0, 3.0], + "vector", + Some(props! { "age" => 20 }), + &SimilarityMethod::default(), + ) .collect_to_obj(); let vector3 = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 10 })) + .insert_v::( + &[1.0, 2.0, 3.0], + "vector", + Some(props! { "age" => 10 }), + &SimilarityMethod::default(), + ) .collect_to_obj(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); let traversal = G::new(Arc::clone(&storage), &txn) - .search_v::(&[1.0, 2.0, 3.0], 10, "vector", None) + .search_v::( + &[1.0, 2.0, 3.0], + 10, + "vector", + None, + &SimilarityMethod::default(), + ) .order_by_asc("age") .collect_to::>(); @@ -244,22 +265,43 @@ fn test_order_vector_by_desc() { type FnTy = fn(&HVector, &RoTxn) -> bool; let vector = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 30 })) + .insert_v::( + &[1.0, 2.0, 3.0], + "vector", + Some(props! { "age" => 30 }), + &SimilarityMethod::default(), + ) .collect_to_obj(); let vector2 = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 20 })) + .insert_v::( + &[1.0, 2.0, 3.0], + "vector", + Some(props! { "age" => 20 }), + &SimilarityMethod::default(), + ) .collect_to_obj(); let vector3 = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v::(&[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 10 })) + .insert_v::( + &[1.0, 2.0, 3.0], + "vector", + Some(props! { "age" => 10 }), + &SimilarityMethod::default(), + ) .collect_to_obj(); txn.commit().unwrap(); let txn = storage.graph_env.read_txn().unwrap(); let traversal = G::new(Arc::clone(&storage), &txn) - .search_v::(&[1.0, 2.0, 3.0], 10, "vector", None) + .search_v::( + &[1.0, 2.0, 3.0], + 10, + "vector", + None, + &SimilarityMethod::default(), + ) .order_by_desc("age") .collect_to::>(); diff --git a/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs index cd8751b3..f5cc95be 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs @@ -23,7 +23,7 @@ use crate::{ }, traversal_value::{Traversable, TraversalValue}, }, - vector_core::vector::HVector, + vector_core::{vector::HVector, vector_distance::SimilarityMethod}, }, props, }; @@ -55,7 +55,12 @@ fn test_from_v() { .collect_to_obj(); let vector = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>(&[1.0, 2.0, 3.0], "vector", None) + .insert_v:: bool>( + &[1.0, 2.0, 3.0], + "vector", + None, + &SimilarityMethod::default(), + ) .collect_to_obj(); let _ = G::new_mut(Arc::clone(&storage), &mut txn) @@ -86,7 +91,12 @@ fn test_to_v() { .collect_to_obj(); let vector = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>(&[1.0, 2.0, 3.0], "vector", None) + .insert_v:: bool>( + &[1.0, 2.0, 3.0], + "vector", + None, + &SimilarityMethod::default(), + ) .collect_to_obj(); let _ = G::new_mut(Arc::clone(&storage), &mut txn) @@ -127,7 +137,12 @@ fn test_brute_force_vector_search() { let mut vector_ids = Vec::new(); for vector in vectors { let vector_id = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>(&vector, "vector", None) + .insert_v:: bool>( + &vector, + "vector", + None, + &SimilarityMethod::default(), + ) .collect_to_obj() .id(); let _ = G::new_mut(Arc::clone(&storage), &mut txn) @@ -151,7 +166,7 @@ fn test_brute_force_vector_search() { .n_from_id(&node.id()) .out_e("embedding") .to_v() - .brute_force_search_v(&[1.0, 2.0, 3.0], 10) + .brute_force_search_v(&[1.0, 2.0, 3.0], 10, &SimilarityMethod::default()) .collect_to::>(); println!("traversal: {traversal:?}"); @@ -213,7 +228,12 @@ fn test_vector_search() { rng.random::(), ]; let _ = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>(&random_vector, "vector", None) + .insert_v:: bool>( + &random_vector, + "vector", + None, + &SimilarityMethod::default(), + ) .collect_to_obj(); println!("inserted vector: {i:?}"); i += 1; @@ -234,7 +254,12 @@ fn test_vector_search() { for vector in vectors { let node = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>(&vector, "vector", None) + .insert_v:: bool>( + &vector, + "vector", + None, + &SimilarityMethod::default(), + ) .collect_to_obj(); inserted_vectors.push(node.id()); println!("inserted vector: {i:?}"); @@ -250,6 +275,7 @@ fn test_vector_search() { 2000, "vector", None, + &SimilarityMethod::default(), ) .collect_to::>(); // traversal.reverse(); @@ -268,7 +294,12 @@ fn test_delete_vector() { let mut txn = storage.graph_env.write_txn().unwrap(); let vector = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>(&[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None) + .insert_v:: bool>( + &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "vector", + None, + &SimilarityMethod::default(), + ) .collect_to_obj(); let node = G::new_mut(Arc::clone(&storage), &mut txn) .add_n("person", None, None) @@ -286,6 +317,7 @@ fn test_delete_vector() { 2000, "vector", None, + &SimilarityMethod::default(), ) .collect_to::>(); @@ -302,6 +334,7 @@ fn test_delete_vector() { 2000, "vector", None, + &SimilarityMethod::default(), ) .collect_to::>(), Arc::clone(&storage), @@ -318,6 +351,7 @@ fn test_delete_vector() { 2000, "vector", None, + &SimilarityMethod::default(), ) .collect_to::>(); @@ -352,7 +386,12 @@ fn test_drop_vectors_then_add_them_back() { .collect_to_obj(); let embedding = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>(&[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None) + .insert_v:: bool>( + &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "vector", + None, + &SimilarityMethod::default(), + ) .collect_to_obj(); let _ = G::new_mut(Arc::clone(&storage), &mut txn) @@ -412,6 +451,7 @@ fn test_drop_vectors_then_add_them_back() { &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "Entity_Embedding", Some(props! { "name_embedding" => [1.0, 1.0, 1.0, 1.0, 1.0, 1.0].to_vec() }), + &SimilarityMethod::default(), ) .collect_to_obj(); let edge = G::new_mut(Arc::clone(&storage), &mut txn) @@ -434,6 +474,7 @@ fn test_drop_vectors_then_add_them_back() { 2000, "Entity_Embedding", None, + &SimilarityMethod::default(), ) .collect_to::>(); assert_eq!(traversal.len(), 1); @@ -450,7 +491,12 @@ fn test_drop_vectors_then_add_them_back() { let mut txn = storage.graph_env.write_txn().unwrap(); let embedding = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>(&[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None) + .insert_v:: bool>( + &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], + "vector", + None, + &SimilarityMethod::default(), + ) .collect_to_obj(); let _ = G::new_mut(Arc::clone(&storage), &mut txn) @@ -490,6 +536,7 @@ fn test_drop_vectors_then_add_them_back() { &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "Entity_Embedding", Some(props! { "name_embedding" => [1.0, 1.0, 1.0, 1.0, 1.0, 1.0].to_vec() }), + &SimilarityMethod::default(), ) .collect_to_obj(); let edge = G::new_mut(Arc::clone(&storage), &mut txn) @@ -512,6 +559,7 @@ fn test_drop_vectors_then_add_them_back() { 2000, "Entity_Embedding", None, + &SimilarityMethod::default(), ) .collect_to::>(); assert_eq!(traversal.len(), 1); diff --git a/helix-db/src/helix_engine/tests/vector_tests.rs b/helix-db/src/helix_engine/tests/vector_tests.rs index 057047ea..b8874ee1 100644 --- a/helix-db/src/helix_engine/tests/vector_tests.rs +++ b/helix-db/src/helix_engine/tests/vector_tests.rs @@ -1,6 +1,6 @@ -use crate::helix_engine::vector_core::vector_distance::{MAX_DISTANCE, MIN_DISTANCE, ORTHOGONAL}; - use crate::helix_engine::vector_core::vector::HVector; +use crate::helix_engine::vector_core::vector_distance::SimilarityMethod; +use crate::helix_engine::vector_core::vector_distance::{MAX_DISTANCE, MIN_DISTANCE, ORTHOGONAL}; #[test] fn test_hvector_new() { @@ -20,7 +20,7 @@ fn test_hvector_from_slice() { fn test_hvector_distance_orthogonal() { let v1 = HVector::new(vec![1.0, 0.0]); let v2 = HVector::new(vec![0.0, 1.0]); - let distance = v1.distance_to(&v2).unwrap(); + let distance = v1.distance_to(&v2, &SimilarityMethod::default()).unwrap(); assert!(distance == ORTHOGONAL); } @@ -28,7 +28,7 @@ fn test_hvector_distance_orthogonal() { fn test_hvector_distance_min() { let v1 = HVector::new(vec![1.0, 2.0, 3.0]); let v2 = HVector::new(vec![1.0, 2.0, 3.0]); - let distance = v2.distance_to(&v1).unwrap(); + let distance = v2.distance_to(&v1, &SimilarityMethod::default()).unwrap(); assert!(distance.abs() == MIN_DISTANCE); } @@ -36,7 +36,7 @@ fn test_hvector_distance_min() { fn test_hvector_distance_max() { let v1 = HVector::new(vec![0.0, 0.0]); let v2 = HVector::new(vec![3.0, 4.0]); - let distance = v1.distance_to(&v2).unwrap(); + let distance = v1.distance_to(&v2, &SimilarityMethod::default()).unwrap(); assert!(distance == MAX_DISTANCE); } @@ -69,7 +69,7 @@ fn test_hvector_is_empty() { fn test_hvector_distance_different_dimensions() { let v1 = HVector::new(vec![1.0, 2.0, 3.0]); let v2 = HVector::new(vec![1.0, 2.0, 3.0, 4.0]); - let distance = v1.distance_to(&v2).unwrap(); + let distance = v1.distance_to(&v2, &SimilarityMethod::default()).unwrap(); println!("distance: {distance}"); assert!(distance.is_finite()); } @@ -78,7 +78,7 @@ fn test_hvector_distance_different_dimensions() { fn test_hvector_large_values() { let v1 = HVector::new(vec![1e6, 2e6]); let v2 = HVector::new(vec![1e6, 2e6]); - let distance = v1.distance_to(&v2).unwrap(); + let distance = v1.distance_to(&v2, &SimilarityMethod::default()).unwrap(); assert!(distance.abs() < 1e-10); } @@ -86,7 +86,7 @@ fn test_hvector_large_values() { fn test_hvector_negative_values() { let v1 = HVector::new(vec![-1.0, -2.0]); let v2 = HVector::new(vec![1.0, 2.0]); - let distance = v1.distance_to(&v2).unwrap(); + let distance = v1.distance_to(&v2, &SimilarityMethod::default()).unwrap(); // used round to avoid floating point precision issues assert!(distance.round() == MAX_DISTANCE); } @@ -95,6 +95,6 @@ fn test_hvector_negative_values() { fn test_hvector_cosine_similarity() { let v1 = HVector::new(vec![1.0, 2.0, 3.0]); let v2 = HVector::new(vec![4.0, 5.0, 6.0]); - let similarity = v1.distance_to(&v2).unwrap(); + let similarity = v1.distance_to(&v2, &SimilarityMethod::default()).unwrap(); assert!(similarity == 1.0 - 0.9746318461970762); } diff --git a/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs b/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs index a9587f4c..6d353647 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs @@ -5,7 +5,10 @@ use crate::{ helix_engine::{ traversal_core::{traversal_iter::RoTraversalIterator, traversal_value::TraversalValue}, types::GraphError, - vector_core::vector_distance::cosine_similarity, + vector_core::{ + vector::HVector, + vector_distance::{DistanceCalc, SimilarityMethod}, + }, }, protocol::value::Value, utils::filterable::Filterable, @@ -34,6 +37,7 @@ pub trait BruteForceSearchVAdapter<'a>: self, query: &[f64], k: K, + method: &SimilarityMethod, ) -> RoTraversalIterator<'a, impl Iterator>> where K: TryInto, @@ -47,6 +51,7 @@ impl<'a, I: Iterator> + 'a> BruteForce self, query: &[f64], k: K, + method: &SimilarityMethod, ) -> RoTraversalIterator<'a, impl Iterator>> where K: TryInto, @@ -59,7 +64,7 @@ impl<'a, I: Iterator> + 'a> BruteForce .inner .filter_map(|v| match v { Ok(TraversalValue::Vector(mut v)) => { - let d = cosine_similarity(v.get_data(), query).unwrap(); + let d = HVector::distance(v.get_data(), query, method).unwrap(); v.set_distance(d); Some(v) } diff --git a/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs b/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs index 383cbc47..a614b777 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs @@ -4,7 +4,7 @@ use crate::{ helix_engine::{ traversal_core::{traversal_iter::RwTraversalIterator, traversal_value::TraversalValue}, types::GraphError, - vector_core::{hnsw::HNSW, vector::HVector}, + vector_core::{hnsw::HNSW, vector::HVector, vector_distance::SimilarityMethod}, }, protocol::value::Value, }; @@ -27,6 +27,7 @@ pub trait InsertVAdapter<'a, 'b>: Iterator>, + method: &SimilarityMethod, ) -> RwTraversalIterator<'a, 'b, impl Iterator>> where F: Fn(&HVector, &RoTxn) -> bool; @@ -40,6 +41,7 @@ impl<'a, 'b, I: Iterator>> InsertVAdap query: &[f64], label: &str, fields: Option>, + method: &SimilarityMethod, ) -> RwTraversalIterator<'a, 'b, impl Iterator>> where F: Fn(&HVector, &RoTxn) -> bool, @@ -55,7 +57,7 @@ impl<'a, 'b, I: Iterator>> InsertVAdap (String::from("is_deleted"), Value::Boolean(false)), ]), }; - let vector = self.storage.vectors.insert::(self.txn, query, fields); + let vector = self.storage.vectors.insert::(self.txn, query, fields, method); let result = match vector { Ok(vector) => Ok(TraversalValue::Vector(vector)), diff --git a/helix-db/src/helix_engine/traversal_core/ops/vectors/search.rs b/helix-db/src/helix_engine/traversal_core/ops/vectors/search.rs index fee2fb22..8b57bf48 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/vectors/search.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/vectors/search.rs @@ -3,7 +3,7 @@ use heed3::RoTxn; use crate::helix_engine::{ traversal_core::{traversal_iter::RoTraversalIterator, traversal_value::TraversalValue}, types::{GraphError, VectorError}, - vector_core::{hnsw::HNSW, vector::HVector}, + vector_core::{hnsw::HNSW, vector::HVector, vector_distance::SimilarityMethod}, }; use helix_macros::debug_trace; use std::iter::once; @@ -29,6 +29,7 @@ pub trait SearchVAdapter<'a>: Iterator k: K, label: &str, filter: Option<&[F]>, + method: &SimilarityMethod, ) -> RoTraversalIterator<'a, impl Iterator>> where F: Fn(&HVector, &RoTxn) -> bool, @@ -45,16 +46,22 @@ impl<'a, I: Iterator> + 'a> SearchVAda k: K, label: &str, filter: Option<&[F]>, + method: &SimilarityMethod, ) -> RoTraversalIterator<'a, impl Iterator>> where F: Fn(&HVector, &RoTxn) -> bool, K: TryInto, K::Error: std::fmt::Debug, { - let vectors = - self.storage - .vectors - .search(self.txn, query, k.try_into().unwrap(), label, filter, false); + let vectors = self.storage.vectors.search( + self.txn, + query, + k.try_into().unwrap(), + label, + filter, + false, + method, + ); let iter = match vectors { Ok(vectors) => vectors @@ -104,4 +111,3 @@ impl<'a, I: Iterator> + 'a> SearchVAda } } } - diff --git a/helix-db/src/helix_engine/vector_core/hnsw.rs b/helix-db/src/helix_engine/vector_core/hnsw.rs index 3b89bbfe..51d6807e 100644 --- a/helix-db/src/helix_engine/vector_core/hnsw.rs +++ b/helix-db/src/helix_engine/vector_core/hnsw.rs @@ -1,9 +1,9 @@ -use crate::{helix_engine::types::VectorError, protocol::value::Value}; use crate::helix_engine::vector_core::vector::HVector; +use crate::helix_engine::vector_core::vector_distance::SimilarityMethod; +use crate::{helix_engine::types::VectorError, protocol::value::Value}; use heed3::{RoTxn, RwTxn}; -pub trait HNSW -{ +pub trait HNSW { /// Search for the k nearest neighbors of a query vector /// /// # Arguments @@ -23,6 +23,7 @@ pub trait HNSW label: &str, filter: Option<&[F]>, should_trickle: bool, + method: &SimilarityMethod, ) -> Result, VectorError> where F: Fn(&HVector, &RoTxn) -> bool; @@ -42,6 +43,7 @@ pub trait HNSW txn: &mut RwTxn, data: &[f64], fields: Option>, + method: &SimilarityMethod, ) -> Result where F: Fn(&HVector, &RoTxn) -> bool; @@ -68,11 +70,7 @@ pub trait HNSW /// /// * `txn` - The transaction to use /// * `id` - The id of the vector - fn delete( - &self, - txn: &mut RwTxn, - id: u128, - ) -> Result<(), VectorError>; + fn delete(&self, txn: &mut RwTxn, id: u128) -> Result<(), VectorError>; /// Get specific vector based on id and level /// @@ -94,4 +92,3 @@ pub trait HNSW with_data: bool, ) -> Result; } - diff --git a/helix-db/src/helix_engine/vector_core/vector.rs b/helix-db/src/helix_engine/vector_core/vector.rs index 1b01ad1b..6447deb2 100644 --- a/helix-db/src/helix_engine/vector_core/vector.rs +++ b/helix-db/src/helix_engine/vector_core/vector.rs @@ -1,7 +1,7 @@ use crate::{ helix_engine::{ types::{GraphError, VectorError}, - vector_core::vector_distance::DistanceCalc, + vector_core::vector_distance::{DistanceCalc, SimilarityMethod}, }, protocol::{return_values::ReturnValue, value::Value}, utils::{ @@ -156,8 +156,12 @@ impl HVector { } #[inline(always)] - pub fn distance_to(&self, other: &HVector) -> Result { - HVector::distance(self, other) + pub fn distance_to( + &self, + other: &HVector, + method: &SimilarityMethod, + ) -> Result { + HVector::distance(&self.data, &other.data, method) } #[inline(always)] @@ -270,4 +274,3 @@ impl Filterable for HVector { unreachable!() } } - diff --git a/helix-db/src/helix_engine/vector_core/vector_core.rs b/helix-db/src/helix_engine/vector_core/vector_core.rs index 83fe8603..c1673376 100644 --- a/helix-db/src/helix_engine/vector_core/vector_core.rs +++ b/helix-db/src/helix_engine/vector_core/vector_core.rs @@ -6,6 +6,7 @@ use crate::{ hnsw::HNSW, utils::{Candidate, HeapOps, VectorFilter}, vector::HVector, + vector_distance::SimilarityMethod, }, }, protocol::value::Value, @@ -248,6 +249,7 @@ impl VectorCore { level: usize, should_extend: bool, filter: Option<&[F]>, + method: &SimilarityMethod, ) -> Result, VectorError> where F: Fn(&HVector, &RoTxn) -> bool, @@ -266,7 +268,7 @@ impl VectorCore { continue; } - neighbor.set_distance(neighbor.distance_to(query)?); + neighbor.set_distance(neighbor.distance_to(query, method)?); /* let passes_filters = match filter { @@ -297,6 +299,7 @@ impl VectorCore { ef: usize, level: usize, filter: Option<&[F]>, + method: &SimilarityMethod, ) -> Result, VectorError> where F: Fn(&HVector, &RoTxn) -> bool, @@ -305,7 +308,7 @@ impl VectorCore { let mut candidates: BinaryHeap = BinaryHeap::new(); let mut results: BinaryHeap = BinaryHeap::new(); - entry_point.set_distance(entry_point.distance_to(query)?); + entry_point.set_distance(entry_point.distance_to(query, method)?); candidates.push(Candidate { id: entry_point.get_id(), distance: entry_point.get_distance(), @@ -332,7 +335,7 @@ impl VectorCore { .into_iter() .filter(|neighbor| visited.insert(neighbor.get_id())) .filter_map(|mut neighbor| { - let distance = neighbor.distance_to(query).ok()?; + let distance = neighbor.distance_to(query, method).ok()?; if max_distance.is_none_or(|max| distance < max) { neighbor.set_distance(distance); @@ -403,6 +406,7 @@ impl HNSW for VectorCore { label: &str, filter: Option<&[F]>, should_trickle: bool, + method: &SimilarityMethod, ) -> Result, VectorError> where F: Fn(&HVector, &RoTxn) -> bool, @@ -425,6 +429,7 @@ impl HNSW for VectorCore { true => filter, false => None, }, + method, )?; if let Some(closest) = nearest.pop() { @@ -442,6 +447,7 @@ impl HNSW for VectorCore { true => filter, false => None, }, + method, )?; let results = @@ -456,6 +462,7 @@ impl HNSW for VectorCore { txn: &mut RwTxn, data: &[f64], fields: Option>, + method: &SimilarityMethod, ) -> Result where F: Fn(&HVector, &RoTxn) -> bool, @@ -489,7 +496,8 @@ impl HNSW for VectorCore { let l = entry_point.get_level(); let mut curr_ep = entry_point; for level in (new_level + 1..=l).rev() { - let nearest = self.search_level::(txn, &query, &mut curr_ep, 1, level, None)?; + let nearest = + self.search_level::(txn, &query, &mut curr_ep, 1, level, None, method)?; curr_ep = nearest .peek() .ok_or(VectorError::VectorCoreError( @@ -506,17 +514,19 @@ impl HNSW for VectorCore { self.config.ef_construct, level, None, + method, )?; curr_ep = nearest.peek().unwrap().clone(); - let neighbors = self.select_neighbors::(txn, &query, nearest, level, true, None)?; + let neighbors = + self.select_neighbors::(txn, &query, nearest, level, true, None, method)?; self.set_neighbours(txn, query.get_id(), &neighbors, level)?; for e in neighbors { let id = e.get_id(); let e_conns = BinaryHeap::from(self.get_neighbors::(txn, id, level, None)?); let e_new_conn = - self.select_neighbors::(txn, &query, e_conns, level, true, None)?; + self.select_neighbors::(txn, &query, e_conns, level, true, None, method)?; self.set_neighbours(txn, id, &e_new_conn, level)?; } } diff --git a/helix-db/src/helix_engine/vector_core/vector_distance.rs b/helix-db/src/helix_engine/vector_core/vector_distance.rs index b24fc6cb..08c23ed8 100644 --- a/helix-db/src/helix_engine/vector_core/vector_distance.rs +++ b/helix-db/src/helix_engine/vector_core/vector_distance.rs @@ -1,11 +1,25 @@ +use serde::Deserialize; + use crate::helix_engine::{types::VectorError, vector_core::vector::HVector}; pub const MAX_DISTANCE: f64 = 2.0; pub const ORTHOGONAL: f64 = 1.0; pub const MIN_DISTANCE: f64 = 0.0; +#[derive(Default, Debug, Deserialize, Clone)] +pub enum SimilarityMethod { + #[default] + #[serde(rename = "cosine_distance")] + CosineDistance, + #[serde(rename = "cosine_similarity")] + CosineSimilarity, + #[serde(rename = "euclidean_distance")] + EuclideanDistance, +} + pub trait DistanceCalc { - fn distance(from: &HVector, to: &HVector) -> Result; + fn distance(from: &[f64], to: &[f64], method: &SimilarityMethod) -> Result; + } impl DistanceCalc for HVector { /// Calculates the distance between two vectors. @@ -16,15 +30,18 @@ impl DistanceCalc for HVector { /// - 0.0 (orthogonal) → Distance 1.0 /// - -1.0 (most dissimilar) → Distance 2.0 (furthest) #[inline(always)] - #[cfg(feature = "cosine")] - fn distance(from: &HVector, to: &HVector) -> Result { - cosine_similarity(&from.data, &to.data).map(|sim| 1.0 - sim) + fn distance(from: &[f64], to: &[f64], method: &SimilarityMethod) -> Result { + match method { + SimilarityMethod::CosineDistance => cosine_similarity(&from, &to).map(|sim| 1.0 - sim), + SimilarityMethod::CosineSimilarity => cosine_similarity(&from, &to), + SimilarityMethod::EuclideanDistance => todo!(), + } + } } #[inline] -#[cfg(feature = "cosine")] pub fn cosine_similarity(from: &[f64], to: &[f64]) -> Result { let len = from.len(); let other_len = to.len(); diff --git a/helix-db/src/helix_gateway/mcp/tools.rs b/helix-db/src/helix_gateway/mcp/tools.rs index c867af48..7fa0ee32 100644 --- a/helix-db/src/helix_gateway/mcp/tools.rs +++ b/helix-db/src/helix_gateway/mcp/tools.rs @@ -1,24 +1,34 @@ use crate::{ - debug_println, helix_engine::{ + debug_println, + helix_engine::{ storage_core::HelixGraphStorage, traversal_core::{ ops::{ - bm25::search_bm25::SearchBM25Adapter, g::G, in_::{ + bm25::search_bm25::SearchBM25Adapter, + g::G, + in_::{ in_::{InAdapter, InNodesIterator}, in_e::{InEdgesAdapter, InEdgesIterator}, - }, out::{ + }, + out::{ out::{OutAdapter, OutNodesIterator}, out_e::{OutEdgesAdapter, OutEdgesIterator}, - }, source::{add_e::EdgeType, e_from_type::EFromType, n_from_type::NFromType}, util::order::OrderByAdapter, vectors::{brute_force_search::BruteForceSearchVAdapter, search::SearchVAdapter} + }, + source::{add_e::EdgeType, e_from_type::EFromType, n_from_type::NFromType}, + util::order::OrderByAdapter, + vectors::{brute_force_search::BruteForceSearchVAdapter, search::SearchVAdapter}, }, traversal_value::{Traversable, TraversalValue}, }, types::GraphError, - vector_core::vector::HVector, - }, helix_gateway::{ - embedding_providers::embedding_providers::{get_embedding_model, EmbeddingModel}, + vector_core::{vector::HVector, vector_distance::SimilarityMethod}, + }, + helix_gateway::{ + embedding_providers::embedding_providers::{EmbeddingModel, get_embedding_model}, mcp::mcp::{MCPConnection, MCPHandler, MCPHandlerSubmission, MCPToolInput, McpBackend}, - }, protocol::{response::Response, return_values::ReturnValue, value::Value}, utils::label_hash::hash_label + }, + protocol::{response::Response, return_values::ReturnValue, value::Value}, + utils::label_hash::hash_label, }; use heed3::RoTxn; use helix_macros::{mcp_handler, tool_calls}; @@ -71,7 +81,6 @@ pub enum Order { Desc, } - #[derive(Debug, Deserialize)] #[serde(rename_all = "snake_case")] pub struct FilterProperties { @@ -190,6 +199,7 @@ pub(super) trait McpTools<'a> { query: String, label: String, k: Option, + method: Option, ) -> Result, GraphError>; fn search_vector( @@ -199,6 +209,7 @@ pub(super) trait McpTools<'a> { vector: Vec, k: usize, min_score: Option, + method: Option, ) -> Result, GraphError>; fn order_by( @@ -480,6 +491,7 @@ impl<'a> McpTools<'a> for McpBackend { query: String, label: String, k: Option, + method: Option, ) -> Result, GraphError> { let db = Arc::clone(&self.db); @@ -488,7 +500,13 @@ impl<'a> McpTools<'a> for McpBackend { let embedding = result?; let res = G::new(db, txn) - .search_v:: bool, _>(&embedding, k.unwrap_or(5), &label, None) + .search_v:: bool, _>( + &embedding, + k.unwrap_or(5), + &label, + None, + &method.unwrap_or_default(), + ) .collect_to::>(); debug_println!("result: {res:?}"); @@ -502,13 +520,14 @@ impl<'a> McpTools<'a> for McpBackend { vector: Vec, k: usize, min_score: Option, + method: Option, ) -> Result, GraphError> { let db = Arc::clone(&self.db); let items = connection.iter.clone().collect::>(); let mut res = G::new_from(db, txn, items) - .brute_force_search_v(&vector, k) + .brute_force_search_v(&vector, k, &method.unwrap_or_default()) .collect_to::>(); if let Some(min_score) = min_score { @@ -536,10 +555,13 @@ impl<'a> McpTools<'a> for McpBackend { let iter = connection.iter.clone().collect::>(); - let res = match order { - Order::Asc => G::new_from(db, txn, iter).order_by_asc(&properties).collect_to::>(), - Order::Desc => G::new_from(db, txn, iter).order_by_desc(&properties).collect_to::>(), + Order::Asc => G::new_from(db, txn, iter) + .order_by_asc(&properties) + .collect_to::>(), + Order::Desc => G::new_from(db, txn, iter) + .order_by_desc(&properties) + .collect_to::>(), }; debug_println!("result: {res:?}"); diff --git a/helix-db/src/helix_gateway/mcp/tools_tests.rs b/helix-db/src/helix_gateway/mcp/tools_tests.rs index 2c17b2f6..ef009602 100644 --- a/helix-db/src/helix_gateway/mcp/tools_tests.rs +++ b/helix-db/src/helix_gateway/mcp/tools_tests.rs @@ -5,7 +5,7 @@ use tempfile::TempDir; use crate::{ helix_engine::{ - storage_core::{version_info::VersionInfo}, + storage_core::version_info::VersionInfo, traversal_core::{ HelixGraphEngine, HelixGraphEngineOpts, config::Config, @@ -19,7 +19,7 @@ use crate::{ }, traversal_value::{Traversable, TraversalValue}, }, - vector_core::vector::HVector, + vector_core::{vector::HVector, vector_distance::SimilarityMethod}, }, helix_gateway::mcp::{mcp::MCPConnection, tools::McpTools}, }; @@ -66,7 +66,6 @@ fn test_mcp_tool_search_vector_text() {} use rand::prelude::SliceRandom; - #[test] fn test_mcp_tool_search_vector() { let (engine, _temp_dir) = setup_test_db(); @@ -94,7 +93,12 @@ fn test_mcp_tool_search_vector() { for vector in vectors { let vector = G::new_mut(Arc::clone(&engine.storage), &mut txn) - .insert_v:: bool>(&vector, "vector", None) + .insert_v:: bool>( + &vector, + "vector", + None, + &crate::helix_engine::vector_core::vector_distance::SimilarityMethod::default(), + ) .collect_to_obj(); let _ = G::new_mut(Arc::clone(&engine.storage), &mut txn) @@ -127,7 +131,14 @@ fn test_mcp_tool_search_vector() { // brute force searches for vectors let res = mcp_backend - .search_vector(&txn, &mcp_connection, vec![1.0, 1.0, 1.0], 10, None) + .search_vector( + &txn, + &mcp_connection, + vec![1.0, 1.0, 1.0], + 10, + None, + Some(SimilarityMethod::default()), + ) .unwrap(); // checks that the first vector is correct From c474f7da0ee85e53dc79cb282911a38fbabbcbf6 Mon Sep 17 00:00:00 2001 From: xav-db Date: Sat, 13 Sep 2025 21:28:03 +0100 Subject: [PATCH 2/9] implementing euclidean distance --- .../vector_core/vector_distance.rs | 23 ++++++++++++++----- 1 file changed, 17 insertions(+), 6 deletions(-) diff --git a/helix-db/src/helix_engine/vector_core/vector_distance.rs b/helix-db/src/helix_engine/vector_core/vector_distance.rs index 08c23ed8..9b27ad9b 100644 --- a/helix-db/src/helix_engine/vector_core/vector_distance.rs +++ b/helix-db/src/helix_engine/vector_core/vector_distance.rs @@ -19,7 +19,6 @@ pub enum SimilarityMethod { pub trait DistanceCalc { fn distance(from: &[f64], to: &[f64], method: &SimilarityMethod) -> Result; - } impl DistanceCalc for HVector { /// Calculates the distance between two vectors. @@ -32,15 +31,13 @@ impl DistanceCalc for HVector { #[inline(always)] fn distance(from: &[f64], to: &[f64], method: &SimilarityMethod) -> Result { match method { - SimilarityMethod::CosineDistance => cosine_similarity(&from, &to).map(|sim| 1.0 - sim), - SimilarityMethod::CosineSimilarity => cosine_similarity(&from, &to), - SimilarityMethod::EuclideanDistance => todo!(), + SimilarityMethod::CosineDistance => cosine_similarity(&from, &to).map(|sim| 1.0 - sim), + SimilarityMethod::CosineSimilarity => cosine_similarity(&from, &to), + SimilarityMethod::EuclideanDistance => euclidean_distance(&from, &to), } - } } - #[inline] pub fn cosine_similarity(from: &[f64], to: &[f64]) -> Result { let len = from.len(); @@ -173,3 +170,17 @@ unsafe fn horizontal_sum_pd(__v: __m256d) -> f64 { // Extract the low 64 bits as a scalar _mm_cvtsd_f64(sum) } + +pub fn euclidean_distance(from: &[f64], to: &[f64]) -> Result { + if from.len() != to.len() { + return Err(VectorError::InvalidVectorLength); + } + Ok(from + .iter() + .zip(to.iter()) + .map(|(x, y)| { + let c = x - y; + c * c + }) + .sum()) +} From 85b43fef8d145dde4a37aeb0805de80091e38981 Mon Sep 17 00:00:00 2001 From: xav-db Date: Sun, 14 Sep 2025 20:43:56 -0400 Subject: [PATCH 3/9] rejigging to make it done via config instead of per query --- helix-db/benches/hnsw_benches.rs | 41 +++++------ helix-db/src/helix_engine/bm25/bm25.rs | 5 +- helix-db/src/helix_engine/bm25/bm25_tests.rs | 53 ++++----------- helix-db/src/helix_engine/storage_core/mod.rs | 9 +-- helix-db/src/helix_engine/tests/hnsw_tests.rs | 17 +++-- .../tests/traversal_tests/drop_tests.rs | 5 +- .../traversal_tests/edge_traversal_tests.rs | 4 +- .../tests/traversal_tests/util_tests.rs | 12 +--- .../traversal_tests/vector_traversal_tests.rs | 68 +++---------------- .../src/helix_engine/tests/vector_tests.rs | 2 +- .../src/helix_engine/traversal_core/config.rs | 57 ++++++++++++---- .../ops/vectors/brute_force_search.rs | 6 +- .../traversal_core/ops/vectors/insert.rs | 6 +- .../traversal_core/ops/vectors/search.rs | 5 +- helix-db/src/helix_engine/vector_core/hnsw.rs | 3 - .../src/helix_engine/vector_core/vector.rs | 3 +- .../helix_engine/vector_core/vector_core.rs | 31 ++++----- .../vector_core/vector_distance.rs | 17 +---- helix-db/src/helix_gateway/mcp/tools.rs | 9 +-- helix-db/src/helix_gateway/mcp/tools_tests.rs | 18 +---- 20 files changed, 133 insertions(+), 238 deletions(-) diff --git a/helix-db/benches/hnsw_benches.rs b/helix-db/benches/hnsw_benches.rs index 693fca72..05b2b938 100644 --- a/helix-db/benches/hnsw_benches.rs +++ b/helix-db/benches/hnsw_benches.rs @@ -3,20 +3,17 @@ mod tests { use heed3::{Env, EnvOpenOptions, RoTxn}; use helix_db::{ - helix_engine::vector_core::{ + helix_engine::{traversal_core::config::SimilarityMethod, vector_core::{ hnsw::HNSW, vector::HVector, - vector_core::{HNSWConfig, VectorCore}, vector_distance::SimilarityMethod, - }, + vector_core::{HNSWConfig, VectorCore}, + }}, utils::tqdm::tqdm, }; use polars::prelude::*; - use rand::{ - prelude::SliceRandom, - Rng, - }; + use rand::{Rng, prelude::SliceRandom}; use std::{ - collections::{HashSet, HashMap}, + collections::{HashMap, HashSet}, fs::{self, File}, sync::{Arc, Mutex}, thread, @@ -88,26 +85,23 @@ mod tests { .map(|dist| (base_vec.id.clone(), dist)) .ok() }) - .collect(); + .collect(); distances.sort_by(|a, b| { a.1.partial_cmp(&b.1).unwrap_or(std::cmp::Ordering::Equal) }); - let top_k_ids: Vec = distances - .into_iter() - .take(k) - .map(|(id, _)| id) - .collect(); + let top_k_ids: Vec = + distances.into_iter().take(k).map(|(id, _)| id).collect(); (query_id, top_k_ids) }) - .collect(); + .collect(); results.lock().unwrap().extend(local_results); }) }) - .collect(); + .collect(); for handle in handles { handle.join().unwrap(); @@ -312,14 +306,20 @@ mod tests { let env = setup_temp_env(); let mut txn = env.write_txn().unwrap(); - let index = VectorCore::new(&env, &mut txn, HNSWConfig::new(None, None, None)).unwrap(); + let index = VectorCore::new( + &env, + &mut txn, + HNSWConfig::new(None, None, None), + Some(SimilarityMethod::default()), + ) + .unwrap(); let mut total_insertion_time = std::time::Duration::from_secs(0); let mut base_all_vectors: Vec = Vec::new(); let over_all_time = Instant::now(); for (i, data) in base_vectors.iter().enumerate() { let start_time = Instant::now(); - let vec = index.insert::(&mut txn, &data, None, &SimilarityMethod::default()).unwrap(); + let vec = index.insert::(&mut txn, &data, None).unwrap(); let time = start_time.elapsed(); base_all_vectors.push(vec); //println!("{} => inserting in {} ms", i, time.as_millis()); @@ -354,7 +354,9 @@ mod tests { let mut total_search_time = std::time::Duration::from_secs(0); for (qid, query) in query_vectors.iter() { let start_time = Instant::now(); - let results = index.search::(&txn, query, k, "vector", None, false, &SimilarityMethod::default()).unwrap(); + let results = index + .search::(&txn, query, k, "vector", None, false) + .unwrap(); let search_duration = start_time.elapsed(); total_search_time += search_duration; @@ -400,4 +402,3 @@ mod tests { } // TODO: memory benchmark (only the hnsw index ofc) - diff --git a/helix-db/src/helix_engine/bm25/bm25.rs b/helix-db/src/helix_engine/bm25/bm25.rs index 047c7488..d4ec1012 100644 --- a/helix-db/src/helix_engine/bm25/bm25.rs +++ b/helix-db/src/helix_engine/bm25/bm25.rs @@ -3,7 +3,7 @@ use crate::{ helix_engine::{ storage_core::HelixGraphStorage, types::GraphError, - vector_core::{hnsw::HNSW, vector::HVector, vector_distance::SimilarityMethod}, + vector_core::{hnsw::HNSW, vector::HVector}, }, protocol::value::Value, }; @@ -350,7 +350,6 @@ pub trait HybridSearch { query_vector: &[f64], alpha: f32, limit: usize, - similarity_method: SimilarityMethod, ) -> impl std::future::Future, GraphError>> + Send; } @@ -361,7 +360,6 @@ impl HybridSearch for HelixGraphStorage { query_vector: &[f64], alpha: f32, limit: usize, - similarity_method: SimilarityMethod, ) -> Result, GraphError> { let query_owned = query.to_string(); let query_vector_owned = query_vector.to_vec(); @@ -386,7 +384,6 @@ impl HybridSearch for HelixGraphStorage { "vector", None, false, - &similarity_method, )?; Ok(Some(results)) }); diff --git a/helix-db/src/helix_engine/bm25/bm25_tests.rs b/helix-db/src/helix_engine/bm25/bm25_tests.rs index 80c45fef..7e7b0972 100644 --- a/helix-db/src/helix_engine/bm25/bm25_tests.rs +++ b/helix-db/src/helix_engine/bm25/bm25_tests.rs @@ -7,7 +7,7 @@ mod tests { }, storage_core::{HelixGraphStorage, version_info::VersionInfo}, traversal_core::config::Config, - vector_core::{hnsw::HNSW, vector::HVector, vector_distance::SimilarityMethod}, + vector_core::{hnsw::HNSW, vector::HVector}, }, protocol::value::Value, }; @@ -1424,12 +1424,9 @@ mod tests { let mut wtxn = storage.graph_env.write_txn().unwrap(); let vectors = generate_random_vectors(800, 650); for vec in vectors { - let _ = storage.vectors.insert:: bool>( - &mut wtxn, - &vec, - None, - &SimilarityMethod::default(), - ); + let _ = storage + .vectors + .insert:: bool>(&mut wtxn, &vec, None); } wtxn.commit().unwrap(); @@ -1439,13 +1436,7 @@ mod tests { let limit = 10; let result = storage - .hybrid_search( - query, - &query_vector[0], - alpha, - limit, - SimilarityMethod::default(), - ) + .hybrid_search(query, &query_vector[0], alpha, limit) .await; match result { @@ -1475,12 +1466,9 @@ mod tests { let mut wtxn = storage.graph_env.write_txn().unwrap(); let vectors = generate_random_vectors(800, 650); for vec in vectors { - let _ = storage.vectors.insert:: bool>( - &mut wtxn, - &vec, - None, - &SimilarityMethod::default(), - ); + let _ = storage + .vectors + .insert:: bool>(&mut wtxn, &vec, None); } wtxn.commit().unwrap(); @@ -1489,13 +1477,7 @@ mod tests { // alpha = 0.0 (Vector only) let results_vector_only = storage - .hybrid_search( - query, - &query_vector[0], - 0.0, - 10, - SimilarityMethod::default(), - ) + .hybrid_search(query, &query_vector[0], 0.0, 10) .await; match results_vector_only { @@ -1527,12 +1509,9 @@ mod tests { let mut wtxn = storage.graph_env.write_txn().unwrap(); let vectors = generate_random_vectors(800, 650); for vec in vectors { - let _ = storage.vectors.insert:: bool>( - &mut wtxn, - &vec, - None, - &SimilarityMethod::default(), - ); + let _ = storage + .vectors + .insert:: bool>(&mut wtxn, &vec, None); } wtxn.commit().unwrap(); @@ -1541,13 +1520,7 @@ mod tests { // alpha = 1.0 (BM25 only) let results_bm25_only = storage - .hybrid_search( - query, - &query_vector[0], - 1.0, - 10, - SimilarityMethod::default(), - ) + .hybrid_search(query, &query_vector[0], 1.0, 10) .await; // all should be valid results or acceptable errors diff --git a/helix-db/src/helix_engine/storage_core/mod.rs b/helix-db/src/helix_engine/storage_core/mod.rs index a7d4caff..84e6cbf1 100644 --- a/helix-db/src/helix_engine/storage_core/mod.rs +++ b/helix-db/src/helix_engine/storage_core/mod.rs @@ -9,7 +9,7 @@ use crate::{ storage_methods::{DBMethods, StorageMethods}, version_info::VersionInfo, }, - traversal_core::config::Config, + traversal_core::config::{Config}, types::GraphError, vector_core::{ hnsw::HNSW, @@ -41,13 +41,11 @@ pub type EdgeId = u128; pub struct StorageConfig { pub schema: String, - pub graphvis_node_label: Option, pub embedding_model: Option, } pub struct HelixGraphStorage { pub graph_env: Env, - pub nodes_db: Database, Bytes>, pub edges_db: Database, Bytes>, pub out_edges_db: Database, @@ -56,7 +54,6 @@ pub struct HelixGraphStorage { pub vectors: VectorCore, pub bm25: Option, pub version_info: VersionInfo, - pub storage_config: StorageConfig, } @@ -152,6 +149,7 @@ impl HelixGraphStorage { vector_config.ef_construction, vector_config.ef_search, ), + vector_config.vector_similarity, )?; let bm25 = config @@ -161,7 +159,6 @@ impl HelixGraphStorage { let storage_config = StorageConfig::new( config.schema.unwrap_or("".to_string()), - config.graphvis_node_label, config.embedding_model, ); @@ -265,12 +262,10 @@ impl HelixGraphStorage { impl StorageConfig { pub fn new( schema: String, - graphvis_node_label: Option, embedding_model: Option, ) -> StorageConfig { Self { schema, - graphvis_node_label, embedding_model, } } diff --git a/helix-db/src/helix_engine/tests/hnsw_tests.rs b/helix-db/src/helix_engine/tests/hnsw_tests.rs index baa5406b..5851e630 100644 --- a/helix-db/src/helix_engine/tests/hnsw_tests.rs +++ b/helix-db/src/helix_engine/tests/hnsw_tests.rs @@ -1,9 +1,9 @@ // MAKE SURE TO --release +use crate::helix_engine::traversal_core::config::SimilarityMethod; use crate::helix_engine::vector_core::{ hnsw::HNSW, vector::HVector, vector_core::{HNSWConfig, VectorCore}, - vector_distance::SimilarityMethod, }; use heed3::{Env, EnvOpenOptions, RoTxn}; use rand::{Rng, seq::SliceRandom}; @@ -111,7 +111,7 @@ fn tests_hnsw_config_build() { let config = HNSWConfig::new(Some(32), Some(256), Some(256)); - let index = VectorCore::new(&env, &mut txn, config).unwrap(); + let index = VectorCore::new(&env, &mut txn, config, Some(SimilarityMethod::default())).unwrap(); assert_eq!(index.config.m, 32); assert_eq!(index.config.ef_construct, 256); assert_eq!(index.config.ef, 256); @@ -126,7 +126,7 @@ fn tests_hnsw_config_build() { fn test_hnsw_insert() { let env = setup_temp_env(); let mut txn = env.write_txn().unwrap(); - let index = VectorCore::new(&env, &mut txn, HNSWConfig::new(None, None, None)).unwrap(); + let index = VectorCore::new(&env, &mut txn, HNSWConfig::new(None, None, None), Some(SimilarityMethod::default())).unwrap(); let n_base = 500; let dims = 750; @@ -134,7 +134,7 @@ fn test_hnsw_insert() { for data in vectors { let vec = index - .insert::(&mut txn, &data, None, &SimilarityMethod::default()) + .insert::(&mut txn, &data, None) .unwrap(); assert_eq!(vec.data, data); assert!(vec.properties.is_none()); @@ -148,7 +148,7 @@ fn test_hnsw_insert() { fn test_get_vector() { let env = setup_temp_env(); let mut txn = env.write_txn().unwrap(); - let index = VectorCore::new(&env, &mut txn, HNSWConfig::new(None, None, None)).unwrap(); + let index = VectorCore::new(&env, &mut txn, HNSWConfig::new(None, None, None), Some(SimilarityMethod::default())).unwrap(); let n_base = 500; let dims = 750; @@ -158,7 +158,7 @@ fn test_get_vector() { for data in vectors { all_vectors.push( index - .insert::(&mut txn, &data, None, &SimilarityMethod::default()) + .insert::(&mut txn, &data, None) .unwrap(), ); } @@ -198,13 +198,13 @@ fn test_hnsw_search() { let env = setup_temp_env(); let mut txn = env.write_txn().unwrap(); - let index = VectorCore::new(&env, &mut txn, HNSWConfig::new(None, None, None)).unwrap(); + let index = VectorCore::new(&env, &mut txn, HNSWConfig::new(None, None, None), Some(SimilarityMethod::default())).unwrap(); let mut base_all_vectors: Vec = Vec::new(); for data in base_vectors.iter() { base_all_vectors.push( index - .insert::(&mut txn, &data, None, &SimilarityMethod::default()) + .insert::(&mut txn, &data, None) .unwrap(), ); } @@ -228,7 +228,6 @@ fn test_hnsw_search() { "vector", None, false, - &SimilarityMethod::default(), ) .unwrap(); diff --git a/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs index 25db39d1..49f9ccd9 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/drop_tests.rs @@ -21,7 +21,8 @@ use crate::{ }, traversal_value::{Traversable, TraversalValue}, }, - vector_core::{vector::HVector, vector_distance::SimilarityMethod}, + vector_core::vector::HVector, + }, props, utils::filterable::Filterable, @@ -362,7 +363,6 @@ fn test_vector_deletion_in_existing_graph() { &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None, - &SimilarityMethod::default(), ) .collect_to_obj(); other_vectors.push(other_vector); @@ -373,7 +373,6 @@ fn test_vector_deletion_in_existing_graph() { &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None, - &SimilarityMethod::default(), ) .collect_to_obj(); diff --git a/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs index 68544f48..4a6aff88 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/edge_traversal_tests.rs @@ -20,7 +20,7 @@ use crate::{ }, traversal_value::{Traversable, TraversalValue}, }, - vector_core::{vector::HVector, vector_distance::SimilarityMethod}, + vector_core::vector::HVector, }, props, utils::filterable::Filterable, @@ -412,7 +412,6 @@ fn test_add_e_between_node_and_vector() { &[1.0, 2.0, 3.0], "vector", None, - &SimilarityMethod::default(), ) .collect_to_obj(); @@ -445,7 +444,6 @@ fn test_add_e_between_node_and_vector() { 10, "vector", None, - &SimilarityMethod::default() ) .collect_to::>() ); diff --git a/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs index 674e5da6..b4e1eb03 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/util_tests.rs @@ -17,7 +17,7 @@ use crate::{ }, traversal_value::Traversable, }, - vector_core::{vector::HVector, vector_distance::SimilarityMethod}, + vector_core::vector::HVector, }, props, }; @@ -216,7 +216,6 @@ fn test_order_vector_by_asc() { &[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 30 }), - &SimilarityMethod::default(), ) .collect_to_obj(); @@ -225,7 +224,6 @@ fn test_order_vector_by_asc() { &[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 20 }), - &SimilarityMethod::default(), ) .collect_to_obj(); @@ -234,7 +232,6 @@ fn test_order_vector_by_asc() { &[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 10 }), - &SimilarityMethod::default(), ) .collect_to_obj(); @@ -247,7 +244,6 @@ fn test_order_vector_by_asc() { 10, "vector", None, - &SimilarityMethod::default(), ) .order_by_asc("age") .collect_to::>(); @@ -269,7 +265,6 @@ fn test_order_vector_by_desc() { &[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 30 }), - &SimilarityMethod::default(), ) .collect_to_obj(); @@ -277,8 +272,7 @@ fn test_order_vector_by_desc() { .insert_v::( &[1.0, 2.0, 3.0], "vector", - Some(props! { "age" => 20 }), - &SimilarityMethod::default(), + Some(props! { "age" => 20 }), ) .collect_to_obj(); @@ -287,7 +281,6 @@ fn test_order_vector_by_desc() { &[1.0, 2.0, 3.0], "vector", Some(props! { "age" => 10 }), - &SimilarityMethod::default(), ) .collect_to_obj(); @@ -300,7 +293,6 @@ fn test_order_vector_by_desc() { 10, "vector", None, - &SimilarityMethod::default(), ) .order_by_desc("age") .collect_to::>(); diff --git a/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs b/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs index f5cc95be..cd8751b3 100644 --- a/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs +++ b/helix-db/src/helix_engine/tests/traversal_tests/vector_traversal_tests.rs @@ -23,7 +23,7 @@ use crate::{ }, traversal_value::{Traversable, TraversalValue}, }, - vector_core::{vector::HVector, vector_distance::SimilarityMethod}, + vector_core::vector::HVector, }, props, }; @@ -55,12 +55,7 @@ fn test_from_v() { .collect_to_obj(); let vector = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>( - &[1.0, 2.0, 3.0], - "vector", - None, - &SimilarityMethod::default(), - ) + .insert_v:: bool>(&[1.0, 2.0, 3.0], "vector", None) .collect_to_obj(); let _ = G::new_mut(Arc::clone(&storage), &mut txn) @@ -91,12 +86,7 @@ fn test_to_v() { .collect_to_obj(); let vector = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>( - &[1.0, 2.0, 3.0], - "vector", - None, - &SimilarityMethod::default(), - ) + .insert_v:: bool>(&[1.0, 2.0, 3.0], "vector", None) .collect_to_obj(); let _ = G::new_mut(Arc::clone(&storage), &mut txn) @@ -137,12 +127,7 @@ fn test_brute_force_vector_search() { let mut vector_ids = Vec::new(); for vector in vectors { let vector_id = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>( - &vector, - "vector", - None, - &SimilarityMethod::default(), - ) + .insert_v:: bool>(&vector, "vector", None) .collect_to_obj() .id(); let _ = G::new_mut(Arc::clone(&storage), &mut txn) @@ -166,7 +151,7 @@ fn test_brute_force_vector_search() { .n_from_id(&node.id()) .out_e("embedding") .to_v() - .brute_force_search_v(&[1.0, 2.0, 3.0], 10, &SimilarityMethod::default()) + .brute_force_search_v(&[1.0, 2.0, 3.0], 10) .collect_to::>(); println!("traversal: {traversal:?}"); @@ -228,12 +213,7 @@ fn test_vector_search() { rng.random::(), ]; let _ = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>( - &random_vector, - "vector", - None, - &SimilarityMethod::default(), - ) + .insert_v:: bool>(&random_vector, "vector", None) .collect_to_obj(); println!("inserted vector: {i:?}"); i += 1; @@ -254,12 +234,7 @@ fn test_vector_search() { for vector in vectors { let node = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>( - &vector, - "vector", - None, - &SimilarityMethod::default(), - ) + .insert_v:: bool>(&vector, "vector", None) .collect_to_obj(); inserted_vectors.push(node.id()); println!("inserted vector: {i:?}"); @@ -275,7 +250,6 @@ fn test_vector_search() { 2000, "vector", None, - &SimilarityMethod::default(), ) .collect_to::>(); // traversal.reverse(); @@ -294,12 +268,7 @@ fn test_delete_vector() { let mut txn = storage.graph_env.write_txn().unwrap(); let vector = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>( - &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - "vector", - None, - &SimilarityMethod::default(), - ) + .insert_v:: bool>(&[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None) .collect_to_obj(); let node = G::new_mut(Arc::clone(&storage), &mut txn) .add_n("person", None, None) @@ -317,7 +286,6 @@ fn test_delete_vector() { 2000, "vector", None, - &SimilarityMethod::default(), ) .collect_to::>(); @@ -334,7 +302,6 @@ fn test_delete_vector() { 2000, "vector", None, - &SimilarityMethod::default(), ) .collect_to::>(), Arc::clone(&storage), @@ -351,7 +318,6 @@ fn test_delete_vector() { 2000, "vector", None, - &SimilarityMethod::default(), ) .collect_to::>(); @@ -386,12 +352,7 @@ fn test_drop_vectors_then_add_them_back() { .collect_to_obj(); let embedding = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>( - &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - "vector", - None, - &SimilarityMethod::default(), - ) + .insert_v:: bool>(&[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None) .collect_to_obj(); let _ = G::new_mut(Arc::clone(&storage), &mut txn) @@ -451,7 +412,6 @@ fn test_drop_vectors_then_add_them_back() { &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "Entity_Embedding", Some(props! { "name_embedding" => [1.0, 1.0, 1.0, 1.0, 1.0, 1.0].to_vec() }), - &SimilarityMethod::default(), ) .collect_to_obj(); let edge = G::new_mut(Arc::clone(&storage), &mut txn) @@ -474,7 +434,6 @@ fn test_drop_vectors_then_add_them_back() { 2000, "Entity_Embedding", None, - &SimilarityMethod::default(), ) .collect_to::>(); assert_eq!(traversal.len(), 1); @@ -491,12 +450,7 @@ fn test_drop_vectors_then_add_them_back() { let mut txn = storage.graph_env.write_txn().unwrap(); let embedding = G::new_mut(Arc::clone(&storage), &mut txn) - .insert_v:: bool>( - &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], - "vector", - None, - &SimilarityMethod::default(), - ) + .insert_v:: bool>(&[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "vector", None) .collect_to_obj(); let _ = G::new_mut(Arc::clone(&storage), &mut txn) @@ -536,7 +490,6 @@ fn test_drop_vectors_then_add_them_back() { &[1.0, 1.0, 1.0, 1.0, 1.0, 1.0], "Entity_Embedding", Some(props! { "name_embedding" => [1.0, 1.0, 1.0, 1.0, 1.0, 1.0].to_vec() }), - &SimilarityMethod::default(), ) .collect_to_obj(); let edge = G::new_mut(Arc::clone(&storage), &mut txn) @@ -559,7 +512,6 @@ fn test_drop_vectors_then_add_them_back() { 2000, "Entity_Embedding", None, - &SimilarityMethod::default(), ) .collect_to::>(); assert_eq!(traversal.len(), 1); diff --git a/helix-db/src/helix_engine/tests/vector_tests.rs b/helix-db/src/helix_engine/tests/vector_tests.rs index b8874ee1..732e6e07 100644 --- a/helix-db/src/helix_engine/tests/vector_tests.rs +++ b/helix-db/src/helix_engine/tests/vector_tests.rs @@ -1,5 +1,5 @@ use crate::helix_engine::vector_core::vector::HVector; -use crate::helix_engine::vector_core::vector_distance::SimilarityMethod; +use crate::helix_engine::traversal_core::config::SimilarityMethod; use crate::helix_engine::vector_core::vector_distance::{MAX_DISTANCE, MIN_DISTANCE, ORTHOGONAL}; #[test] diff --git a/helix-db/src/helix_engine/traversal_core/config.rs b/helix-db/src/helix_engine/traversal_core/config.rs index 390b3eed..99c200ec 100644 --- a/helix-db/src/helix_engine/traversal_core/config.rs +++ b/helix-db/src/helix_engine/traversal_core/config.rs @@ -3,13 +3,17 @@ use crate::{ helixc::analyzer::{INTROSPECTION_DATA, SECONDARY_INDICES}, }; use serde::{Deserialize, Serialize}; -use std::{fmt, path::PathBuf}; +use std::{ + fmt::{self, Display}, + path::PathBuf, +}; #[derive(Serialize, Deserialize, Debug, Clone)] pub struct VectorConfig { pub m: Option, pub ef_construction: Option, pub ef_search: Option, + pub vector_similarity: Option, } impl Default for VectorConfig { @@ -18,6 +22,7 @@ impl Default for VectorConfig { m: Some(16), ef_construction: Some(128), ef_search: Some(768), + vector_similarity: Some(SimilarityMethod::default()), } } } @@ -27,6 +32,27 @@ pub struct GraphConfig { pub secondary_indices: Option>, } +#[derive(Serialize, Deserialize, Debug, Clone, Default)] +pub enum SimilarityMethod { + #[serde(rename = "cosine_distance")] + CosineDistance, + #[default] + #[serde(rename = "cosine_similarity")] + CosineSimilarity, + #[serde(rename = "euclidean_distance")] + EuclideanDistance, +} + +impl Display for SimilarityMethod { + fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result { + match self { + SimilarityMethod::CosineDistance => write!(f, "cosine_distance"), + SimilarityMethod::CosineSimilarity => write!(f, "cosine_similarity"), + SimilarityMethod::EuclideanDistance => write!(f, "euclidean_distance"), + } + } +} + #[derive(Serialize, Deserialize, Debug, Clone)] pub struct Config { pub vector_config: Option, @@ -36,7 +62,6 @@ pub struct Config { pub bm25: Option, pub schema: Option, pub embedding_model: Option, - pub graphvis_node_label: Option, } impl Config { @@ -49,13 +74,14 @@ impl Config { bm25: bool, schema: Option, embedding_model: Option, - graphvis_node_label: Option, + vector_similarity: Option, ) -> Self { Self { vector_config: Some(VectorConfig { m: Some(m), ef_construction: Some(ef_construction), ef_search: Some(ef_search), + vector_similarity, }), graph_config: Some(GraphConfig { secondary_indices: None, @@ -65,7 +91,6 @@ impl Config { bm25: Some(bm25), schema, embedding_model, - graphvis_node_label, } } @@ -94,7 +119,8 @@ impl Config { "vector_config": { "m": 16, "ef_construction": 128, - "ef_search": 768 + "ef_search": 768, + "vector_similarity": "cosine_similarity" }, "graph_config": { "secondary_indices": [] @@ -103,7 +129,6 @@ impl Config { "mcp": true, "bm25": true, "embedding_model": "text-embedding-ada-002", - "graphvis_node_label": "" } "# .trim() @@ -146,6 +171,7 @@ impl Default for Config { m: Some(16), ef_construction: Some(128), ef_search: Some(768), + vector_similarity: Some(SimilarityMethod::default()), }), graph_config: Some(GraphConfig { secondary_indices: None, @@ -155,7 +181,6 @@ impl Default for Config { bm25: Some(true), schema: None, embedding_model: Some("text-embedding-ada-002".to_string()), - graphvis_node_label: None, } } } @@ -192,6 +217,16 @@ impl fmt::Display for Config { .ef_search .unwrap_or(768) )?; + writeln!( + f, + "vector_similarity: {},", + self.vector_config + .as_ref() + .unwrap_or(&VectorConfig::default()) + .vector_similarity + .as_ref() + .unwrap_or(&SimilarityMethod::default()) + )?; writeln!(f, "}}),")?; writeln!(f, "graph_config: Some(GraphConfig {{")?; writeln!( @@ -234,14 +269,6 @@ impl fmt::Display for Config { None => "None".to_string(), } )?; - writeln!( - f, - "graphvis_node_label: {},", - match &self.graphvis_node_label { - Some(label) => format!("Some(\"{label}\".to_string())"), - None => "None".to_string(), - } - )?; writeln!(f, "}})")?; writeln!(f, "}}")?; Ok(()) diff --git a/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs b/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs index 6d353647..58cca4a0 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs @@ -7,7 +7,7 @@ use crate::{ types::GraphError, vector_core::{ vector::HVector, - vector_distance::{DistanceCalc, SimilarityMethod}, + vector_distance::DistanceCalc, }, }, protocol::value::Value, @@ -37,7 +37,6 @@ pub trait BruteForceSearchVAdapter<'a>: self, query: &[f64], k: K, - method: &SimilarityMethod, ) -> RoTraversalIterator<'a, impl Iterator>> where K: TryInto, @@ -51,7 +50,6 @@ impl<'a, I: Iterator> + 'a> BruteForce self, query: &[f64], k: K, - method: &SimilarityMethod, ) -> RoTraversalIterator<'a, impl Iterator>> where K: TryInto, @@ -64,7 +62,7 @@ impl<'a, I: Iterator> + 'a> BruteForce .inner .filter_map(|v| match v { Ok(TraversalValue::Vector(mut v)) => { - let d = HVector::distance(v.get_data(), query, method).unwrap(); + let d = HVector::distance(v.get_data(), query, &self.storage.vectors.method).unwrap(); v.set_distance(d); Some(v) } diff --git a/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs b/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs index a614b777..383cbc47 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/vectors/insert.rs @@ -4,7 +4,7 @@ use crate::{ helix_engine::{ traversal_core::{traversal_iter::RwTraversalIterator, traversal_value::TraversalValue}, types::GraphError, - vector_core::{hnsw::HNSW, vector::HVector, vector_distance::SimilarityMethod}, + vector_core::{hnsw::HNSW, vector::HVector}, }, protocol::value::Value, }; @@ -27,7 +27,6 @@ pub trait InsertVAdapter<'a, 'b>: Iterator>, - method: &SimilarityMethod, ) -> RwTraversalIterator<'a, 'b, impl Iterator>> where F: Fn(&HVector, &RoTxn) -> bool; @@ -41,7 +40,6 @@ impl<'a, 'b, I: Iterator>> InsertVAdap query: &[f64], label: &str, fields: Option>, - method: &SimilarityMethod, ) -> RwTraversalIterator<'a, 'b, impl Iterator>> where F: Fn(&HVector, &RoTxn) -> bool, @@ -57,7 +55,7 @@ impl<'a, 'b, I: Iterator>> InsertVAdap (String::from("is_deleted"), Value::Boolean(false)), ]), }; - let vector = self.storage.vectors.insert::(self.txn, query, fields, method); + let vector = self.storage.vectors.insert::(self.txn, query, fields); let result = match vector { Ok(vector) => Ok(TraversalValue::Vector(vector)), diff --git a/helix-db/src/helix_engine/traversal_core/ops/vectors/search.rs b/helix-db/src/helix_engine/traversal_core/ops/vectors/search.rs index 8b57bf48..a373fc6d 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/vectors/search.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/vectors/search.rs @@ -3,7 +3,7 @@ use heed3::RoTxn; use crate::helix_engine::{ traversal_core::{traversal_iter::RoTraversalIterator, traversal_value::TraversalValue}, types::{GraphError, VectorError}, - vector_core::{hnsw::HNSW, vector::HVector, vector_distance::SimilarityMethod}, + vector_core::{hnsw::HNSW, vector::HVector}, }; use helix_macros::debug_trace; use std::iter::once; @@ -29,7 +29,6 @@ pub trait SearchVAdapter<'a>: Iterator k: K, label: &str, filter: Option<&[F]>, - method: &SimilarityMethod, ) -> RoTraversalIterator<'a, impl Iterator>> where F: Fn(&HVector, &RoTxn) -> bool, @@ -46,7 +45,6 @@ impl<'a, I: Iterator> + 'a> SearchVAda k: K, label: &str, filter: Option<&[F]>, - method: &SimilarityMethod, ) -> RoTraversalIterator<'a, impl Iterator>> where F: Fn(&HVector, &RoTxn) -> bool, @@ -60,7 +58,6 @@ impl<'a, I: Iterator> + 'a> SearchVAda label, filter, false, - method, ); let iter = match vectors { diff --git a/helix-db/src/helix_engine/vector_core/hnsw.rs b/helix-db/src/helix_engine/vector_core/hnsw.rs index 51d6807e..4ef66159 100644 --- a/helix-db/src/helix_engine/vector_core/hnsw.rs +++ b/helix-db/src/helix_engine/vector_core/hnsw.rs @@ -1,5 +1,4 @@ use crate::helix_engine::vector_core::vector::HVector; -use crate::helix_engine::vector_core::vector_distance::SimilarityMethod; use crate::{helix_engine::types::VectorError, protocol::value::Value}; use heed3::{RoTxn, RwTxn}; @@ -23,7 +22,6 @@ pub trait HNSW { label: &str, filter: Option<&[F]>, should_trickle: bool, - method: &SimilarityMethod, ) -> Result, VectorError> where F: Fn(&HVector, &RoTxn) -> bool; @@ -43,7 +41,6 @@ pub trait HNSW { txn: &mut RwTxn, data: &[f64], fields: Option>, - method: &SimilarityMethod, ) -> Result where F: Fn(&HVector, &RoTxn) -> bool; diff --git a/helix-db/src/helix_engine/vector_core/vector.rs b/helix-db/src/helix_engine/vector_core/vector.rs index 6447deb2..b00c811b 100644 --- a/helix-db/src/helix_engine/vector_core/vector.rs +++ b/helix-db/src/helix_engine/vector_core/vector.rs @@ -1,7 +1,8 @@ use crate::{ helix_engine::{ + traversal_core::config::SimilarityMethod, types::{GraphError, VectorError}, - vector_core::vector_distance::{DistanceCalc, SimilarityMethod}, + vector_core::vector_distance::DistanceCalc, }, protocol::{return_values::ReturnValue, value::Value}, utils::{ diff --git a/helix-db/src/helix_engine/vector_core/vector_core.rs b/helix-db/src/helix_engine/vector_core/vector_core.rs index c1673376..31c95494 100644 --- a/helix-db/src/helix_engine/vector_core/vector_core.rs +++ b/helix-db/src/helix_engine/vector_core/vector_core.rs @@ -1,12 +1,12 @@ use crate::{ debug_println, helix_engine::{ + traversal_core::config::SimilarityMethod, types::VectorError, vector_core::{ hnsw::HNSW, utils::{Candidate, HeapOps, VectorFilter}, vector::HVector, - vector_distance::SimilarityMethod, }, }, protocol::value::Value, @@ -65,10 +65,16 @@ pub struct VectorCore { pub vector_data_db: Database, pub edges_db: Database, pub config: HNSWConfig, + pub method: SimilarityMethod, } impl VectorCore { - pub fn new(env: &Env, txn: &mut RwTxn, config: HNSWConfig) -> Result { + pub fn new( + env: &Env, + txn: &mut RwTxn, + config: HNSWConfig, + method: Option, + ) -> Result { let vectors_db = env.create_database(txn, Some(DB_VECTORS))?; let vector_data_db = env.create_database(txn, Some(DB_VECTOR_DATA))?; let edges_db = env.create_database(txn, Some(DB_HNSW_EDGES))?; @@ -78,6 +84,7 @@ impl VectorCore { vector_data_db, edges_db, config, + method: method.unwrap_or(SimilarityMethod::default()), }) } @@ -249,7 +256,6 @@ impl VectorCore { level: usize, should_extend: bool, filter: Option<&[F]>, - method: &SimilarityMethod, ) -> Result, VectorError> where F: Fn(&HVector, &RoTxn) -> bool, @@ -268,7 +274,7 @@ impl VectorCore { continue; } - neighbor.set_distance(neighbor.distance_to(query, method)?); + neighbor.set_distance(neighbor.distance_to(query, &self.method)?); /* let passes_filters = match filter { @@ -299,7 +305,6 @@ impl VectorCore { ef: usize, level: usize, filter: Option<&[F]>, - method: &SimilarityMethod, ) -> Result, VectorError> where F: Fn(&HVector, &RoTxn) -> bool, @@ -308,7 +313,7 @@ impl VectorCore { let mut candidates: BinaryHeap = BinaryHeap::new(); let mut results: BinaryHeap = BinaryHeap::new(); - entry_point.set_distance(entry_point.distance_to(query, method)?); + entry_point.set_distance(entry_point.distance_to(query, &self.method)?); candidates.push(Candidate { id: entry_point.get_id(), distance: entry_point.get_distance(), @@ -335,7 +340,7 @@ impl VectorCore { .into_iter() .filter(|neighbor| visited.insert(neighbor.get_id())) .filter_map(|mut neighbor| { - let distance = neighbor.distance_to(query, method).ok()?; + let distance = neighbor.distance_to(query, &self.method).ok()?; if max_distance.is_none_or(|max| distance < max) { neighbor.set_distance(distance); @@ -406,7 +411,6 @@ impl HNSW for VectorCore { label: &str, filter: Option<&[F]>, should_trickle: bool, - method: &SimilarityMethod, ) -> Result, VectorError> where F: Fn(&HVector, &RoTxn) -> bool, @@ -429,7 +433,6 @@ impl HNSW for VectorCore { true => filter, false => None, }, - method, )?; if let Some(closest) = nearest.pop() { @@ -447,7 +450,6 @@ impl HNSW for VectorCore { true => filter, false => None, }, - method, )?; let results = @@ -462,7 +464,6 @@ impl HNSW for VectorCore { txn: &mut RwTxn, data: &[f64], fields: Option>, - method: &SimilarityMethod, ) -> Result where F: Fn(&HVector, &RoTxn) -> bool, @@ -496,8 +497,7 @@ impl HNSW for VectorCore { let l = entry_point.get_level(); let mut curr_ep = entry_point; for level in (new_level + 1..=l).rev() { - let nearest = - self.search_level::(txn, &query, &mut curr_ep, 1, level, None, method)?; + let nearest = self.search_level::(txn, &query, &mut curr_ep, 1, level, None)?; curr_ep = nearest .peek() .ok_or(VectorError::VectorCoreError( @@ -514,19 +514,18 @@ impl HNSW for VectorCore { self.config.ef_construct, level, None, - method, )?; curr_ep = nearest.peek().unwrap().clone(); let neighbors = - self.select_neighbors::(txn, &query, nearest, level, true, None, method)?; + self.select_neighbors::(txn, &query, nearest, level, true, None)?; self.set_neighbours(txn, query.get_id(), &neighbors, level)?; for e in neighbors { let id = e.get_id(); let e_conns = BinaryHeap::from(self.get_neighbors::(txn, id, level, None)?); let e_new_conn = - self.select_neighbors::(txn, &query, e_conns, level, true, None, method)?; + self.select_neighbors::(txn, &query, e_conns, level, true, None)?; self.set_neighbours(txn, id, &e_new_conn, level)?; } } diff --git a/helix-db/src/helix_engine/vector_core/vector_distance.rs b/helix-db/src/helix_engine/vector_core/vector_distance.rs index 9b27ad9b..98212acc 100644 --- a/helix-db/src/helix_engine/vector_core/vector_distance.rs +++ b/helix-db/src/helix_engine/vector_core/vector_distance.rs @@ -1,22 +1,11 @@ -use serde::Deserialize; - -use crate::helix_engine::{types::VectorError, vector_core::vector::HVector}; +use crate::helix_engine::{ + traversal_core::config::SimilarityMethod, types::VectorError, vector_core::vector::HVector, +}; pub const MAX_DISTANCE: f64 = 2.0; pub const ORTHOGONAL: f64 = 1.0; pub const MIN_DISTANCE: f64 = 0.0; -#[derive(Default, Debug, Deserialize, Clone)] -pub enum SimilarityMethod { - #[default] - #[serde(rename = "cosine_distance")] - CosineDistance, - #[serde(rename = "cosine_similarity")] - CosineSimilarity, - #[serde(rename = "euclidean_distance")] - EuclideanDistance, -} - pub trait DistanceCalc { fn distance(from: &[f64], to: &[f64], method: &SimilarityMethod) -> Result; } diff --git a/helix-db/src/helix_gateway/mcp/tools.rs b/helix-db/src/helix_gateway/mcp/tools.rs index 7fa0ee32..9aaa6eba 100644 --- a/helix-db/src/helix_gateway/mcp/tools.rs +++ b/helix-db/src/helix_gateway/mcp/tools.rs @@ -21,7 +21,7 @@ use crate::{ traversal_value::{Traversable, TraversalValue}, }, types::GraphError, - vector_core::{vector::HVector, vector_distance::SimilarityMethod}, + vector_core::vector::HVector, }, helix_gateway::{ embedding_providers::embedding_providers::{EmbeddingModel, get_embedding_model}, @@ -199,7 +199,6 @@ pub(super) trait McpTools<'a> { query: String, label: String, k: Option, - method: Option, ) -> Result, GraphError>; fn search_vector( @@ -209,7 +208,6 @@ pub(super) trait McpTools<'a> { vector: Vec, k: usize, min_score: Option, - method: Option, ) -> Result, GraphError>; fn order_by( @@ -491,7 +489,6 @@ impl<'a> McpTools<'a> for McpBackend { query: String, label: String, k: Option, - method: Option, ) -> Result, GraphError> { let db = Arc::clone(&self.db); @@ -505,7 +502,6 @@ impl<'a> McpTools<'a> for McpBackend { k.unwrap_or(5), &label, None, - &method.unwrap_or_default(), ) .collect_to::>(); @@ -520,14 +516,13 @@ impl<'a> McpTools<'a> for McpBackend { vector: Vec, k: usize, min_score: Option, - method: Option, ) -> Result, GraphError> { let db = Arc::clone(&self.db); let items = connection.iter.clone().collect::>(); let mut res = G::new_from(db, txn, items) - .brute_force_search_v(&vector, k, &method.unwrap_or_default()) + .brute_force_search_v(&vector, k) .collect_to::>(); if let Some(min_score) = min_score { diff --git a/helix-db/src/helix_gateway/mcp/tools_tests.rs b/helix-db/src/helix_gateway/mcp/tools_tests.rs index ef009602..977e1a2d 100644 --- a/helix-db/src/helix_gateway/mcp/tools_tests.rs +++ b/helix-db/src/helix_gateway/mcp/tools_tests.rs @@ -19,7 +19,7 @@ use crate::{ }, traversal_value::{Traversable, TraversalValue}, }, - vector_core::{vector::HVector, vector_distance::SimilarityMethod}, + vector_core::vector::HVector, }, helix_gateway::mcp::{mcp::MCPConnection, tools::McpTools}, }; @@ -93,12 +93,7 @@ fn test_mcp_tool_search_vector() { for vector in vectors { let vector = G::new_mut(Arc::clone(&engine.storage), &mut txn) - .insert_v:: bool>( - &vector, - "vector", - None, - &crate::helix_engine::vector_core::vector_distance::SimilarityMethod::default(), - ) + .insert_v:: bool>(&vector, "vector", None) .collect_to_obj(); let _ = G::new_mut(Arc::clone(&engine.storage), &mut txn) @@ -131,14 +126,7 @@ fn test_mcp_tool_search_vector() { // brute force searches for vectors let res = mcp_backend - .search_vector( - &txn, - &mcp_connection, - vec![1.0, 1.0, 1.0], - 10, - None, - Some(SimilarityMethod::default()), - ) + .search_vector(&txn, &mcp_connection, vec![1.0, 1.0, 1.0], 10, None) .unwrap(); // checks that the first vector is correct From 88036edbd455c57374c7cdb6031ad3fe96865f9a Mon Sep 17 00:00:00 2001 From: xav-db Date: Mon, 15 Sep 2025 09:17:33 -0400 Subject: [PATCH 4/9] clippy fixes --- helix-db/src/helix_engine/vector_core/vector_core.rs | 2 +- helix-db/src/helix_engine/vector_core/vector_distance.rs | 6 +++--- helix-db/src/helixc/analyzer/types.rs | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/helix-db/src/helix_engine/vector_core/vector_core.rs b/helix-db/src/helix_engine/vector_core/vector_core.rs index 31c95494..bf383f5d 100644 --- a/helix-db/src/helix_engine/vector_core/vector_core.rs +++ b/helix-db/src/helix_engine/vector_core/vector_core.rs @@ -84,7 +84,7 @@ impl VectorCore { vector_data_db, edges_db, config, - method: method.unwrap_or(SimilarityMethod::default()), + method: method.unwrap_or_default(), }) } diff --git a/helix-db/src/helix_engine/vector_core/vector_distance.rs b/helix-db/src/helix_engine/vector_core/vector_distance.rs index 98212acc..a0055808 100644 --- a/helix-db/src/helix_engine/vector_core/vector_distance.rs +++ b/helix-db/src/helix_engine/vector_core/vector_distance.rs @@ -20,9 +20,9 @@ impl DistanceCalc for HVector { #[inline(always)] fn distance(from: &[f64], to: &[f64], method: &SimilarityMethod) -> Result { match method { - SimilarityMethod::CosineDistance => cosine_similarity(&from, &to).map(|sim| 1.0 - sim), - SimilarityMethod::CosineSimilarity => cosine_similarity(&from, &to), - SimilarityMethod::EuclideanDistance => euclidean_distance(&from, &to), + SimilarityMethod::CosineDistance => cosine_similarity(from, to).map(|sim| 1.0 - sim), + SimilarityMethod::CosineSimilarity => cosine_similarity(from, to), + SimilarityMethod::EuclideanDistance => euclidean_distance(from, to), } } } diff --git a/helix-db/src/helixc/analyzer/types.rs b/helix-db/src/helixc/analyzer/types.rs index c310496f..6d5a9699 100644 --- a/helix-db/src/helixc/analyzer/types.rs +++ b/helix-db/src/helixc/analyzer/types.rs @@ -394,6 +394,6 @@ impl From for Type { impl std::fmt::Display for Type { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { - write!(f, "{:?}", self) + write!(f, "{self:?}") } } From bc08a12ffcfe1d9f3e34a38c785595dca83adc30 Mon Sep 17 00:00:00 2001 From: xav-db Date: Mon, 15 Sep 2025 23:29:29 -0400 Subject: [PATCH 5/9] fixing euclidean distance --- helix-db/src/helix_engine/vector_core/vector_distance.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/helix-db/src/helix_engine/vector_core/vector_distance.rs b/helix-db/src/helix_engine/vector_core/vector_distance.rs index a0055808..5a27dbd8 100644 --- a/helix-db/src/helix_engine/vector_core/vector_distance.rs +++ b/helix-db/src/helix_engine/vector_core/vector_distance.rs @@ -171,5 +171,6 @@ pub fn euclidean_distance(from: &[f64], to: &[f64]) -> Result let c = x - y; c * c }) - .sum()) + .sum::() + .sqrt()) } From cd72bd69934a3411b8c76614cc04d80428c38e0b Mon Sep 17 00:00:00 2001 From: xav-db Date: Mon, 15 Sep 2025 23:44:27 -0400 Subject: [PATCH 6/9] changing default --- helix-db/src/helix_engine/traversal_core/config.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/helix-db/src/helix_engine/traversal_core/config.rs b/helix-db/src/helix_engine/traversal_core/config.rs index 99c200ec..8110c8e8 100644 --- a/helix-db/src/helix_engine/traversal_core/config.rs +++ b/helix-db/src/helix_engine/traversal_core/config.rs @@ -34,9 +34,9 @@ pub struct GraphConfig { #[derive(Serialize, Deserialize, Debug, Clone, Default)] pub enum SimilarityMethod { + #[default] #[serde(rename = "cosine_distance")] CosineDistance, - #[default] #[serde(rename = "cosine_similarity")] CosineSimilarity, #[serde(rename = "euclidean_distance")] From ed9f29b4f9616927e00b829259f5404a05a2fbbe Mon Sep 17 00:00:00 2001 From: xav-db Date: Tue, 16 Sep 2025 15:57:25 -0400 Subject: [PATCH 7/9] fixing vector similarity issues with different ordering needed for different return configurations, wrapped in a single enum that implements different ordering based on similarity calculation method --- helix-db/benches/hnsw_benches.rs | 2 +- helix-db/src/helix_engine/bm25/bm25.rs | 2 +- helix-db/src/helix_engine/tests/hnsw_tests.rs | 2 +- .../ops/vectors/brute_force_search.rs | 2 +- .../src/helix_engine/vector_core/utils.rs | 8 +- .../src/helix_engine/vector_core/vector.rs | 26 ++- .../helix_engine/vector_core/vector_core.rs | 10 +- .../vector_core/vector_distance.rs | 149 +++++++++++++++++- 8 files changed, 168 insertions(+), 33 deletions(-) diff --git a/helix-db/benches/hnsw_benches.rs b/helix-db/benches/hnsw_benches.rs index 05b2b938..5855c259 100644 --- a/helix-db/benches/hnsw_benches.rs +++ b/helix-db/benches/hnsw_benches.rs @@ -82,7 +82,7 @@ mod tests { .filter_map(|base_vec| { query_hvector .distance_to(base_vec, &SimilarityMethod::default()) - .map(|dist| (base_vec.id.clone(), dist)) + .map(|dist| (base_vec.id.clone(), *dist)) .ok() }) .collect(); diff --git a/helix-db/src/helix_engine/bm25/bm25.rs b/helix-db/src/helix_engine/bm25/bm25.rs index d4ec1012..93266155 100644 --- a/helix-db/src/helix_engine/bm25/bm25.rs +++ b/helix-db/src/helix_engine/bm25/bm25.rs @@ -403,7 +403,7 @@ impl HybridSearch for HelixGraphStorage { if let Some(vector_results) = vector_results? { for doc in vector_results { let doc_id = doc.id; - let score = doc.distance.unwrap_or(0.0); + let score = *doc.distance; let similarity = (1.0 / (1.0 + score)) as f32; combined_scores .entry(doc_id) diff --git a/helix-db/src/helix_engine/tests/hnsw_tests.rs b/helix-db/src/helix_engine/tests/hnsw_tests.rs index 5851e630..8a371b1c 100644 --- a/helix-db/src/helix_engine/tests/hnsw_tests.rs +++ b/helix-db/src/helix_engine/tests/hnsw_tests.rs @@ -56,7 +56,7 @@ fn calc_ground_truths( .filter_map(|base_vec| { query_hvector .distance_to(base_vec, &SimilarityMethod::default()) - .map(|dist| (base_vec.id.clone(), dist)) + .map(|dist| (base_vec.id.clone(), *dist)) .ok() }) .collect(); diff --git a/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs b/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs index 58cca4a0..2230bcf0 100644 --- a/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs +++ b/helix-db/src/helix_engine/traversal_core/ops/vectors/brute_force_search.rs @@ -68,7 +68,7 @@ impl<'a, I: Iterator> + 'a> BruteForce } _ => None, }) - .sorted_by(|v1, v2| v1.partial_cmp(v2).unwrap()) + .sorted() .take(k.try_into().unwrap()) .filter_map(move |mut item| { item.properties = match storage diff --git a/helix-db/src/helix_engine/vector_core/utils.rs b/helix-db/src/helix_engine/vector_core/utils.rs index b6734d1d..67219f93 100644 --- a/helix-db/src/helix_engine/vector_core/utils.rs +++ b/helix-db/src/helix_engine/vector_core/utils.rs @@ -1,7 +1,7 @@ use crate::{ helix_engine::{ types::VectorError, - vector_core::vector::HVector, + vector_core::vector::HVector, vector_core::vector_distance::DistanceResult, }, protocol::value::Value, utils::filterable::Filterable }; @@ -11,7 +11,7 @@ use std::{cmp::Ordering, collections::BinaryHeap}; #[derive(PartialEq)] pub(super) struct Candidate { pub id: u128, - pub distance: f64, + pub distance: DistanceResult, } impl Eq for Candidate {} @@ -25,9 +25,7 @@ impl PartialOrd for Candidate { impl Ord for Candidate { fn cmp(&self, other: &Self) -> Ordering { other - .distance - .partial_cmp(&self.distance) - .unwrap_or(Ordering::Equal) + .distance.cmp(&self.distance) } } diff --git a/helix-db/src/helix_engine/vector_core/vector.rs b/helix-db/src/helix_engine/vector_core/vector.rs index b00c811b..e7cbb636 100644 --- a/helix-db/src/helix_engine/vector_core/vector.rs +++ b/helix-db/src/helix_engine/vector_core/vector.rs @@ -2,7 +2,7 @@ use crate::{ helix_engine::{ traversal_core::config::SimilarityMethod, types::{GraphError, VectorError}, - vector_core::vector_distance::DistanceCalc, + vector_core::vector_distance::{DistanceCalc, DistanceResult}, }, protocol::{return_values::ReturnValue, value::Value}, utils::{ @@ -28,7 +28,7 @@ pub struct HVector { /// The level of the HVector pub level: usize, /// The distance of the HVector - pub distance: Option, + pub distance: DistanceResult, /// The actual vector pub data: Vec, /// The properties of the HVector @@ -44,9 +44,7 @@ impl PartialOrd for HVector { impl Ord for HVector { fn cmp(&self, other: &Self) -> Ordering { other - .distance - .partial_cmp(&self.distance) - .unwrap_or(Ordering::Equal) + .distance.cmp(&self.distance) } } @@ -74,7 +72,7 @@ impl HVector { // is_deleted: false, level: 0, data, - distance: None, + distance: DistanceResult::Empty, properties: None, } } @@ -87,7 +85,7 @@ impl HVector { // is_deleted: false, level, data, - distance: None, + distance: DistanceResult::Empty, properties: None, } } @@ -141,7 +139,7 @@ impl HVector { // is_deleted: false, level, data, - distance: None, + distance: DistanceResult::Empty, properties: None, }) } @@ -161,18 +159,18 @@ impl HVector { &self, other: &HVector, method: &SimilarityMethod, - ) -> Result { + ) -> Result { HVector::distance(&self.data, &other.data, method) } #[inline(always)] - pub fn set_distance(&mut self, distance: f64) { - self.distance = Some(distance); + pub fn set_distance(&mut self, distance: DistanceResult) { + self.distance = distance; } #[inline(always)] - pub fn get_distance(&self) -> f64 { - self.distance.unwrap_or(2.0) + pub fn get_distance(&self) -> DistanceResult { + self.distance } } @@ -234,7 +232,7 @@ impl Filterable for HVector { } fn score(&self) -> f64 { - self.get_distance() + self.get_distance().into() } fn properties_mut(&mut self) -> &mut Option> { diff --git a/helix-db/src/helix_engine/vector_core/vector_core.rs b/helix-db/src/helix_engine/vector_core/vector_core.rs index bf383f5d..d54f1b4d 100644 --- a/helix-db/src/helix_engine/vector_core/vector_core.rs +++ b/helix-db/src/helix_engine/vector_core/vector_core.rs @@ -6,7 +6,7 @@ use crate::{ vector_core::{ hnsw::HNSW, utils::{Candidate, HeapOps, VectorFilter}, - vector::HVector, + vector::HVector, vector_distance::DistanceResult, }, }, protocol::value::Value, @@ -274,7 +274,7 @@ impl VectorCore { continue; } - neighbor.set_distance(neighbor.distance_to(query, &self.method)?); + neighbor.set_distance(neighbor.distance_to(query, &self.method)?.into()); /* let passes_filters = match filter { @@ -313,10 +313,10 @@ impl VectorCore { let mut candidates: BinaryHeap = BinaryHeap::new(); let mut results: BinaryHeap = BinaryHeap::new(); - entry_point.set_distance(entry_point.distance_to(query, &self.method)?); + entry_point.set_distance(entry_point.distance_to(query, &self.method)?.into()); candidates.push(Candidate { id: entry_point.get_id(), - distance: entry_point.get_distance(), + distance: entry_point.get_distance().clone(), }); results.push(entry_point.clone()); visited.insert(entry_point.get_id()); @@ -481,7 +481,7 @@ impl HNSW for VectorCore { Ok(ep) => ep, Err(_) => { self.set_entry_point(txn, &query)?; - query.set_distance(0.0); + query.set_distance(DistanceResult::Empty); if let Some(fields) = fields { self.vector_data_db.put( diff --git a/helix-db/src/helix_engine/vector_core/vector_distance.rs b/helix-db/src/helix_engine/vector_core/vector_distance.rs index 5a27dbd8..b11c7e0d 100644 --- a/helix-db/src/helix_engine/vector_core/vector_distance.rs +++ b/helix-db/src/helix_engine/vector_core/vector_distance.rs @@ -1,3 +1,8 @@ +use core::fmt; +use std::{cmp::Ordering, fmt::Display, ops::Deref}; + +use serde::{Deserialize, Serialize}; + use crate::helix_engine::{ traversal_core::config::SimilarityMethod, types::VectorError, vector_core::vector::HVector, }; @@ -7,7 +12,7 @@ pub const ORTHOGONAL: f64 = 1.0; pub const MIN_DISTANCE: f64 = 0.0; pub trait DistanceCalc { - fn distance(from: &[f64], to: &[f64], method: &SimilarityMethod) -> Result; + fn distance(from: &[f64], to: &[f64], method: &SimilarityMethod) -> Result; } impl DistanceCalc for HVector { /// Calculates the distance between two vectors. @@ -18,11 +23,21 @@ impl DistanceCalc for HVector { /// - 0.0 (orthogonal) → Distance 1.0 /// - -1.0 (most dissimilar) → Distance 2.0 (furthest) #[inline(always)] - fn distance(from: &[f64], to: &[f64], method: &SimilarityMethod) -> Result { + fn distance( + from: &[f64], + to: &[f64], + method: &SimilarityMethod, + ) -> Result { match method { - SimilarityMethod::CosineDistance => cosine_similarity(from, to).map(|sim| 1.0 - sim), - SimilarityMethod::CosineSimilarity => cosine_similarity(from, to), - SimilarityMethod::EuclideanDistance => euclidean_distance(from, to), + SimilarityMethod::CosineDistance => { + cosine_similarity(from, to).map(|sim| DistanceResult::CosineDistance(1.0 - sim)) + } + SimilarityMethod::CosineSimilarity => { + cosine_similarity(from, to).map(|sim| DistanceResult::CosineSimilarity(sim)) + } + SimilarityMethod::EuclideanDistance => { + euclidean_distance(from, to).map(|dist| DistanceResult::EuclideanDistance(dist)) + } } } } @@ -174,3 +189,127 @@ pub fn euclidean_distance(from: &[f64], to: &[f64]) -> Result .sum::() .sqrt()) } + +#[derive(Debug, Clone, Serialize, Deserialize, Copy)] +#[serde(untagged)] +pub enum DistanceResult { + CosineDistance(f64), + CosineSimilarity(f64), + EuclideanDistance(f64), + Empty, +} + +impl PartialEq for DistanceResult { + fn eq(&self, other: &Self) -> bool { + match (self, other) { + (DistanceResult::CosineDistance(a), DistanceResult::CosineDistance(b)) => a == b, + (DistanceResult::CosineSimilarity(a), DistanceResult::CosineSimilarity(b)) => b == a, + (DistanceResult::EuclideanDistance(a), DistanceResult::EuclideanDistance(b)) => a == b, + (DistanceResult::Empty, _) => false, + (_, DistanceResult::Empty) => false, + _ => unreachable!(), + } + } +} + +impl PartialEq for DistanceResult { + fn eq(&self, other: &f64) -> bool { + match self { + DistanceResult::CosineDistance(a) => a == other, + DistanceResult::CosineSimilarity(a) => a == other, + DistanceResult::EuclideanDistance(a) => a == other, + DistanceResult::Empty => false, + } + } +} +impl PartialOrd for DistanceResult { + fn partial_cmp(&self, other: &f64) -> Option { + match self { + DistanceResult::CosineDistance(a) => a.partial_cmp(other), + DistanceResult::CosineSimilarity(a) => a.partial_cmp(other), + DistanceResult::EuclideanDistance(a) => a.partial_cmp(other), + DistanceResult::Empty => None, + } + } +} + +impl PartialOrd for DistanceResult { + fn partial_cmp(&self, other: &Self) -> Option { + match (self, other) { + (DistanceResult::CosineDistance(a), DistanceResult::CosineDistance(b)) => { + b.partial_cmp(a) + } + (DistanceResult::CosineSimilarity(a), DistanceResult::CosineSimilarity(b)) => { + a.partial_cmp(b) + } + (DistanceResult::EuclideanDistance(a), DistanceResult::EuclideanDistance(b)) => { + b.partial_cmp(a) + } + (DistanceResult::Empty, DistanceResult::Empty) => Some(Ordering::Equal), + (DistanceResult::Empty, _) => Some(Ordering::Greater), + (_, DistanceResult::Empty) => Some(Ordering::Less), + _ => unreachable!(), + } + } +} + +impl Ord for DistanceResult { + fn cmp(&self, other: &Self) -> Ordering { + self.partial_cmp(other).unwrap() + } +} + +impl Eq for DistanceResult {} + +impl Deref for DistanceResult { + type Target = f64; + + fn deref(&self) -> &Self::Target { + match self { + DistanceResult::CosineDistance(a) => a, + DistanceResult::CosineSimilarity(a) => a, + DistanceResult::EuclideanDistance(a) => a, + DistanceResult::Empty => panic!("Empty distance result"), + } + } +} + + +impl From for f64 { + fn from(result: DistanceResult) -> Self { + match result { + DistanceResult::CosineDistance(a) => a, + DistanceResult::CosineSimilarity(a) => a, + DistanceResult::EuclideanDistance(a) => a, + DistanceResult::Empty => panic!("Empty distance result"), + } + } +} + +impl From<&DistanceResult> for f64 { + fn from(result: &DistanceResult) -> Self { + match result { + DistanceResult::CosineDistance(a) => *a, + DistanceResult::CosineSimilarity(a) => *a, + DistanceResult::EuclideanDistance(a) => *a, + DistanceResult::Empty => panic!("Empty distance result"), + } + } +} + +impl Default for DistanceResult { + fn default() -> Self { + DistanceResult::CosineDistance(2.0) + } +} + +impl Display for DistanceResult { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + DistanceResult::CosineDistance(a) => write!(f, "CosineDistance({})", a), + DistanceResult::CosineSimilarity(a) => write!(f, "CosineSimilarity({})", a), + DistanceResult::EuclideanDistance(a) => write!(f, "EuclideanDistance({})", a), + DistanceResult::Empty => write!(f, "Empty"), + } + } +} \ No newline at end of file From bcfbf1ef294c2c1522cfafdbddd172af39beb4f6 Mon Sep 17 00:00:00 2001 From: xav-db Date: Tue, 16 Sep 2025 16:01:28 -0400 Subject: [PATCH 8/9] fixing clippy --- .../helix_engine/vector_core/vector_core.rs | 6 ++--- .../vector_core/vector_distance.rs | 27 ++++++++++--------- 2 files changed, 17 insertions(+), 16 deletions(-) diff --git a/helix-db/src/helix_engine/vector_core/vector_core.rs b/helix-db/src/helix_engine/vector_core/vector_core.rs index d54f1b4d..1442f8ea 100644 --- a/helix-db/src/helix_engine/vector_core/vector_core.rs +++ b/helix-db/src/helix_engine/vector_core/vector_core.rs @@ -274,7 +274,7 @@ impl VectorCore { continue; } - neighbor.set_distance(neighbor.distance_to(query, &self.method)?.into()); + neighbor.set_distance(neighbor.distance_to(query, &self.method)?); /* let passes_filters = match filter { @@ -313,10 +313,10 @@ impl VectorCore { let mut candidates: BinaryHeap = BinaryHeap::new(); let mut results: BinaryHeap = BinaryHeap::new(); - entry_point.set_distance(entry_point.distance_to(query, &self.method)?.into()); + entry_point.set_distance(entry_point.distance_to(query, &self.method)?); candidates.push(Candidate { id: entry_point.get_id(), - distance: entry_point.get_distance().clone(), + distance: entry_point.get_distance(), }); results.push(entry_point.clone()); visited.insert(entry_point.get_id()); diff --git a/helix-db/src/helix_engine/vector_core/vector_distance.rs b/helix-db/src/helix_engine/vector_core/vector_distance.rs index b11c7e0d..16c29b33 100644 --- a/helix-db/src/helix_engine/vector_core/vector_distance.rs +++ b/helix-db/src/helix_engine/vector_core/vector_distance.rs @@ -33,10 +33,10 @@ impl DistanceCalc for HVector { cosine_similarity(from, to).map(|sim| DistanceResult::CosineDistance(1.0 - sim)) } SimilarityMethod::CosineSimilarity => { - cosine_similarity(from, to).map(|sim| DistanceResult::CosineSimilarity(sim)) + cosine_similarity(from, to).map(DistanceResult::CosineSimilarity) } SimilarityMethod::EuclideanDistance => { - euclidean_distance(from, to).map(|dist| DistanceResult::EuclideanDistance(dist)) + euclidean_distance(from, to).map(DistanceResult::EuclideanDistance) } } } @@ -235,7 +235,13 @@ impl PartialOrd for DistanceResult { impl PartialOrd for DistanceResult { fn partial_cmp(&self, other: &Self) -> Option { - match (self, other) { + self.cmp(other).into() + } +} + +impl Ord for DistanceResult { + fn cmp(&self, other: &Self) -> Ordering { + let result = match (self, other) { (DistanceResult::CosineDistance(a), DistanceResult::CosineDistance(b)) => { b.partial_cmp(a) } @@ -249,13 +255,8 @@ impl PartialOrd for DistanceResult { (DistanceResult::Empty, _) => Some(Ordering::Greater), (_, DistanceResult::Empty) => Some(Ordering::Less), _ => unreachable!(), - } - } -} - -impl Ord for DistanceResult { - fn cmp(&self, other: &Self) -> Ordering { - self.partial_cmp(other).unwrap() + }; + result.unwrap() } } @@ -306,9 +307,9 @@ impl Default for DistanceResult { impl Display for DistanceResult { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { - DistanceResult::CosineDistance(a) => write!(f, "CosineDistance({})", a), - DistanceResult::CosineSimilarity(a) => write!(f, "CosineSimilarity({})", a), - DistanceResult::EuclideanDistance(a) => write!(f, "EuclideanDistance({})", a), + DistanceResult::CosineDistance(a) => write!(f, "CosineDistance({a})"), + DistanceResult::CosineSimilarity(a) => write!(f, "CosineSimilarity({a})"), + DistanceResult::EuclideanDistance(a) => write!(f, "EuclideanDistance({a})"), DistanceResult::Empty => write!(f, "Empty"), } } From bcff751109180b1f25068ae08d7096a7bbe41c75 Mon Sep 17 00:00:00 2001 From: xav-db Date: Wed, 17 Sep 2025 14:05:36 -0700 Subject: [PATCH 9/9] updating tool imports --- helix-db/src/helix_gateway/mcp/tools.rs | 40 ++++++++++--------------- 1 file changed, 15 insertions(+), 25 deletions(-) diff --git a/helix-db/src/helix_gateway/mcp/tools.rs b/helix-db/src/helix_gateway/mcp/tools.rs index 425bc13e..36319200 100644 --- a/helix-db/src/helix_gateway/mcp/tools.rs +++ b/helix-db/src/helix_gateway/mcp/tools.rs @@ -1,35 +1,25 @@ use crate::{ debug_println, helix_engine::{ - helix_gateway::embedding_providers::embedding_providers::{ - EmbeddingModel, get_embedding_model, - }, + bm25::bm25::{BM25, BM25Flatten, HBM25Config}, storage_core::HelixGraphStorage, traversal_core::{ - bm25::bm25::{BM25, BM25Flatten, HBM25Config}, - storage_core::HelixGraphStorage, - traversal_core::{ - ops::{ - bm25::search_bm25::SearchBM25Adapter, - g::G, - in_::{ - in_::{InAdapter, InNodesIterator}, - in_e::{InEdgesAdapter, InEdgesIterator}, - }, - out::{ - out::{OutAdapter, OutNodesIterator}, - out_e::{OutEdgesAdapter, OutEdgesIterator}, - }, - source::{add_e::EdgeType, e_from_type::EFromType, n_from_type::NFromType}, - util::order::OrderByAdapter, - vectors::{ - brute_force_search::BruteForceSearchVAdapter, search::SearchVAdapter, - }, + ops::{ + bm25::search_bm25::SearchBM25Adapter, + g::G, + in_::{ + in_::{InAdapter, InNodesIterator}, + in_e::{InEdgesAdapter, InEdgesIterator}, + }, + out::{ + out::{OutAdapter, OutNodesIterator}, + out_e::{OutEdgesAdapter, OutEdgesIterator}, }, - traversal_value::{Traversable, TraversalValue}, + source::{add_e::EdgeType, e_from_type::EFromType, n_from_type::NFromType}, + util::order::OrderByAdapter, + vectors::{brute_force_search::BruteForceSearchVAdapter, search::SearchVAdapter}, }, - types::GraphError, - vector_core::vector::HVector, + traversal_value::{Traversable, TraversalValue}, }, types::GraphError, vector_core::vector::HVector,