From 6f58f38258f239d24f1093d862d140114b543264 Mon Sep 17 00:00:00 2001 From: nipeone Date: Sat, 29 Mar 2025 21:09:53 +0800 Subject: [PATCH 1/3] add Feature of AI-powered search --- .code-samples.meilisearch.yaml | 5 + .../Converters/EmbedderSourceConverter.cs | 72 +++++++++ src/Meilisearch/Embedder.cs | 145 ++++++++++++++++++ .../Extensions/ObjectExtensions.cs | 4 + src/Meilisearch/Index.Documents.cs | 27 ++++ src/Meilisearch/Index.Embedders.cs | 50 ++++++ .../QueryParameters/DocumentsQuery.cs | 6 + .../QueryParameters/SimilarDocumentsQuery.cs | 71 +++++++++ src/Meilisearch/SearchQuery.cs | 45 ++++++ src/Meilisearch/Settings.cs | 6 + src/Meilisearch/SimilarDocumentsResult.cs | 65 ++++++++ tests/Meilisearch.Tests/Datasets.cs | 1 + .../Datasets/movies_with_vector.json | 32 ++++ tests/Meilisearch.Tests/DocumentTests.cs | 48 ++++++ .../Meilisearch.Tests.csproj | 1 + tests/Meilisearch.Tests/Movie.cs | 16 ++ tests/Meilisearch.Tests/SettingsTests.cs | 6 +- 17 files changed, 598 insertions(+), 2 deletions(-) create mode 100644 src/Meilisearch/Converters/EmbedderSourceConverter.cs create mode 100644 src/Meilisearch/Embedder.cs create mode 100644 src/Meilisearch/Index.Embedders.cs create mode 100644 src/Meilisearch/QueryParameters/SimilarDocumentsQuery.cs create mode 100644 src/Meilisearch/SimilarDocumentsResult.cs create mode 100644 tests/Meilisearch.Tests/Datasets/movies_with_vector.json diff --git a/.code-samples.meilisearch.yaml b/.code-samples.meilisearch.yaml index 454efca6..5447bfbf 100644 --- a/.code-samples.meilisearch.yaml +++ b/.code-samples.meilisearch.yaml @@ -843,3 +843,8 @@ facet_search_3: |- FacetQuery = "c" }; await client.Index("books").FacetSearchAsync("genres", query); +get_similar_documents_post_1: |- + await client.Index("movies").GetSimilarDocumentsAsync(new SimilarDocumentsQuery() { + Id = 143, + Embedder = "manual" + }); diff --git a/src/Meilisearch/Converters/EmbedderSourceConverter.cs b/src/Meilisearch/Converters/EmbedderSourceConverter.cs new file 
mode 100644 index 00000000..197debf4 --- /dev/null +++ b/src/Meilisearch/Converters/EmbedderSourceConverter.cs @@ -0,0 +1,72 @@ +using System.Text.Json; +using System; +using System.Text.Json.Serialization; + +namespace Meilisearch.Converters +{ + /// + /// + /// + public class EmbedderSourceConverter: JsonConverter + { + /// + /// + /// + /// + /// + /// + /// + public override EmbedderSource Read(ref Utf8JsonReader reader, Type typeToConvert, JsonSerializerOptions options) + { + var value = reader.GetString(); + switch (value) + { + case "openAi": + return EmbedderSource.OpenAi; + case "huggingFace": + return EmbedderSource.HuggingFace; + case "ollama": + return EmbedderSource.Ollama; + case "rest": + return EmbedderSource.Rest; + case "userProvided": + return EmbedderSource.UserProvided; + default: + return EmbedderSource.Empty; + } + } + + /// + /// + /// + /// + /// + /// + public override void Write(Utf8JsonWriter writer, EmbedderSource value, JsonSerializerOptions options) + { + string stringValue; + switch (value) + { + case EmbedderSource.OpenAi: + stringValue = "openAi"; + break; + case EmbedderSource.HuggingFace: + stringValue = "huggingFace"; + break; + case EmbedderSource.Ollama: + stringValue = "ollama"; + break; + case EmbedderSource.Rest: + stringValue = "rest"; + break; + case EmbedderSource.UserProvided: + stringValue = "userProvided"; + break; + default: + stringValue = string.Empty; + break; + } + writer.WriteStringValue(stringValue); + } + } +} diff --git a/src/Meilisearch/Embedder.cs b/src/Meilisearch/Embedder.cs new file mode 100644 index 00000000..b104b910 --- /dev/null +++ b/src/Meilisearch/Embedder.cs @@ -0,0 +1,145 @@ +using System.Collections.Generic; +using System.Text.Json.Serialization; + +using Meilisearch.Converters; + +namespace Meilisearch +{ + /// + /// Configure at least one embedder to use AI-powered search. + /// + public class Embedder + { + /// + /// Use source to configure an embedder's source. 
+ /// This field is mandatory. + /// + [JsonPropertyName("source")] + public EmbedderSource Source { get; set; } + + /// + /// Meilisearch queries url to generate vector embeddings for queries and documents. + /// + [JsonPropertyName("url")] + public string Url { get; set; } + + /// + /// Authentication token Meilisearch should send with each request to the embedder. + /// + [JsonPropertyName("apiKey")] + public string ApiKey { get; set; } + + /// + /// The model your embedder uses when generating vectors. + /// + [JsonPropertyName("model")] + public string Model { get; set; } + + /// + /// documentTemplate is a string containing a Liquid template. + /// + [JsonPropertyName("documentTemplate")] + public string DocumentTemplate { get; set; } + + /// + /// The maximum size of a rendered document template. Longer texts are truncated to fit the configured limit. + /// + [JsonPropertyName("documentTemplateMaxBytes")] + public int? DocumentTemplateMaxBytes { get; set; } + + /// + /// Number of dimensions in the chosen model. If not supplied, Meilisearch tries to infer this value. + /// + [JsonPropertyName("dimensions")] + public int? Dimensions { get; set; } + + /// + /// Use this field to use a specific revision of a model. + /// + [JsonPropertyName("revision")] + public string Revision { get; set; } + + /// + /// Use distribution when configuring an embedder to correct the returned + /// _rankingScores of the semantic hits with an affine transformation + /// + [JsonPropertyName("distribution")] + public Distribution Distribution { get; set; } + + ///// + ///// request must be a JSON object with the same structure + ///// and data of the request you must send to your rest embedder. + ///// + //[JsonPropertyName("request")] + //public object Request { get; set; } + + ///// + ///// response must be a JSON object with the same structure + ///// and data of the response you expect to receive from your rest embedder. 
+ ///// + //[JsonPropertyName("response")] + //public object Response { get; set; } + + /// + /// When set to true, compresses vectors by representing each dimension with 1-bit values. + /// + [JsonPropertyName("binaryQuantized")] + public bool? BinaryQuantized { get; set; } + } + + /// + /// Configuring distribution requires a certain amount of trial and error, + /// in which you must perform semantic searches and monitor the results. + /// Based on their rankingScores and relevancy, add the observed mean and sigma values for that index. + /// + public class Distribution + { + /// + /// a number between 0 and 1 indicating the semantic score of "somewhat relevant" + /// hits before using the distribution setting. + /// + [JsonPropertyName("mean")] + public float? Mean { get; set; } + + /// + /// a number between 0 and 1 indicating the average absolute difference in + /// _rankingScores between "very relevant" hits and "somewhat relevant" hits, + /// and "somewhat relevant" hits and "irrelevant" hits. + /// + [JsonPropertyName("sigma")] + public float? Sigma { get; set; } + } + + /// + /// + /// + [JsonConverter(typeof(EmbedderSourceConverter))] + public enum EmbedderSource + { + /// + /// empty source + /// + Empty, + /// + /// openAi source + /// + OpenAi, + /// + /// huggingFace source + /// + HuggingFace, + /// + /// ollama source + /// + Ollama, + /// + /// use rest to auto-generate embeddings with any embedder offering a REST API. + /// + Rest, + /// + /// You may also configure a userProvided embedder. + /// In this case, you must manually include vector data in your documents' _vectors field. 
+ /// + UserProvided + } +} diff --git a/src/Meilisearch/Extensions/ObjectExtensions.cs b/src/Meilisearch/Extensions/ObjectExtensions.cs index 1154f15a..74809c96 100644 --- a/src/Meilisearch/Extensions/ObjectExtensions.cs +++ b/src/Meilisearch/Extensions/ObjectExtensions.cs @@ -62,6 +62,10 @@ internal static string ToQueryString(this object source, BindingFlags bindingAtt { values.Add(key + "=" + Uri.EscapeDataString(datetimeValue.ToString("yyyy-MM-dd'T'HH:mm:ss.fffzzz"))); } + else if(value is Boolean boolValue) + { + values.Add(key + "=" + (boolValue ? "true" : "false")); + } else { values.Add(key + "=" + Uri.EscapeDataString(value.ToString())); diff --git a/src/Meilisearch/Index.Documents.cs b/src/Meilisearch/Index.Documents.cs index e21c0097..d05ca962 100644 --- a/src/Meilisearch/Index.Documents.cs +++ b/src/Meilisearch/Index.Documents.cs @@ -1,3 +1,4 @@ +using System.Collections; using System.Collections.Generic; using System.Linq; using System.Net.Http; @@ -413,6 +414,32 @@ public async Task>> GetDocumentsAsync(Document } } + /// + /// Get similar documents with the allowed Query Parameters. + /// + /// + /// + /// + /// + public async Task> GetSimilarDocumentsAsync(SimilarDocumentsQuery query = default, + CancellationToken cancellationToken = default) + { + try{ + var uri = $"indexes/{Uid}/similar"; + var result = await _http.PostAsJsonAsync(uri, query, Constants.JsonSerializerOptionsRemoveNulls, + cancellationToken: cancellationToken) + .ConfigureAwait(false); + return await result.Content + .ReadFromJsonAsync>(cancellationToken: cancellationToken) + .ConfigureAwait(false); + } + catch (MeilisearchCommunicationError e) + { + throw new MeilisearchCommunicationError( + Constants.VersionErrorHintMessage(e.Message, nameof(GetSimilarDocumentsAsync)), e); + } + } + /// /// Delete one document. 
/// diff --git a/src/Meilisearch/Index.Embedders.cs b/src/Meilisearch/Index.Embedders.cs new file mode 100644 index 00000000..a23ef371 --- /dev/null +++ b/src/Meilisearch/Index.Embedders.cs @@ -0,0 +1,50 @@ +using System.Collections.Generic; +using System.Net.Http.Json; +using System.Threading; +using System.Threading.Tasks; + +using Meilisearch.Extensions; +namespace Meilisearch +{ + public partial class Index + { + /// + /// Gets the embedders setting. + /// + /// The cancellation token for this call. + /// Returns the embedders setting. + public async Task> GetEmbeddersAsync(CancellationToken cancellationToken = default) + { + return await _http.GetFromJsonAsync>($"indexes/{Uid}/settings/embedders", cancellationToken: cancellationToken) + .ConfigureAwait(false); + } + + /// + /// Updates the embedders setting. + /// + /// Collection of embedders + /// The cancellation token for this call. + /// Returns the task info of the asynchronous task. + public async Task UpdateEmbeddersAsync(Dictionary embedders, CancellationToken cancellationToken = default) + { + var responseMessage = + await _http.PatchAsJsonAsync($"indexes/{Uid}/settings/embedders", embedders, Constants.JsonSerializerOptionsRemoveNulls, cancellationToken: cancellationToken) + .ConfigureAwait(false); + return await responseMessage.Content.ReadFromJsonAsync(cancellationToken: cancellationToken) + .ConfigureAwait(false); + } + + /// + /// Resets the embedders setting. + /// + /// The cancellation token for this call. + /// Returns the task info of the asynchronous task. 
+ public async Task ResetEmbeddersAsync(CancellationToken cancellationToken = default) + { + var response = await _http.DeleteAsync($"indexes/{Uid}/settings/embedders", cancellationToken) + .ConfigureAwait(false); + + return await response.Content.ReadFromJsonAsync(cancellationToken: cancellationToken).ConfigureAwait(false); + } + } +} diff --git a/src/Meilisearch/QueryParameters/DocumentsQuery.cs b/src/Meilisearch/QueryParameters/DocumentsQuery.cs index d5e2a191..d188595a 100644 --- a/src/Meilisearch/QueryParameters/DocumentsQuery.cs +++ b/src/Meilisearch/QueryParameters/DocumentsQuery.cs @@ -31,5 +31,11 @@ public class DocumentsQuery /// [JsonPropertyName("filter")] public object Filter { get; set; } + + /// + /// Return document vector data with search result + /// + [JsonPropertyName("retrieveVectors")] + public bool? RetrieveVectors { get; set; } } } diff --git a/src/Meilisearch/QueryParameters/SimilarDocumentsQuery.cs b/src/Meilisearch/QueryParameters/SimilarDocumentsQuery.cs new file mode 100644 index 00000000..e15df8ce --- /dev/null +++ b/src/Meilisearch/QueryParameters/SimilarDocumentsQuery.cs @@ -0,0 +1,71 @@ +using System.Collections.Generic; +using System.Text.Json.Serialization; + +using Meilisearch; + + +/// +/// A class that handles the creation of a query string for similar Documents. +/// +public class SimilarDocumentsQuery +{ + /// + /// Identifier of the target document + /// + [JsonPropertyName("id")] + public object Id { get; set; } + + /// + /// Embedder name to use when computing recommendations + /// + [JsonPropertyName("embedder")] + public string Embedder { get; set; } + + /// + /// Attributes to display in the returned documents + /// + [JsonPropertyName("attributesToRetrieve")] + public IEnumerable AttributesToRetrieve { get; set; } + + /// + /// Gets or sets the offset. + /// + [JsonPropertyName("offset")] + public int? Offset { get; set; } + + /// + /// Gets or sets the limit. + /// + [JsonPropertyName("limit")] + public int? 
Limit { get; set; } + + /// + /// Filter queries by an attribute's value + /// + [JsonPropertyName("filter")] + public string Filter { get; set; } + + /// + /// Display the global ranking score of a document + /// + [JsonPropertyName("showRankingScore")] + public bool? ShowRankingScore { get; set; } + + /// + /// Display detailed ranking score information + /// + [JsonPropertyName("showRankingScoreDetails")] + public bool? ShowRankingScoreDetails { get; set; } + + /// + /// Exclude results with low ranking scores + /// + [JsonPropertyName("rankingScoreThreshold")] + public float? RankingScoreThreshold { get; set; } + + /// + /// Return document vector data + /// + [JsonPropertyName("retrieveVectors")] + public bool? RetrieveVectors { get; set; } +} diff --git a/src/Meilisearch/SearchQuery.cs b/src/Meilisearch/SearchQuery.cs index fcfcbf19..d22a2984 100644 --- a/src/Meilisearch/SearchQuery.cs +++ b/src/Meilisearch/SearchQuery.cs @@ -148,5 +148,50 @@ public class SearchQuery /// [JsonPropertyName("rankingScoreThreshold")] public decimal? RankingScoreThreshold { get; set; } + + /// + /// Return results based on query keywords and meaning + /// + [JsonPropertyName("hybrid")] + public Hybrid Hybrid { get; set; } + + /// + /// Search using a custom query vector + /// + [JsonPropertyName("vector")] + public IEnumerable Vector { get; set; } + + /// + /// Return document vector data + /// + [JsonPropertyName("retrieveVectors")] + public bool? RetrieveVectors { get; set; } + + /// + /// Explicitly specify languages used in a query + /// + [JsonPropertyName("locales")] + public IEnumerable Locales { get; set; } + } + + /// + /// Configures Meilisearch to return search results based on a query's meaning and context. + /// + public class Hybrid + { + /// + /// embedder must be a string indicating an embedder configured with the /settings endpoint. 
+ /// + [JsonPropertyName("embedder")] + public string Embedder { get; set; } + + /// + /// semanticRatio must be a number between 0.0 and 1.0 indicating + /// the proportion between keyword and semantic search results. + /// 0.0 causes Meilisearch to only return keyword results. + /// 1.0 causes Meilisearch to only return meaning-based results. Defaults to 0.5. + /// + [JsonPropertyName("semanticRatio")] + public float? SemanticRatio { get; set; } } } diff --git a/src/Meilisearch/Settings.cs b/src/Meilisearch/Settings.cs index a926cc88..526af69e 100644 --- a/src/Meilisearch/Settings.cs +++ b/src/Meilisearch/Settings.cs @@ -103,5 +103,11 @@ public class Settings /// [JsonPropertyName("searchCutoffMs")] public int? SearchCutoffMs { get; set; } + + /// + /// Embedders translate documents and queries into vector embeddings. + /// + [JsonPropertyName("embedders")] + public IDictionary Embedders { get; set; } } } diff --git a/src/Meilisearch/SimilarDocumentsResult.cs b/src/Meilisearch/SimilarDocumentsResult.cs new file mode 100644 index 00000000..de1c5420 --- /dev/null +++ b/src/Meilisearch/SimilarDocumentsResult.cs @@ -0,0 +1,65 @@ + +using System.Collections.Generic; +using System.Text.Json.Serialization; + +namespace Meilisearch +{ + public class SimilarDocumentsResult + { + /// + /// + /// + /// + /// + /// + /// + /// + /// + /// + public SimilarDocumentsResult( + IReadOnlyCollection hits, string id, int offset, + int limit, int estimatedTotalHits,int processingTimeMs, string indexUid) + { + Id = id; + Hits = hits; + Offset = offset; + Limit = limit; + EstimatedTotalHits = estimatedTotalHits; + ProcessingTimeMs = processingTimeMs; + IndexUid = indexUid; + } + + [JsonPropertyName("id")] + public string Id { get; } + + /// + [JsonPropertyName("hits")] + public IReadOnlyCollection Hits { get; } + + /// + /// Number of documents skipped. + /// + [JsonPropertyName("offset")] + public int Offset { get; } + + /// + /// Number of documents to take. 
+ /// + [JsonPropertyName("limit")] + public int Limit { get; } + + /// + /// Gets the estimated total number of hits returned by the search. + /// + [JsonPropertyName("estimatedTotalHits")] + public int EstimatedTotalHits { get; } + + /// + [JsonPropertyName("processingTimeMs")] + public int ProcessingTimeMs { get; } + + /// + [JsonPropertyName("indexUid")] + public string IndexUid { get; } + } +} diff --git a/tests/Meilisearch.Tests/Datasets.cs b/tests/Meilisearch.Tests/Datasets.cs index 610454a2..6cf2899c 100644 --- a/tests/Meilisearch.Tests/Datasets.cs +++ b/tests/Meilisearch.Tests/Datasets.cs @@ -17,6 +17,7 @@ internal static class Datasets public static readonly string MoviesForFacetingJsonPath = Path.Combine(BasePath, "movies_for_faceting.json"); public static readonly string MoviesWithIntIdJsonPath = Path.Combine(BasePath, "movies_with_int_id.json"); public static readonly string MoviesWithInfoJsonPath = Path.Combine(BasePath, "movies_with_info.json"); + public static readonly string MoviesWithVectorJsonPath = Path.Combine(BasePath, "movies_with_vector.json"); public static readonly string ProductsForDistinctJsonPath = Path.Combine(BasePath, "products_for_distinct_search.json"); } diff --git a/tests/Meilisearch.Tests/Datasets/movies_with_vector.json b/tests/Meilisearch.Tests/Datasets/movies_with_vector.json new file mode 100644 index 00000000..aae43f09 --- /dev/null +++ b/tests/Meilisearch.Tests/Datasets/movies_with_vector.json @@ -0,0 +1,32 @@ +[ + { + "title": "Shazam!", + "release_year": 2019, + "id": "287947", + "_vectors": { "manual": [0.8, 0.4, -0.5]} + }, + { + "title": "Captain Marvel", + "release_year": 2019, + "id": "299537", + "_vectors": { "manual": [0.6, 0.8, -0.2] } + }, + { + "title": "Escape Room", + "release_year": 2019, + "id": "522681", + "_vectors": { "manual": [0.1, 0.6, 0.8] } + }, + { + "title": "How to Train Your Dragon: The Hidden World", + "release_year": 2019, + "id": "166428", + "_vectors": { "manual": [0.7, 0.7, -0.4] } + }, + { 
+ "title": "All Quiet on the Western Front", + "release_year": 1930, + "id": "143", + "_vectors": { "manual": [-0.5, 0.3, 0.85] } + } +] \ No newline at end of file diff --git a/tests/Meilisearch.Tests/DocumentTests.cs b/tests/Meilisearch.Tests/DocumentTests.cs index e6f00076..67f17ff2 100644 --- a/tests/Meilisearch.Tests/DocumentTests.cs +++ b/tests/Meilisearch.Tests/DocumentTests.cs @@ -1,9 +1,13 @@ +using System; using System.Collections.Generic; using System.IO; using System.Linq; +using System.Text.Json; +using System.Text.Json.Serialization; using System.Threading.Tasks; using FluentAssertions; +using FluentAssertions.Execution; using Meilisearch.QueryParameters; @@ -758,5 +762,49 @@ public async Task DeleteAllExistingDocuments() var docs = await index.GetDocumentsAsync(); docs.Results.Should().BeEmpty(); } + + [Fact] + public async Task GetSimilarDocumentsAsync() + { + var index = await _fixture.SetUpEmptyIndex("GetSimilarDocumentsAsyncTest"); + + var embedders = new Dictionary + { + { "manual", new Embedder() { Source = EmbedderSource.UserProvided, Dimensions = 3 }}, + }; + + var settings = await index.GetSettingsAsync(); + settings.FilterableAttributes = new string[] { "release_year" }; + settings.Embedders = embedders; + var task = await index.UpdateSettingsAsync(settings); + + await index.WaitForTaskAsync(task.TaskUid); + settings = await index.GetSettingsAsync(); + + // Add documents + var movies = await JsonFileReader.ReadAsync>(Datasets.MoviesWithVectorJsonPath); + task = await index.AddDocumentsAsync(movies); + + string json = JsonSerializer.Serialize(movies); + + // Check the documents have been added + task.TaskUid.Should().BeGreaterOrEqualTo(0); + await index.WaitForTaskAsync(task.TaskUid); + + var xx = (await index.GetDocumentsAsync(new DocumentsQuery() { RetrieveVectors = true })).Results.ToList(); + + // Get similar documents + var docs = (await index.GetSimilarDocumentsAsync( + new SimilarDocumentsQuery() { + Id = "143", + Embedder = "manual" 
+ })).Hits.ToList(); + + Assert.Equal(4, docs.Count); + Assert.Equal("Escape Room", docs[0].Title); + Assert.Equal("Captain Marvel", docs[1].Title); + Assert.Equal("How to Train Your Dragon: The Hidden World", docs[2].Title); + Assert.Equal("Shazam!", docs[3].Title); + } } } diff --git a/tests/Meilisearch.Tests/Meilisearch.Tests.csproj b/tests/Meilisearch.Tests/Meilisearch.Tests.csproj index 3a38b4cd..a4c96c29 100644 --- a/tests/Meilisearch.Tests/Meilisearch.Tests.csproj +++ b/tests/Meilisearch.Tests/Meilisearch.Tests.csproj @@ -33,6 +33,7 @@ + diff --git a/tests/Meilisearch.Tests/Movie.cs b/tests/Meilisearch.Tests/Movie.cs index dc20ffed..924410aa 100644 --- a/tests/Meilisearch.Tests/Movie.cs +++ b/tests/Meilisearch.Tests/Movie.cs @@ -1,5 +1,6 @@ using System.Collections.Generic; using System.Text.Json; +using System.Text.Json.Serialization; namespace Meilisearch.Tests { @@ -73,4 +74,19 @@ public class MovieWithRankingScoreDetails [System.Diagnostics.CodeAnalysis.SuppressMessage("Style", "IDE1006:Naming Styles", Justification = "Naming convention used to match meilisearch.")] public IDictionary _RankingScoreDetails { get; set; } } + + public class MovieWithVector + { + [JsonPropertyName("id")] + public string Id { get; set; } + + [JsonPropertyName("title")] + public string Title { get; set; } + + [JsonPropertyName("release_year")] + public int? ReleaseYear { get; set; } + + [JsonPropertyName("_vectors")] + public IDictionary> _Vectors { get; set; } + } } diff --git a/tests/Meilisearch.Tests/SettingsTests.cs b/tests/Meilisearch.Tests/SettingsTests.cs index 395bc54a..c41c1082 100644 --- a/tests/Meilisearch.Tests/SettingsTests.cs +++ b/tests/Meilisearch.Tests/SettingsTests.cs @@ -63,7 +63,8 @@ public SettingsTests(TFixture fixture) Pagination = new Pagination { MaxTotalHits = 1000 - } + }, + Embedders = new Dictionary { } }; } @@ -682,7 +683,8 @@ private static Settings SettingsWithDefaultedNullFields(Settings inputSettings, Pagination = inputSettings.Pagination ?? 
defaultSettings.Pagination, ProximityPrecision = inputSettings.ProximityPrecision ?? defaultSettings.ProximityPrecision, Dictionary = inputSettings.Dictionary ?? defaultSettings.Dictionary, - SearchCutoffMs = inputSettings.SearchCutoffMs ?? defaultSettings.SearchCutoffMs + SearchCutoffMs = inputSettings.SearchCutoffMs ?? defaultSettings.SearchCutoffMs, + Embedders = inputSettings.Embedders ?? defaultSettings.Embedders, }; } From dd12e31c0e57b1d637b3e0c24d60a0e6afaed886 Mon Sep 17 00:00:00 2001 From: nipeone Date: Mon, 31 Mar 2025 09:20:01 +0800 Subject: [PATCH 2/3] Make the types of embedders uniform --- src/Meilisearch/Settings.cs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/Meilisearch/Settings.cs b/src/Meilisearch/Settings.cs index 526af69e..b6de7164 100644 --- a/src/Meilisearch/Settings.cs +++ b/src/Meilisearch/Settings.cs @@ -108,6 +108,6 @@ public class Settings /// Embedders translate documents and queries into vector embeddings. /// [JsonPropertyName("embedders")] - public IDictionary Embedders { get; set; } + public Dictionary Embedders { get; set; } } } From c108ed178c886a5766597dd69c243436aa1e9daa Mon Sep 17 00:00:00 2001 From: nipeone Date: Mon, 31 Mar 2025 09:21:24 +0800 Subject: [PATCH 3/3] add embedder setting tests, customSearch test --- tests/Meilisearch.Tests/DocumentTests.cs | 27 +------------ tests/Meilisearch.Tests/IndexFixture.cs | 31 ++++++++++++++ tests/Meilisearch.Tests/SearchTests.cs | 21 ++++++++++ tests/Meilisearch.Tests/SettingsTests.cs | 51 ++++++++++++++++++++++++ 4 files changed, 104 insertions(+), 26 deletions(-) diff --git a/tests/Meilisearch.Tests/DocumentTests.cs b/tests/Meilisearch.Tests/DocumentTests.cs index 67f17ff2..1d1b3afd 100644 --- a/tests/Meilisearch.Tests/DocumentTests.cs +++ b/tests/Meilisearch.Tests/DocumentTests.cs @@ -766,32 +766,7 @@ public async Task DeleteAllExistingDocuments() [Fact] public async Task GetSimilarDocumentsAsync() { - var index = await 
_fixture.SetUpEmptyIndex("GetSimilarDocumentsAsyncTest"); - - var embedders = new Dictionary - { - { "manual", new Embedder() { Source = EmbedderSource.UserProvided, Dimensions = 3 }}, - }; - - var settings = await index.GetSettingsAsync(); - settings.FilterableAttributes = new string[] { "release_year" }; - settings.Embedders = embedders; - var task = await index.UpdateSettingsAsync(settings); - - await index.WaitForTaskAsync(task.TaskUid); - settings = await index.GetSettingsAsync(); - - // Add documents - var movies = await JsonFileReader.ReadAsync>(Datasets.MoviesWithVectorJsonPath); - task = await index.AddDocumentsAsync(movies); - - string json = JsonSerializer.Serialize(movies); - - // Check the documents have been added - task.TaskUid.Should().BeGreaterOrEqualTo(0); - await index.WaitForTaskAsync(task.TaskUid); - - var xx = (await index.GetDocumentsAsync(new DocumentsQuery() { RetrieveVectors = true })).Results.ToList(); + var index = await _fixture.SetUpIndexForVectorSearch("GetSimilarDocumentsAsyncTest"); // Get similar documents var docs = (await index.GetSimilarDocumentsAsync( diff --git a/tests/Meilisearch.Tests/IndexFixture.cs b/tests/Meilisearch.Tests/IndexFixture.cs index 485a3d79..4c4d201a 100644 --- a/tests/Meilisearch.Tests/IndexFixture.cs +++ b/tests/Meilisearch.Tests/IndexFixture.cs @@ -109,6 +109,37 @@ public async Task SetUpIndexForFaceting(string indexUid) return index; } + public async Task SetUpIndexForVectorSearch(string indexUid) + { + var index = DefaultClient.Index(indexUid); + + var task = await index.UpdateEmbeddersAsync(new Dictionary + { + { "manual", new Embedder { Source = EmbedderSource.UserProvided, Dimensions = 3 } } + }); + + var finishedTask = await index.WaitForTaskAsync(task.TaskUid); + if (finishedTask.Status != TaskInfoStatus.Succeeded) + { + throw new Exception($"The embedders were not updated during SetUpIndexForVectorSearch.\n" + + $"Impossible to run the tests.\n" + + $"{JsonSerializer.Serialize(finishedTask.Error)}"); 
+ } + + var movies = await JsonFileReader.ReadAsync>(Datasets.MoviesWithVectorJsonPath); + task = await index.AddDocumentsAsync(movies, primaryKey: "id"); + + finishedTask = await index.WaitForTaskAsync(task.TaskUid); + if (finishedTask.Status != TaskInfoStatus.Succeeded) + { + throw new Exception($"The documents were not added during SetUpIndexForVectorSearch.\n" + + $"Impossible to run the tests.\n" + + $"{JsonSerializer.Serialize(finishedTask.Error)}"); + } + + return index; + } + public async Task SetUpIndexForNestedSearch(string indexUid) { var index = DefaultClient.Index(indexUid); diff --git a/tests/Meilisearch.Tests/SearchTests.cs b/tests/Meilisearch.Tests/SearchTests.cs index 328b2143..5455a2af 100644 --- a/tests/Meilisearch.Tests/SearchTests.cs +++ b/tests/Meilisearch.Tests/SearchTests.cs @@ -12,6 +12,7 @@ public abstract class SearchTests : IAsyncLifetime where TFixture : In private Index _basicIndex; private Index _nestedIndex; private Index _indexForFaceting; + private Index _indexForVectorSearch; private Index _indexWithIntId; private Index _productIndexForDistinct; private Index _indexForRankingScoreThreshold; @@ -28,6 +29,7 @@ public async Task InitializeAsync() await _fixture.DeleteAllIndexes(); // Test context cleaned for each [Fact] _basicIndex = await _fixture.SetUpBasicIndex("BasicIndex-SearchTests"); _indexForFaceting = await _fixture.SetUpIndexForFaceting("IndexForFaceting-SearchTests"); + _indexForVectorSearch = await _fixture.SetUpIndexForVectorSearch("IndexForVector-SearchTests"); _indexWithIntId = await _fixture.SetUpBasicIndexWithIntId("IndexWithIntId-SearchTests"); _nestedIndex = await _fixture.SetUpIndexForNestedSearch("IndexForNestedDocs-SearchTests"); _productIndexForDistinct = await _fixture.SetUpIndexForDistinctProductsSearch("IndexForDistinctProducts-SearchTests"); @@ -554,5 +556,24 @@ public async Task CustomSearchWithRankingScoreThreshold() movies.Hits.First().Id.Should().Be("13"); movies.Hits.First().Name.Should().Be("Harry 
Potter"); } + + [Fact] + public async Task CustomSearchWithVector() + { + var searchQuery = new SearchQuery + { + Hybrid = new Hybrid + { + Embedder = "manual", + SemanticRatio = 1.0f + }, + Vector = new[] { 0.1f, 0.6f, 0.8f }, + }; + + var movies = await _indexForVectorSearch.SearchAsync(string.Empty, searchQuery); + + Assert.Equal("522681", movies.Hits.First().Id); + Assert.Equal("Escape Room", movies.Hits.First().Title); + } } } diff --git a/tests/Meilisearch.Tests/SettingsTests.cs b/tests/Meilisearch.Tests/SettingsTests.cs index c41c1082..0ab6a0c7 100644 --- a/tests/Meilisearch.Tests/SettingsTests.cs +++ b/tests/Meilisearch.Tests/SettingsTests.cs @@ -664,6 +664,57 @@ public async Task ResetSearchCutoffMsAsync() await AssertGetEquality(_index.GetSearchCutoffMsAsync, _defaultSettings.SearchCutoffMs); } + [Fact] + public async Task GetEmbeddersAsync() + { + await AssertGetEquality(_index.GetEmbeddersAsync, _defaultSettings.Embedders); + } + + [Fact] + public async Task UpdateEmbeddersAsync() + { + var newEmbedders = new Dictionary + { + { + "default", + new Embedder + { + Source = EmbedderSource.HuggingFace, + Model = "BAAI/bge-base-en-v1.5", + DocumentTemplate = "A movie titled '{{doc.name}}' with the following genre {{doc.genre}}", + DocumentTemplateMaxBytes = 400 + } + } + }; + + await AssertUpdateSuccess(_index.UpdateEmbeddersAsync, newEmbedders); + await AssertGetEquality(_index.GetEmbeddersAsync, newEmbedders); + } + + [Fact] + public async Task ResetEmbeddersAsync() + { + var newEmbedders = new Dictionary + { + { + "default", + new Embedder + { + Source = EmbedderSource.HuggingFace, + Model = "BAAI/bge-base-en-v1.5", + DocumentTemplate = "A movie titled '{{doc.name}}' with the following genre {{doc.genre}}", + DocumentTemplateMaxBytes = 400 + } + } + }; + + await AssertUpdateSuccess(_index.UpdateEmbeddersAsync, newEmbedders); + await AssertGetEquality(_index.GetEmbeddersAsync, newEmbedders); + + await AssertResetSuccess(_index.ResetEmbeddersAsync); + await 
AssertGetEquality(_index.GetEmbeddersAsync, _defaultSettings.Embedders); + } + private static Settings SettingsWithDefaultedNullFields(Settings inputSettings, Settings defaultSettings) { return new Settings