Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
19 changes: 17 additions & 2 deletions docs/integrations/embedding/voyageai.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ title: VoyageAI
sidebarTitle: VoyageAI
---

import { PyEmbeddingVoyageaiUsage } from '/snippets/integrations.mdx';
import { PyEmbeddingVoyageaiUsage, PyEmbeddingVoyageaiMultimodal } from '/snippets/integrations.mdx';

Voyage AI provides cutting-edge embedding models and rerankers.

Expand All @@ -22,15 +22,20 @@ Supported models are:
- voyage-multilingual-2
- voyage-law-2
- voyage-code-2
- voyage-multimodal-3.5 (multimodal - supports text, images, and video)

<Info>
**Multimodal Model:** `voyage-multimodal-3.5` supports text, images, and video inputs. It outputs 1024-dimensional embeddings by default, configurable via the `output_dimension` parameter (256, 512, 1024, 2048). See the [VoyageAI multimodal embeddings documentation](https://docs.voyageai.com/docs/multimodal-embeddings) for more details.
</Info>

Supported parameters (to be passed in `create` method) are:

| Parameter | Type | Default Value | Description |
|---|---|--------|---------|
| `name` | `str` | `None` | The model ID of the model to use. Supported base models for Text Embeddings: voyage-3, voyage-3-lite, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2 |
| `name` | `str` | `None` | The model ID of the model to use. Supported models: voyage-3, voyage-3-lite, voyage-3.5, voyage-3.5-lite, voyage-context-3, voyage-finance-2, voyage-multilingual-2, voyage-law-2, voyage-code-2, voyage-multimodal-3.5 |
| `input_type` | `str` | `None` | Type of the input text. Defaults to `None`. Other options: query, document. |
| `truncation` | `bool` | `True` | Whether to truncate the input texts to fit within the context length. |
| `output_dimension` | `int` | `None` | Output embedding dimension. Only supported by `voyage-multimodal-3.5`. Valid options: 256, 512, 1024 (default), 2048. |


Usage Example:
Expand All @@ -39,4 +44,14 @@ Usage Example:
<CodeBlock filename="Python" language="Python" icon="python">
{PyEmbeddingVoyageaiUsage}
</CodeBlock>
</CodeGroup>

### Multimodal Example

The `voyage-multimodal-3.5` model can embed text alongside images. You can use image URLs, file paths, or PIL Image objects:

<CodeGroup>
<CodeBlock filename="Python" language="Python" icon="python">
{PyEmbeddingVoyageaiMultimodal}
</CodeBlock>
</CodeGroup>
2 changes: 2 additions & 0 deletions docs/snippets/integrations.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ export const PyEmbeddingOpenclipTextSearch = "actual = table.search(\"man's best

export const PyEmbeddingSentenceTransformersBaai = "import tempfile\nfrom pathlib import Path\n\nimport lancedb\nfrom lancedb.embeddings import get_registry\nfrom lancedb.pydantic import LanceModel, Vector\n\ndb = lancedb.connect(str(Path(tempfile.mkdtemp()) / \"sentence-transformers\"))\nmodel = (\n get_registry()\n .get(\"sentence-transformers\")\n .create(name=\"BAAI/bge-small-en-v1.5\", device=\"cpu\")\n)\n\nclass Words(LanceModel):\n text: str = model.SourceField()\n vector: Vector(model.ndims()) = model.VectorField()\n\ntable = db.create_table(\"words\", schema=Words)\ntable.add(\n [\n {\"text\": \"hello world\"},\n {\"text\": \"goodbye world\"},\n ]\n)\n\nquery = \"greetings\"\nactual = table.search(query).limit(1).to_pydantic(Words)[0]\nprint(actual.text)\n";

export const PyEmbeddingVoyageaiMultimodal = "import tempfile\nfrom pathlib import Path\n\nimport lancedb\nfrom lancedb.embeddings import EmbeddingFunctionRegistry\nfrom lancedb.pydantic import LanceModel, Vector\n\n# Create multimodal embedding function with custom dimension\nvoyageai = (\n EmbeddingFunctionRegistry.get_instance()\n .get(\"voyageai\")\n .create(name=\"voyage-multimodal-3.5\", output_dimension=512)\n)\n\nclass ImageModel(LanceModel):\n image_uri: str = voyageai.SourceField()\n vector: Vector(voyageai.ndims()) = voyageai.VectorField()\n\ndb = lancedb.connect(str(Path(tempfile.mkdtemp()) / \"voyageai-multimodal\"))\ntbl = db.create_table(\"images\", schema=ImageModel, mode=\"overwrite\")\n\n# Add images using URLs\ntbl.add(\n [\n {\"image_uri\": \"https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/300px-PNG_transparency_demonstration_1.png\"},\n ]\n)\n\n# Search with text query\nresults = tbl.search(\"dice\").limit(1).to_list()\nprint(results)\n";

export const PyEmbeddingVoyageaiUsage = "import tempfile\nfrom pathlib import Path\n\nimport lancedb\nfrom lancedb.embeddings import EmbeddingFunctionRegistry\nfrom lancedb.pydantic import LanceModel, Vector\n\nvoyageai = (\n EmbeddingFunctionRegistry.get_instance().get(\"voyageai\").create(name=\"voyage-3\")\n)\n\nclass TextModel(LanceModel):\n text: str = voyageai.SourceField()\n vector: Vector(voyageai.ndims()) = voyageai.VectorField()\n\ndata = [{\"text\": \"hello world\"}, {\"text\": \"goodbye world\"}]\n\ndb = lancedb.connect(str(Path(tempfile.mkdtemp()) / \"voyageai-demo\"))\ntbl = db.create_table(\"test\", schema=TextModel, mode=\"overwrite\")\n\ntbl.add(data)\n";

export const PyFrameworksLangchainAddImages = "image_uris = [\"./assets/image-1.png\", \"./assets/image-2.png\"]\nvector_store.add_images(uris=image_uris)\n# here image_uris are local fs paths to the images.\n";
Expand Down
38 changes: 38 additions & 0 deletions tests/py/test_integrations.py
Original file line number Diff line number Diff line change
Expand Up @@ -548,6 +548,44 @@ class TextModel(LanceModel):
# --8<-- [end:embedding_voyageai_usage]


def test_embedding_voyageai_multimodal() -> None:
    """Exercise the VoyageAI multimodal embedding example end to end.

    Builds a ``voyage-multimodal-3.5`` embedding function with
    ``output_dimension=512``, creates an ``images`` table in a fresh temporary
    directory, adds a single image URL, and runs a text search against it.
    The search result is only printed; nothing is asserted about its contents.
    """
    # NOTE(review): presumably skips the test when VOYAGE_API_KEY is unset --
    # confirm against require_env's definition.
    require_env("VOYAGE_API_KEY")

    # NOTE(review): the region between the start/end markers below looks like it
    # is mirrored as the PyEmbeddingVoyageaiMultimodal docs snippet -- keep any
    # change inside the markers in sync with the published snippet.
    # --8<-- [start:embedding_voyageai_multimodal]
    import tempfile
    from pathlib import Path

    import lancedb
    from lancedb.embeddings import EmbeddingFunctionRegistry
    from lancedb.pydantic import LanceModel, Vector

    # Create multimodal embedding function with custom dimension
    voyageai = (
        EmbeddingFunctionRegistry.get_instance()
        .get("voyageai")
        .create(name="voyage-multimodal-3.5", output_dimension=512)
    )

    class ImageModel(LanceModel):
        image_uri: str = voyageai.SourceField()
        vector: Vector(voyageai.ndims()) = voyageai.VectorField()

    db = lancedb.connect(str(Path(tempfile.mkdtemp()) / "voyageai-multimodal"))
    tbl = db.create_table("images", schema=ImageModel, mode="overwrite")

    # Add images using URLs
    tbl.add(
        [
            {"image_uri": "https://upload.wikimedia.org/wikipedia/commons/thumb/4/47/PNG_transparency_demonstration_1.png/300px-PNG_transparency_demonstration_1.png"},
        ]
    )

    # Search with text query
    results = tbl.search("dice").limit(1).to_list()
    print(results)
    # --8<-- [end:embedding_voyageai_multimodal]


# Reranking integrations


Expand Down