Add an optimised int8 vector distance function for aarch64. (#106133)
This commit adds an optimised int8 vector distance implementation for aarch64. Support for additional platforms, such as x64, will be added as a follow-up. The vector distance implementation outperforms Lucene's Panama vector implementation for binary comparisons by approximately 5x (depending on the number of dimensions). It does so by means of compiler intrinsics built into a separate native library and linked via Panama's FFI. Comparisons are performed on off-heap mmap'ed vector data. The implementation is currently only used during merging of scalar quantized segments, through a custom format, ES814HnswScalarQuantizedVectorsFormat, but its usage will likely be expanded over time.

Co-authored-by: Benjamin Trent <[email protected]>
Co-authored-by: Lorenzo Dematté <[email protected]>
Co-authored-by: Mark Vieira <[email protected]>
Co-authored-by: Ryan Ernst <[email protected]>
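For context, here is a minimal, hypothetical sketch of the mechanism the commit message describes: a Panama FFI (java.lang.foreign, final in JDK 22) downcall onto a native int8 dot-product routine, scoring vectors read directly from an mmap'ed file without copying them on-heap. The library name ("vec"), the symbol name ("dot8s"), its signature, and the on-disk layout (dims int8 values followed by a float32 correction offset, mirroring the benchmark below) are illustrative assumptions, not the actual Elasticsearch native API.

import java.lang.foreign.Arena;
import java.lang.foreign.FunctionDescriptor;
import java.lang.foreign.Linker;
import java.lang.foreign.MemorySegment;
import java.lang.foreign.SymbolLookup;
import java.lang.invoke.MethodHandle;
import java.nio.channels.FileChannel;
import java.nio.file.Path;
import java.nio.file.StandardOpenOption;

import static java.lang.foreign.ValueLayout.ADDRESS;
import static java.lang.foreign.ValueLayout.JAVA_INT;
import static java.lang.foreign.ValueLayout.JAVA_LONG;

public class NativeDotProductSketch {

    // Downcall handle for a hypothetical native function compiled with SIMD intrinsics:
    //   int32_t dot8s(const int8_t* a, const int8_t* b, int64_t dims);
    private static final MethodHandle DOT8S;

    static {
        Linker linker = Linker.nativeLinker();
        // "vec" is a placeholder library name for illustration only.
        SymbolLookup lookup = SymbolLookup.libraryLookup("vec", Arena.global());
        DOT8S = linker.downcallHandle(
            lookup.find("dot8s").orElseThrow(),
            FunctionDescriptor.of(JAVA_INT, ADDRESS, ADDRESS, JAVA_LONG)
        );
    }

    // Scores two int8 vectors that live in an mmap'ed file, passing slices of the
    // mapped segment straight to native code (no on-heap copy).
    static int dotProduct(Path vectorData, int dims, long firstOrd, long secondOrd) throws Throwable {
        try (FileChannel ch = FileChannel.open(vectorData, StandardOpenOption.READ);
             Arena arena = Arena.ofConfined()) {
            MemorySegment mmap = ch.map(FileChannel.MapMode.READ_ONLY, 0L, ch.size(), arena);
            long stride = dims + Float.BYTES; // assumed layout: int8 values then a float32 offset
            MemorySegment a = mmap.asSlice(firstOrd * stride, dims);
            MemorySegment b = mmap.asSlice(secondOrd * stride, dims);
            return (int) DOT8S.invokeExact(a, b, (long) dims);
        }
    }
}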
1 parent fb1bc58 · commit 6b52d78
Showing 63 changed files with 4,812 additions and 12 deletions.
188 changes: 188 additions & 0 deletions
benchmarks/src/main/java/org/elasticsearch/benchmark/vector/VectorScorerBenchmark.java
@@ -0,0 +1,188 @@
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0 and the Server Side Public License, v 1; you may not use this file except
 * in compliance with, at your election, the Elastic License 2.0 or the Server
 * Side Public License, v 1.
 */

package org.elasticsearch.benchmark.vector;

import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.quantization.ScalarQuantizedVectorSimilarity;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.vec.VectorScorer;
import org.elasticsearch.vec.VectorScorerFactory;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;

import java.io.IOException;
import java.nio.file.Files;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.vec.VectorSimilarityType.DOT_PRODUCT;
import static org.elasticsearch.vec.VectorSimilarityType.EUCLIDEAN;

@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
@Warmup(iterations = 3, time = 3)
@Measurement(iterations = 5, time = 3)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Thread)
/**
 * Benchmark that compares various scalar quantized vector similarity function
 * implementations: scalar, Lucene's panama-ized, and Elasticsearch's native.
 * Run with ./gradlew -p benchmarks run --args 'VectorScorerBenchmark'
 */
public class VectorScorerBenchmark {

    static {
        LogConfigurator.configureESLogging(); // native access requires logging to be initialized
    }

    @Param({ "96", "768", "1024" })
    int dims;
    int size = 2; // there are only two vectors to compare

    Directory dir;
    IndexInput in;
    VectorScorerFactory factory;

    byte[] vec1;
    byte[] vec2;
    float vec1Offset;
    float vec2Offset;
    float scoreCorrectionConstant;

    ScalarQuantizedVectorSimilarity luceneDotScorer;
    ScalarQuantizedVectorSimilarity luceneSqrScorer;
    VectorScorer nativeDotScorer;
    VectorScorer nativeSqrScorer;

    @Setup
    public void setup() throws IOException {
        var optionalVectorScorerFactory = VectorScorerFactory.instance();
        if (optionalVectorScorerFactory.isEmpty()) {
            String msg = "JDK=["
                + Runtime.version()
                + "], os.name=["
                + System.getProperty("os.name")
                + "], os.arch=["
                + System.getProperty("os.arch")
                + "]";
            throw new AssertionError("Vector scorer factory not present. Cannot run the benchmark. " + msg);
        }
        factory = optionalVectorScorerFactory.get();
        scoreCorrectionConstant = 1f;
        vec1 = new byte[dims];
        vec2 = new byte[dims];

        ThreadLocalRandom.current().nextBytes(vec1);
        ThreadLocalRandom.current().nextBytes(vec2);
        vec1Offset = ThreadLocalRandom.current().nextFloat();
        vec2Offset = ThreadLocalRandom.current().nextFloat();

        dir = new MMapDirectory(Files.createTempDirectory("nativeScalarQuantBench"));
        try (IndexOutput out = dir.createOutput("vector.data", IOContext.DEFAULT)) {
            out.writeBytes(vec1, 0, vec1.length);
            out.writeInt(Float.floatToIntBits(vec1Offset));
            out.writeBytes(vec2, 0, vec2.length);
            out.writeInt(Float.floatToIntBits(vec2Offset));
        }
        in = dir.openInput("vector.data", IOContext.DEFAULT);

        luceneDotScorer = ScalarQuantizedVectorSimilarity.fromVectorSimilarity(
            VectorSimilarityFunction.DOT_PRODUCT,
            scoreCorrectionConstant
        );
        luceneSqrScorer = ScalarQuantizedVectorSimilarity.fromVectorSimilarity(VectorSimilarityFunction.EUCLIDEAN, scoreCorrectionConstant);
        nativeDotScorer = factory.getScalarQuantizedVectorScorer(dims, size, scoreCorrectionConstant, DOT_PRODUCT, in).get();
        nativeSqrScorer = factory.getScalarQuantizedVectorScorer(dims, size, scoreCorrectionConstant, EUCLIDEAN, in).get();

        // sanity
        var f1 = dotProductLucene();
        var f2 = dotProductNative();
        var f3 = dotProductScalar();
        if (f1 != f2) {
            throw new AssertionError("lucene[" + f1 + "] != " + "native[" + f2 + "]");
        }
        if (f1 != f3) {
            throw new AssertionError("lucene[" + f1 + "] != " + "scalar[" + f3 + "]");
        }
        // square distance
        f1 = squareDistanceLucene();
        f2 = squareDistanceNative();
        f3 = squareDistanceScalar();
        if (f1 != f2) {
            throw new AssertionError("lucene[" + f1 + "] != " + "native[" + f2 + "]");
        }
        if (f1 != f3) {
            throw new AssertionError("lucene[" + f1 + "] != " + "scalar[" + f3 + "]");
        }
    }

    @TearDown
    public void teardown() throws IOException {
        IOUtils.close(dir, in);
    }

    @Benchmark
    public float dotProductLucene() {
        return luceneDotScorer.score(vec1, vec1Offset, vec2, vec2Offset);
    }

    @Benchmark
    public float dotProductNative() throws IOException {
        return nativeDotScorer.score(0, 1);
    }

    @Benchmark
    public float dotProductScalar() {
        int dotProduct = 0;
        for (int i = 0; i < vec1.length; i++) {
            dotProduct += vec1[i] * vec2[i];
        }
        float adjustedDistance = dotProduct * scoreCorrectionConstant + vec1Offset + vec2Offset;
        return (1 + adjustedDistance) / 2;
    }

    // -- square distance

    @Benchmark
    public float squareDistanceLucene() {
        return luceneSqrScorer.score(vec1, vec1Offset, vec2, vec2Offset);
    }

    @Benchmark
    public float squareDistanceNative() throws IOException {
        return nativeSqrScorer.score(0, 1);
    }

    @Benchmark
    public float squareDistanceScalar() {
        int squareDistance = 0;
        for (int i = 0; i < vec1.length; i++) {
            int diff = vec1[i] - vec2[i];
            squareDistance += diff * diff;
        }
        float adjustedDistance = squareDistance * scoreCorrectionConstant;
        return 1 / (1f + adjustedDistance);
    }
}
@@ -0,0 +1,110 @@
#!/usr/bin/env bash
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0 and the Server Side Public License, v 1; you may not use this file except
# in compliance with, at your election, the Elastic License 2.0 or the Server
# Side Public License, v 1.
#

set -e

if [ "$#" -ne 1 ]; then
  printf 'Usage: %s <version>\n' "$(basename "$0")"
  exit 0;
fi

if [ $(docker buildx inspect --bootstrap | grep -c 'Platforms:.*linux/arm64') -ne 1 ]; then
  echo 'Error: No Docker support for linux/arm64 detected'
  echo 'For more information see https://docs.docker.com/build/building/multi-platform'
  exit 1;
fi

if [ -z "$ARTIFACTORY_API_KEY" ]; then
  echo 'Error: The ARTIFACTORY_API_KEY environment variable must be set.'
  exit 1;
fi

VERSION="$1"
ARTIFACTORY_REPOSITORY="${ARTIFACTORY_REPOSITORY:-https://artifactory.elastic.dev/artifactory/elasticsearch-native/}"
TEMP=$(mktemp -d)

fetch_homebrew_artifact() {
  DIGEST=$(curl -sS --retry 3 -H "Accept: application/vnd.oci.image.index.v1+json" -H "Authorization: Bearer QQ==" \
    --location "https://ghcr.io/v2/homebrew/core/zstd/manifests/$VERSION" | jq -r \
    ".manifests[] | select(.platform.os == \"darwin\" and .platform.architecture == \"$1\" and .platform.\"os.version\" == \"macOS 13\") | .annotations.\"sh.brew.bottle.digest\"")

  OUTPUT_FILE="$TEMP/zstd-$VERSION-darwin-$1.tar.gz"
  curl -sS --retry 3 -H "Authorization: Bearer QQ==" --output "$OUTPUT_FILE" --location "https://ghcr.io/v2/homebrew/core/zstd/blobs/sha256:$DIGEST"
  echo $OUTPUT_FILE
}

download_license() {
  curl -sS --retry 3 --location https://raw.githubusercontent.com/facebook/zstd/v${VERSION}/LICENSE --output $1
}

echo 'Downloading MacOS zstd binaries...'
DARWIN_ARM_BREW=$(fetch_homebrew_artifact 'arm64')
DARWIN_X86_BREW=$(fetch_homebrew_artifact 'amd64')

build_darwin_jar() {
  ARTIFACT="$TEMP/zstd-$VERSION-darwin-$2.jar"
  TAR_DIR="$TEMP/darwin-$2"
  mkdir $TAR_DIR
  tar zxf $1 --strip-components=2 --include="*/LICENSE" --include="*/libzstd.$VERSION.dylib" -C $TAR_DIR && rm $1
  mv $TAR_DIR/lib/libzstd.$VERSION.dylib $TAR_DIR/libzstd.dylib && rm -rf $TAR_DIR/lib
  FILE_COUNT=$(ls -1 $TAR_DIR | wc -l | xargs)
  if [ "$FILE_COUNT" -ne 2 ]; then
    >&2 echo "ERROR: Expected 2 files in $TAR_DIR but found $FILE_COUNT"
    exit 1
  fi
  (cd $TAR_DIR/../ && zip -rq - $(basename $TAR_DIR)) > $ARTIFACT && rm -rf $TAR_DIR
  echo $ARTIFACT
}

echo 'Building MacOS jars...'
DARWIN_ARM_JAR=$(build_darwin_jar $DARWIN_ARM_BREW "aarch64")
DARWIN_X86_JAR=$(build_darwin_jar $DARWIN_X86_BREW "x86-64")

build_linux_jar() {
  ARTIFACT="$TEMP/zstd-$VERSION-linux-$2.jar"
  OUTPUT_DIR="$TEMP/linux-$2"
  mkdir $OUTPUT_DIR
  DOCKER_IMAGE=$(docker build --build-arg="ZSTD_VERSION=${VERSION}" --file zstd.Dockerfile --platform $1 --quiet .)
  docker run --platform $1 $DOCKER_IMAGE > $OUTPUT_DIR/libzstd.so
  download_license $OUTPUT_DIR/LICENSE
  (cd $OUTPUT_DIR/../ && zip -rq - $(basename $OUTPUT_DIR)) > $ARTIFACT && rm -rf $OUTPUT_DIR
  echo $ARTIFACT
}

echo 'Building Linux jars...'
LINUX_ARM_JAR=$(build_linux_jar "linux/arm64" "aarch64")
LINUX_X86_JAR=$(build_linux_jar "linux/amd64" "x86-64")

build_windows_jar() {
  ARTIFACT="$TEMP/zstd-$VERSION-windows-x86-64.jar"
  OUTPUT_DIR="$TEMP/win32-x86-64"
  mkdir $OUTPUT_DIR
  curl -sS --retry 3 --location https://github.com/facebook/zstd/releases/download/v${VERSION}/zstd-v${VERSION}-win64.zip --output $OUTPUT_DIR/zstd.zip
  unzip -jq $OUTPUT_DIR/zstd.zip zstd-v${VERSION}-win64/dll/libzstd.dll -d $OUTPUT_DIR && rm $OUTPUT_DIR/zstd.zip
  mv $OUTPUT_DIR/libzstd.dll $OUTPUT_DIR/zstd.dll
  download_license $OUTPUT_DIR/LICENSE
  (cd $OUTPUT_DIR/../ && zip -rq - $(basename $OUTPUT_DIR)) > $ARTIFACT && rm -rf $OUTPUT_DIR
  echo $ARTIFACT
}

echo 'Building Windows jar...'
WINDOWS_X86_JAR=$(build_windows_jar)

upload_artifact() {
  curl -sS -X PUT -H "X-JFrog-Art-Api: ${ARTIFACTORY_API_KEY}" --data-binary "@$1" --location "${ARTIFACTORY_REPOSITORY}/org/elasticsearch/zstd/${VERSION}/$(basename $1)"
}

echo 'Uploading artifacts...'
upload_artifact ${DARWIN_ARM_JAR}
upload_artifact ${DARWIN_X86_JAR}
upload_artifact ${LINUX_ARM_JAR}
upload_artifact ${LINUX_X86_JAR}
upload_artifact ${WINDOWS_X86_JAR}

rm -rf $TEMP
@@ -0,0 +1,11 @@
FROM centos:7
ARG ZSTD_VERSION

RUN yum install -y git gcc gcc-c++ make
RUN git clone --depth 1 --branch v${ZSTD_VERSION} https://github.com/facebook/zstd.git
WORKDIR zstd
RUN make lib-release && strip --strip-unneeded lib/libzstd.so.${ZSTD_VERSION}

ENV ZSTD_VERSION=${ZSTD_VERSION}

CMD cat lib/libzstd.so.${ZSTD_VERSION}
@@ -0,0 +1,5 @@
pr: 106133
summary: Add an optimised vector distance function for aarch64
area: Search
type: enhancement
issues: []