Skip to content

Commit

Permalink
Add an optimised int8 vector distance function for aarch64. (#106133)
Browse files Browse the repository at this point in the history
This commit adds an optimised int8 vector distance implementation for aarch64. Additional platforms like, say, x64, will be added as a follow-up.

The vector distance implementation outperforms Lucene's Panama Vector implementation for binary comparisons by approximately 5x (depending on the number of dimensions). It does so by means of compiler intrinsics built into a separate native library that is linked via Panama's FFI. Comparisons are performed on off-heap mmap'ed vector data.

The implementation is currently only used during merging of scalar quantized segments, through a custom format ES814HnswScalarQuantizedVectorsFormat, but its usage will likely be expanded over time.

Co-authored-by: Benjamin Trent <[email protected]>
Co-authored-by: Lorenzo Dematté <[email protected]>
Co-authored-by: Mark Vieira <[email protected]>
Co-authored-by: Ryan Ernst <[email protected]>
  • Loading branch information
5 people authored Apr 12, 2024
1 parent fb1bc58 commit 6b52d78
Show file tree
Hide file tree
Showing 63 changed files with 4,812 additions and 12 deletions.
13 changes: 13 additions & 0 deletions benchmarks/build.gradle
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ apply plugin: org.elasticsearch.gradle.internal.ElasticsearchJavaBasePlugin
apply plugin: 'java-library'
apply plugin: 'application'

var os = org.gradle.internal.os.OperatingSystem.current()

application {
mainClass = 'org.openjdk.jmh.Main'
}
Expand Down Expand Up @@ -39,6 +41,7 @@ dependencies {
api(project(':x-pack:plugin:ql'))
api(project(':x-pack:plugin:esql'))
api(project(':x-pack:plugin:esql:compute'))
implementation project(path: ':libs:elasticsearch-vec')
expression(project(path: ':modules:lang-expression', configuration: 'zip'))
painless(project(path: ':modules:lang-painless', configuration: 'zip'))
api "org.openjdk.jmh:jmh-core:$versions.jmh"
Expand Down Expand Up @@ -73,6 +76,16 @@ tasks.named("run").configure {
executable = "${BuildParams.runtimeJavaHome}/bin/java"
args << "-Dplugins.dir=${buildDir}/plugins" << "-Dtests.index=${buildDir}/index"
dependsOn "copyExpression", "copyPainless"
systemProperty 'java.library.path', file("../libs/native/libraries/build/platform/${platformName()}-${os.arch}")
}

/**
 * Maps the JVM's os.name system property to a platform name: any name
 * starting with "Mac" becomes "darwin", every other name is lower-cased.
 */
String platformName() {
  final String osName = System.getProperty("os.name")
  return osName.startsWith("Mac") ? "darwin" : osName.toLowerCase(Locale.ROOT)
}

spotless {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,188 @@
/*
* Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
* or more contributor license agreements. Licensed under the Elastic License
* 2.0 and the Server Side Public License, v 1; you may not use this file except
* in compliance with, at your election, the Elastic License 2.0 or the Server
* Side Public License, v 1.
*/

package org.elasticsearch.benchmark.vector;

import org.apache.lucene.index.VectorSimilarityFunction;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
import org.apache.lucene.store.MMapDirectory;
import org.apache.lucene.util.quantization.ScalarQuantizedVectorSimilarity;
import org.elasticsearch.common.logging.LogConfigurator;
import org.elasticsearch.core.IOUtils;
import org.elasticsearch.vec.VectorScorer;
import org.elasticsearch.vec.VectorScorerFactory;
import org.openjdk.jmh.annotations.Benchmark;
import org.openjdk.jmh.annotations.BenchmarkMode;
import org.openjdk.jmh.annotations.Fork;
import org.openjdk.jmh.annotations.Measurement;
import org.openjdk.jmh.annotations.Mode;
import org.openjdk.jmh.annotations.OutputTimeUnit;
import org.openjdk.jmh.annotations.Param;
import org.openjdk.jmh.annotations.Scope;
import org.openjdk.jmh.annotations.Setup;
import org.openjdk.jmh.annotations.State;
import org.openjdk.jmh.annotations.TearDown;
import org.openjdk.jmh.annotations.Warmup;

import java.io.IOException;
import java.nio.file.Files;
import java.nio.file.Path;
import java.util.concurrent.ThreadLocalRandom;
import java.util.concurrent.TimeUnit;

import static org.elasticsearch.vec.VectorSimilarityType.DOT_PRODUCT;
import static org.elasticsearch.vec.VectorSimilarityType.EUCLIDEAN;

/**
 * Benchmark that compares various scalar quantized vector similarity function
 * implementations: scalar, Lucene's panama-ized, and Elasticsearch's native.
 * Run with ./gradlew -p benchmarks run --args 'VectorScorerBenchmark'
 */
@Fork(value = 1, jvmArgsPrepend = { "--add-modules=jdk.incubator.vector" })
@Warmup(iterations = 3, time = 3)
@Measurement(iterations = 5, time = 3)
@BenchmarkMode(Mode.Throughput)
@OutputTimeUnit(TimeUnit.MICROSECONDS)
@State(Scope.Thread)
public class VectorScorerBenchmark {

    static {
        LogConfigurator.configureESLogging(); // native access requires logging to be initialized
    }

    @Param({ "96", "768", "1024" })
    int dims;
    int size = 2; // there are only two vectors to compare

    Path tmpDir; // scratch directory backing dir; deleted again in teardown()
    Directory dir;
    IndexInput in;
    VectorScorerFactory factory;

    byte[] vec1;
    byte[] vec2;
    float vec1Offset;
    float vec2Offset;
    float scoreCorrectionConstant;

    ScalarQuantizedVectorSimilarity luceneDotScorer;
    ScalarQuantizedVectorSimilarity luceneSqrScorer;
    VectorScorer nativeDotScorer;
    VectorScorer nativeSqrScorer;

    /**
     * Generates two random vectors, writes them (each followed by its float offset) to
     * a file in a fresh temporary {@link MMapDirectory}, creates the Lucene and native
     * scorers, and verifies that the Lucene, native and scalar implementations return
     * exactly the same score for both similarity functions.
     *
     * @throws IOException if the vector data cannot be written or read
     * @throws AssertionError if no vector scorer factory is available, or the
     *         implementations disagree
     */
    @Setup
    public void setup() throws IOException {
        factory = VectorScorerFactory.instance().orElseThrow(() -> {
            String msg = "JDK=["
                + Runtime.version()
                + "], os.name=["
                + System.getProperty("os.name")
                + "], os.arch=["
                + System.getProperty("os.arch")
                + "]";
            return new AssertionError("Vector scorer factory not present. Cannot run the benchmark. " + msg);
        });
        scoreCorrectionConstant = 1f;
        vec1 = new byte[dims];
        vec2 = new byte[dims];

        ThreadLocalRandom.current().nextBytes(vec1);
        ThreadLocalRandom.current().nextBytes(vec2);
        vec1Offset = ThreadLocalRandom.current().nextFloat();
        vec2Offset = ThreadLocalRandom.current().nextFloat();

        tmpDir = Files.createTempDirectory("nativeScalarQuantBench");
        dir = new MMapDirectory(tmpDir);
        try (IndexOutput out = dir.createOutput("vector.data", IOContext.DEFAULT)) {
            out.writeBytes(vec1, 0, vec1.length);
            out.writeInt(Float.floatToIntBits(vec1Offset));
            out.writeBytes(vec2, 0, vec2.length);
            out.writeInt(Float.floatToIntBits(vec2Offset));
        }
        in = dir.openInput("vector.data", IOContext.DEFAULT);

        luceneDotScorer = ScalarQuantizedVectorSimilarity.fromVectorSimilarity(
            VectorSimilarityFunction.DOT_PRODUCT,
            scoreCorrectionConstant
        );
        luceneSqrScorer = ScalarQuantizedVectorSimilarity.fromVectorSimilarity(VectorSimilarityFunction.EUCLIDEAN, scoreCorrectionConstant);
        nativeDotScorer = factory.getScalarQuantizedVectorScorer(dims, size, scoreCorrectionConstant, DOT_PRODUCT, in).orElseThrow();
        nativeSqrScorer = factory.getScalarQuantizedVectorScorer(dims, size, scoreCorrectionConstant, EUCLIDEAN, in).orElseThrow();

        // sanity: dot product
        float luceneDot = dotProductLucene();
        assertSameScore(luceneDot, "native", dotProductNative());
        assertSameScore(luceneDot, "scalar", dotProductScalar());
        // sanity: square distance
        float luceneSqr = squareDistanceLucene();
        assertSameScore(luceneSqr, "native", squareDistanceNative());
        assertSameScore(luceneSqr, "scalar", squareDistanceScalar());
    }

    /** Fails with an {@link AssertionError} unless {@code other} is exactly equal to the Lucene score. */
    private static void assertSameScore(float lucene, String otherName, float other) {
        if (lucene != other) {
            throw new AssertionError("lucene[" + lucene + "] != " + otherName + "[" + other + "]");
        }
    }

    /**
     * Closes the index input and its directory, then removes the vector file and the
     * temporary directory created by {@link #setup()}.
     *
     * @throws IOException if closing or deleting fails
     */
    @TearDown
    public void teardown() throws IOException {
        try {
            // close the input before the directory it was opened from
            IOUtils.close(in, dir);
        } finally {
            if (tmpDir != null) {
                // setup() only ever writes this one file into the directory
                Files.deleteIfExists(tmpDir.resolve("vector.data"));
                Files.deleteIfExists(tmpDir);
            }
        }
    }

    @Benchmark
    public float dotProductLucene() {
        return luceneDotScorer.score(vec1, vec1Offset, vec2, vec2Offset);
    }

    @Benchmark
    public float dotProductNative() throws IOException {
        // ordinals 0 and 1 are the two vectors written by setup()
        return nativeDotScorer.score(0, 1);
    }

    /** Plain-loop dot product baseline; setup() asserts it equals the Lucene result. */
    @Benchmark
    public float dotProductScalar() {
        int dotProduct = 0;
        for (int i = 0; i < vec1.length; i++) {
            dotProduct += vec1[i] * vec2[i];
        }
        float adjustedDistance = dotProduct * scoreCorrectionConstant + vec1Offset + vec2Offset;
        return (1 + adjustedDistance) / 2;
    }

    // -- square distance

    @Benchmark
    public float squareDistanceLucene() {
        return luceneSqrScorer.score(vec1, vec1Offset, vec2, vec2Offset);
    }

    @Benchmark
    public float squareDistanceNative() throws IOException {
        // ordinals 0 and 1 are the two vectors written by setup()
        return nativeSqrScorer.score(0, 1);
    }

    /** Plain-loop square distance baseline; setup() asserts it equals the Lucene result. */
    @Benchmark
    public float squareDistanceScalar() {
        int squareDistance = 0;
        for (int i = 0; i < vec1.length; i++) {
            int diff = vec1[i] - vec2[i];
            squareDistance += diff * diff;
        }
        float adjustedDistance = squareDistance * scoreCorrectionConstant;
        return 1 / (1f + adjustedDistance);
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,7 @@ public class InternalDistributionModuleCheckTaskProvider {
"org.elasticsearch.securesm",
"org.elasticsearch.server",
"org.elasticsearch.tdigest",
"org.elasticsearch.vec",
"org.elasticsearch.xcontent"
);

Expand Down
110 changes: 110 additions & 0 deletions dev-tools/publish_zstd_binaries.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
#!/usr/bin/env bash
#
# Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
# or more contributor license agreements. Licensed under the Elastic License
# 2.0 and the Server Side Public License, v 1; you may not use this file except
# in compliance with, at your election, the Elastic License 2.0 or the Server
# Side Public License, v 1.
#

set -e

# Exactly one argument, the zstd version to publish, is required. A usage
# error is a failure, so report it on stderr and exit non-zero.
if [ "$#" -ne 1 ]; then
  printf 'Usage: %s <version>\n' "$(basename "$0")" >&2
  exit 1
fi

# The Linux binaries are built with Docker for a given --platform, so buildx
# must list linux/arm64 among its platforms. grep -q succeeds on one or more
# matching lines rather than requiring exactly one.
if ! docker buildx inspect --bootstrap | grep -q 'Platforms:.*linux/arm64'; then
  echo 'Error: No Docker support for linux/arm64 detected' >&2
  echo 'For more information see https://docs.docker.com/build/building/multi-platform' >&2
  exit 1
fi

# The API key is sent as a header when uploading the finished artifacts.
if [ -z "$ARTIFACTORY_API_KEY" ]; then
  echo 'Error: The ARTIFACTORY_API_KEY environment variable must be set.' >&2
  exit 1
fi

VERSION="$1"
# Upload target; may be overridden from the environment.
ARTIFACTORY_REPOSITORY="${ARTIFACTORY_REPOSITORY:-https://artifactory.elastic.dev/artifactory/elasticsearch-native/}"
# Scratch directory for downloads and the jars being assembled.
TEMP=$(mktemp -d)

# Downloads the Homebrew zstd bottle built for macOS 13 on one architecture.
#   $1 - architecture as named in the OCI manifest index
# Reads the globals VERSION and TEMP. Prints the path of the downloaded
# tarball on stdout, so it is meant to be called as VAR=$(fetch_homebrew_artifact ...).
fetch_homebrew_artifact() {
  # bash clears errexit inside command substitutions, which is how this
  # function is invoked; turn it back on so a failed step aborts the run.
  set -e
  local arch="$1"
  local digest output_file

  # Look up the bottle digest for the requested platform in the manifest index.
  digest=$(curl -sS --fail --retry 3 -H "Accept: application/vnd.oci.image.index.v1+json" -H "Authorization: Bearer QQ==" \
    --location "https://ghcr.io/v2/homebrew/core/zstd/manifests/$VERSION" | jq -r \
    ".manifests[] | select(.platform.os == \"darwin\" and .platform.architecture == \"$arch\" and .platform.\"os.version\" == \"macOS 13\") | .annotations.\"sh.brew.bottle.digest\"")
  # Without a digest the blob URL below would be meaningless; stop here.
  if [ -z "$digest" ] || [ "$digest" = "null" ]; then
    >&2 echo "ERROR: No Homebrew bottle digest found for zstd $VERSION (darwin/$arch, macOS 13)"
    exit 1
  fi

  output_file="$TEMP/zstd-$VERSION-darwin-$arch.tar.gz"
  curl -sS --fail --retry 3 -H "Authorization: Bearer QQ==" --output "$output_file" --location "https://ghcr.io/v2/homebrew/core/zstd/blobs/sha256:$digest"
  echo "$output_file"
}

# Downloads the zstd LICENSE file for the release tagged v$VERSION.
#   $1 - destination file path
# --fail makes curl return non-zero on an HTTP error instead of writing the
# server's error page to the destination.
download_license() {
  curl -sS --fail --retry 3 --location "https://raw.githubusercontent.com/facebook/zstd/v${VERSION}/LICENSE" --output "$1"
}

# Fetch one Homebrew bottle per macOS architecture and remember where each landed.
printf '%s\n' 'Downloading MacOS zstd binaries...'
DARWIN_ARM_BREW="$(fetch_homebrew_artifact arm64)"
DARWIN_X86_BREW="$(fetch_homebrew_artifact amd64)"

# Repackages a Homebrew zstd bottle as a jar holding LICENSE and libzstd.dylib.
#   $1 - path of the downloaded bottle tarball (deleted after extraction)
#   $2 - architecture label used in the jar and directory names
# Reads the globals VERSION and TEMP. Prints the jar path on stdout.
build_darwin_jar() {
  # bash clears errexit inside command substitutions, which is how this
  # function is invoked; turn it back on so a failed step aborts the run.
  set -e
  local bottle="$1"
  local arch="$2"
  local artifact="$TEMP/zstd-$VERSION-darwin-$arch.jar"
  local tar_dir="$TEMP/darwin-$arch"
  local file_count

  mkdir "$tar_dir"
  # Extract only the licence and the versioned dylib from the bottle.
  tar zxf "$bottle" --strip-components=2 --include="*/LICENSE" --include="*/libzstd.$VERSION.dylib" -C "$tar_dir"
  rm "$bottle"
  # Drop the version from the library name and flatten the lib/ directory.
  mv "$tar_dir/lib/libzstd.$VERSION.dylib" "$tar_dir/libzstd.dylib"
  rm -rf "$tar_dir/lib"
  # Exactly two entries are expected: LICENSE and libzstd.dylib.
  file_count=$(ls -1 "$tar_dir" | wc -l | xargs)
  if [ "$file_count" -ne 2 ]; then
    >&2 echo "ERROR: Expected 2 files in $tar_dir but found $file_count"
    exit 1
  fi
  # Zip from the parent directory so entries are prefixed with darwin-<arch>/.
  (cd "$tar_dir/../" && zip -rq - "$(basename "$tar_dir")") > "$artifact"
  rm -rf "$tar_dir"
  echo "$artifact"
}

# Turn each downloaded bottle into a jar named after the target architecture.
printf '%s\n' 'Building MacOS jars...'
DARWIN_ARM_JAR="$(build_darwin_jar ${DARWIN_ARM_BREW} 'aarch64')"
DARWIN_X86_JAR="$(build_darwin_jar ${DARWIN_X86_BREW} 'x86-64')"

# Builds libzstd.so in Docker for one platform and packages it, together with
# the zstd LICENSE, as a jar.
#   $1 - Docker platform to build and run for (e.g. linux/arm64)
#   $2 - architecture label used in the jar and directory names
# Reads the globals VERSION and TEMP. Prints the jar path on stdout.
# zstd.Dockerfile and the build context are relative paths, so the script has
# to be run from the directory that contains zstd.Dockerfile.
build_linux_jar() {
  # bash clears errexit inside command substitutions, which is how this
  # function is invoked; turn it back on so a failed step aborts the run.
  set -e
  local platform="$1"
  local arch="$2"
  local artifact="$TEMP/zstd-$VERSION-linux-$arch.jar"
  local output_dir="$TEMP/linux-$arch"
  local docker_image

  mkdir "$output_dir"
  # Build the requested version rather than a fixed one, so the library matches
  # the version in the artifact name and the downloaded LICENSE.
  docker_image=$(docker build --build-arg="ZSTD_VERSION=$VERSION" --file zstd.Dockerfile --platform "$platform" --quiet .)
  # The image's default command writes the shared library to stdout.
  docker run --platform "$platform" "$docker_image" > "$output_dir/libzstd.so"
  download_license "$output_dir/LICENSE"
  # Zip from the parent directory so entries are prefixed with linux-<arch>/.
  (cd "$output_dir/../" && zip -rq - "$(basename "$output_dir")") > "$artifact"
  rm -rf "$output_dir"
  echo "$artifact"
}

echo 'Building Linux jars...'
# Each variable holds the jar for the architecture it is named after.
LINUX_ARM_JAR=$(build_linux_jar "linux/arm64" "aarch64")
LINUX_X86_JAR=$(build_linux_jar "linux/amd64" "x86-64")

# Downloads the official win64 zstd release and packages zstd.dll, together
# with the zstd LICENSE, as a jar.
# Takes no arguments; reads the globals VERSION and TEMP. Prints the jar path on stdout.
build_windows_jar() {
  # bash clears errexit inside command substitutions, which is how this
  # function is invoked; turn it back on so a failed step aborts the run.
  set -e
  local artifact="$TEMP/zstd-$VERSION-windows-x86-64.jar"
  local output_dir="$TEMP/win32-x86-64"

  mkdir "$output_dir"
  # --fail stops an HTTP error page from being saved in place of the zip.
  curl -sS --fail --retry 3 --location "https://github.com/facebook/zstd/releases/download/v${VERSION}/zstd-v${VERSION}-win64.zip" --output "$output_dir/zstd.zip"
  # Pull out only the DLL, discarding its directory path (-j).
  unzip -jq "$output_dir/zstd.zip" "zstd-v${VERSION}-win64/dll/libzstd.dll" -d "$output_dir"
  rm "$output_dir/zstd.zip"
  mv "$output_dir/libzstd.dll" "$output_dir/zstd.dll"
  download_license "$output_dir/LICENSE"
  # Zip from the parent directory so entries are prefixed with win32-x86-64/.
  (cd "$output_dir/../" && zip -rq - "$(basename "$output_dir")") > "$artifact"
  rm -rf "$output_dir"
  echo "$artifact"
}

# Windows is only built for x86-64.
printf '%s\n' 'Building Windows jar...'
WINDOWS_X86_JAR="$(build_windows_jar)"

# Uploads one file to the Artifactory repository with an HTTP PUT, under
# org/elasticsearch/zstd/<VERSION>/<file name>.
#   $1 - path of the file to upload
# Reads the globals ARTIFACTORY_API_KEY, ARTIFACTORY_REPOSITORY and VERSION.
upload_artifact() {
  local file="$1"
  # --fail turns an HTTP error response into a non-zero exit so a rejected
  # upload is not silently treated as success. A trailing slash on the
  # repository URL is stripped to avoid a double slash in the request path.
  curl -sS --fail -X PUT -H "X-JFrog-Art-Api: ${ARTIFACTORY_API_KEY}" --data-binary "@$file" \
    --location "${ARTIFACTORY_REPOSITORY%/}/org/elasticsearch/zstd/${VERSION}/$(basename "$file")"
}

echo 'Uploading artifacts...'
# Upload every jar built above; quoting keeps each path a single argument.
for jar in "${DARWIN_ARM_JAR}" "${DARWIN_X86_JAR}" "${LINUX_ARM_JAR}" "${LINUX_X86_JAR}" "${WINDOWS_X86_JAR}"; do
  upload_artifact "$jar"
done

# Remove the scratch directory now that everything has been uploaded.
rm -rf "$TEMP"
11 changes: 11 additions & 0 deletions dev-tools/zstd.Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Builds a stripped libzstd shared library from the zstd sources at tag
# v${ZSTD_VERSION}. Running the image writes the library to stdout.
FROM centos:7

# zstd version to build; pass it with --build-arg ZSTD_VERSION=<version>.
ARG ZSTD_VERSION

# Toolchain needed to fetch and compile zstd.
RUN yum install -y git gcc gcc-c++ make

# Shallow clone of just the requested release tag.
RUN git clone --depth 1 --branch "v${ZSTD_VERSION}" https://github.com/facebook/zstd.git /zstd
WORKDIR /zstd

# Build the release library and strip symbols that are not needed.
RUN make lib-release \
    && strip --strip-unneeded "lib/libzstd.so.${ZSTD_VERSION}"

# Build args are not visible at run time; copy the version into the
# environment so the CMD below can expand it.
ENV ZSTD_VERSION=${ZSTD_VERSION}

# Shell form so that ${ZSTD_VERSION} is expanded when the container starts.
CMD cat "lib/libzstd.so.${ZSTD_VERSION}"
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ static List<String> systemJvmOptions(Settings nodeSettings, final Map<String, St
* explore alternatives. See org.elasticsearch.xpack.searchablesnapshots.preallocate.Preallocate.
*/
"--add-opens=java.base/java.io=org.elasticsearch.preallocate",
"--add-opens=org.apache.lucene.core/org.apache.lucene.store=org.elasticsearch.vec",
maybeEnableNativeAccess(),
maybeOverrideDockerCgroup(distroType),
maybeSetActiveProcessorCount(nodeSettings),
Expand Down
5 changes: 5 additions & 0 deletions docs/changelog/106133.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
pr: 106133
summary: Add an optimised vector distance function for aarch64
area: Search
type: enhancement
issues: []
Original file line number Diff line number Diff line change
Expand Up @@ -9,12 +9,15 @@
package org.elasticsearch.nativeaccess.jna;

import org.elasticsearch.nativeaccess.lib.JavaLibrary;
import org.elasticsearch.nativeaccess.lib.NativeLibrary;
import org.elasticsearch.nativeaccess.lib.NativeLibraryProvider;
import org.elasticsearch.nativeaccess.lib.PosixCLibrary;
import org.elasticsearch.nativeaccess.lib.SystemdLibrary;
import org.elasticsearch.nativeaccess.lib.VectorLibrary;
import org.elasticsearch.nativeaccess.lib.ZstdLibrary;

import java.util.Map;
import java.util.function.Supplier;

public class JnaNativeLibraryProvider extends NativeLibraryProvider {

Expand All @@ -29,8 +32,14 @@ public JnaNativeLibraryProvider() {
SystemdLibrary.class,
JnaSystemdLibrary::new,
ZstdLibrary.class,
JnaZstdLibrary::new
JnaZstdLibrary::new,
VectorLibrary.class,
notImplemented()
)
);
}

/**
 * Returns a supplier for a native library that this JNA-based provider does not
 * implement. The supplier never yields a library: invoking it always throws.
 *
 * @return a supplier whose {@code get()} throws an {@link AssertionError}
 */
private static Supplier<NativeLibrary> notImplemented() {
    // the message makes an unexpected lookup diagnosable from the stack trace alone
    return () -> { throw new AssertionError("native library is not implemented by the JNA provider"); };
}
}
Loading

0 comments on commit 6b52d78

Please sign in to comment.