diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt index 6252f85a3933..793c6b852680 100644 --- a/lucene/CHANGES.txt +++ b/lucene/CHANGES.txt @@ -105,6 +105,8 @@ Optimizations * GITHUB#14176: Reduce when visiting bpv24-encoded doc ids in BKD leaves. (Guo Feng) +* GITHUB#14203: Use Vector API to decode BKD docIds. (Guo Feng) + Bug Fixes --------------------- diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/BKDCodecBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/BKDCodecBenchmark.java new file mode 100644 index 000000000000..61c55b4f6945 --- /dev/null +++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/BKDCodecBenchmark.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.benchmark.jmh; + +import java.io.IOException; +import java.nio.file.Files; +import java.nio.file.Path; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.apache.lucene.store.Directory; +import org.apache.lucene.store.IOContext; +import org.apache.lucene.store.IndexInput; +import org.apache.lucene.store.IndexOutput; +import org.apache.lucene.store.MMapDirectory; +import org.apache.lucene.util.IOUtils; +import org.apache.lucene.util.bkd.BKDConfig; +import org.apache.lucene.util.bkd.BKDWriter; +import org.apache.lucene.util.bkd.DocIdsWriter; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Param; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.TearDown; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +@Warmup(iterations = 3, time = 1) +@Measurement(iterations = 5, time = 1) +@Fork(value = 1) +public class BKDCodecBenchmark { + + private static final int SIZE = BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE; + + @Param({"16", "24"}) + public int bpv; + + private Directory dir; + private DocIdsWriter legacy; + private IndexInput legacyIn; + private DocIdsWriter vector; + private IndexInput vectorIn; + private int[] docs; + + @Setup(Level.Trial) + public void setupTrial() throws IOException { + Path path = Files.createTempDirectory("bkd"); + dir = MMapDirectory.open(path); + docs = new int[SIZE]; + legacy = new DocIdsWriter(SIZE, BKDWriter.VERSION_META_FILE); + legacyIn =
writeDocIds("legacy", docs, legacy); + vector = new DocIdsWriter(SIZE, BKDWriter.VERSION_VECTORIZED_DOCID); + vectorIn = writeDocIds("current", docs, vector); + } + + private IndexInput writeDocIds(String file, int[] docs, DocIdsWriter writer) throws IOException { + try (IndexOutput out = dir.createOutput(file, IOContext.DEFAULT)) { + Random r = new Random(0); + // avoid cluster encoding + docs[0] = 1; + docs[1] = (1 << bpv) - 1; + for (int i = 2; i < SIZE; ++i) { + docs[i] = r.nextInt(1 << bpv); + } + writer.writeDocIds(docs, 0, SIZE, out); + } + return dir.openInput(file, IOContext.DEFAULT); + } + + @Setup(Level.Invocation) + public void setupInvocation() throws IOException { + legacyIn.seek(0); + vectorIn.seek(0); + } + + @TearDown(Level.Trial) + public void tearDownTrial() throws IOException { + IOUtils.close(legacyIn, vectorIn, dir); + } + + private int count(int iter) { + return iter % 20 == 0 ? SIZE - 1 : SIZE; + } + + @Benchmark + public void scalar(Blackhole bh) throws IOException { + for (int i = 0; i <= 100; i++) { + int count = count(i); + legacy.readInts(legacyIn, count, docs); + bh.consume(docs); + setupInvocation(); + } + } + + @Benchmark + public void vector(Blackhole bh) throws IOException { + for (int i = 0; i <= 100; i++) { + int count = count(i); + vector.readInts(vectorIn, count, docs); + bh.consume(docs); + setupInvocation(); + } + } +} diff --git a/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/ShiftMaskBenchmark.java b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/ShiftMaskBenchmark.java new file mode 100644 index 000000000000..be3628957ddb --- /dev/null +++ b/lucene/benchmark-jmh/src/java/org/apache/lucene/benchmark/jmh/ShiftMaskBenchmark.java @@ -0,0 +1,78 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.lucene.benchmark.jmh; + +import java.io.IOException; +import java.util.Random; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Level; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.Setup; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +/** no-commit : remove before merge */ +@BenchmarkMode(Mode.Throughput) +@OutputTimeUnit(TimeUnit.MILLISECONDS) +@State(Scope.Benchmark) +@Warmup(iterations = 3, time = 1) +@Measurement(iterations = 5, time = 1) +@Fork(value = 1) +public class ShiftMaskBenchmark { + + private int[] counts; + private int[] source; + private int[] dest; + + @Setup(Level.Trial) + public void setupTrial() throws IOException { + Random r = new Random(0); + source = new int[1024]; + dest = new int[1024]; + for (int i = 0; i < 512; i++) { + source[i] = r.nextInt(1 << 24); + } + counts = new int[] {255, 256, 511, 512}; + } + + @Benchmark + public void varOffset(Blackhole bh) throws IOException { + for (int count : counts) { + shiftMask(source, dest, count & 0x1, count, 8, 0xFF); + } + } + + @Benchmark + public void fixOffset(Blackhole bh) throws IOException { + for (int count : counts) { + shiftMask(source, dest, 1, count, 8, 0xFF); + } + } + + private static void shiftMask(int[] src, int[] dst, int offset, int count, int shift, int mask) { + for (int i = 0; i < count; i++) { + dst[i] = (src[i + offset] >> shift) & mask; + } + } +} diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsFormat.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsFormat.java index 64e61991eb1e..20ab3da1a25a 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsFormat.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsFormat.java @@ -17,11 +17,13 @@ package org.apache.lucene.codecs.lucene90; import java.io.IOException; +import java.util.Map; import org.apache.lucene.codecs.PointsFormat; import org.apache.lucene.codecs.PointsReader; import org.apache.lucene.codecs.PointsWriter; import org.apache.lucene.index.SegmentReadState; import org.apache.lucene.index.SegmentWriteState; +import org.apache.lucene.util.bkd.BKDWriter; /** * Lucene 9.0 point format, which encodes dimensional values in a block KD-tree structure for fast @@ -59,18 +61,39 @@ public final class Lucene90PointsFormat extends PointsFormat { public static final String META_EXTENSION = "kdm"; static final int VERSION_START = 0; - static final int VERSION_CURRENT = VERSION_START; + static final int VERSION_BKD_VECTORIZED_BPV24 = 1; + static final int VERSION_CURRENT = VERSION_BKD_VECTORIZED_BPV24; + + private static final Map<Integer, Integer> VERSION_TO_BKD_VERSION = + Map.of( + VERSION_START, BKDWriter.VERSION_META_FILE, + VERSION_BKD_VECTORIZED_BPV24, BKDWriter.VERSION_VECTORIZED_DOCID); + + private final int version; /** Sole constructor */ - public Lucene90PointsFormat() {} + public Lucene90PointsFormat() { + this(VERSION_CURRENT); + } + + /** Constructor that takes an explicit format version, e.g. to test older encodings. */ + public Lucene90PointsFormat(int version) { + if (VERSION_TO_BKD_VERSION.containsKey(version) == false) { + throw new IllegalArgumentException("Invalid version: " + version); +
} + this.version = version; + } @Override public PointsWriter fieldsWriter(SegmentWriteState state) throws IOException { - return new Lucene90PointsWriter(state); + return new Lucene90PointsWriter(state, version); } @Override public PointsReader fieldsReader(SegmentReadState state) throws IOException { return new Lucene90PointsReader(state); } + + static int bkdVersion(int version) { + return VERSION_TO_BKD_VERSION.get(version); + } } diff --git a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java index 45a946e8ac40..95b9cada9a3c 100644 --- a/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/codecs/lucene90/Lucene90PointsWriter.java @@ -46,16 +46,18 @@ public class Lucene90PointsWriter extends PointsWriter { final SegmentWriteState writeState; final int maxPointsInLeafNode; final double maxMBSortInHeap; + final int version; private boolean finished; /** Full constructor */ public Lucene90PointsWriter( - SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap) + SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap, int version) throws IOException { assert writeState.fieldInfos.hasPointValues(); this.writeState = writeState; this.maxPointsInLeafNode = maxPointsInLeafNode; this.maxMBSortInHeap = maxMBSortInHeap; + this.version = version; String dataFileName = IndexFileNames.segmentFileName( writeState.segmentInfo.name, @@ -105,15 +107,22 @@ public Lucene90PointsWriter( } } + public Lucene90PointsWriter( + SegmentWriteState writeState, int maxPointsInLeafNode, double maxMBSortInHeap) + throws IOException { + this(writeState, maxPointsInLeafNode, maxMBSortInHeap, Lucene90PointsFormat.VERSION_CURRENT); + } + /** * Uses the default values for {@code maxPointsInLeafNode} (512) and {@code maxMBSortInHeap} * (16.0) */ - public Lucene90PointsWriter(SegmentWriteState writeState) throws IOException { + public Lucene90PointsWriter(SegmentWriteState writeState, int version) throws IOException { this( writeState, BKDConfig.DEFAULT_MAX_POINTS_IN_LEAF_NODE, - BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP); + BKDWriter.DEFAULT_MAX_MB_SORT_IN_HEAP, + version); } @Override @@ -135,7 +144,8 @@ public void writeField(FieldInfo fieldInfo, PointsReader reader) throws IOExcept writeState.segmentInfo.name, config, maxMBSortInHeap, - values.size())) { + values.size(), + Lucene90PointsFormat.bkdVersion(version))) { if (values instanceof MutablePointTree) { IORunnable finalizer = @@ -233,7 +243,8 @@ public void merge(MergeState mergeState) throws IOException { writeState.segmentInfo.name, config, maxMBSortInHeap, - totMaxSize)) { + totMaxSize, + Lucene90PointsFormat.bkdVersion(version))) { List<PointValues> pointValues = new ArrayList<>(); List<MergeState.DocMap> docMaps = new ArrayList<>(); for (int i = 0; i < mergeState.pointsReaders.length; i++) { diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java index a90c79a8c808..e70530288a22 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDReader.java @@ -33,7 +33,6 @@ * @lucene.experimental */ public class BKDReader extends PointValues { - final BKDConfig config; final int numLeaves; final IndexInput in; @@ -261,7 +260,7 @@ private BKDPointTree( 1, minPackedValue, maxPackedValue, - new
BKDReaderDocIDSetIterator(config.maxPointsInLeafNode()), + new BKDReaderDocIDSetIterator(config.maxPointsInLeafNode(), version), new byte[config.packedBytesLength()], new byte[config.packedIndexBytesLength()], new byte[config.packedIndexBytesLength()], @@ -590,7 +589,8 @@ public void addAll(PointValues.IntersectVisitor visitor, boolean grown) throws I // How many points are stored in this leaf cell: int count = leafNodes.readVInt(); // No need to call grow(), it has been called up-front - docIdsWriter.readInts(leafNodes, count, visitor); + // Borrow scratchIterator.docIDs as the decoding buffer + docIdsWriter.readInts(leafNodes, count, visitor, scratchIterator.docIDs); } else { pushLeft(); addAll(visitor, grown); @@ -1028,9 +1028,9 @@ private static class BKDReaderDocIDSetIterator extends DocIdSetIterator { final int[] docIDs; private final DocIdsWriter docIdsWriter; - public BKDReaderDocIDSetIterator(int maxPointsInLeafNode) { + public BKDReaderDocIDSetIterator(int maxPointsInLeafNode, int version) { this.docIDs = new int[maxPointsInLeafNode]; - this.docIdsWriter = new DocIdsWriter(maxPointsInLeafNode); + this.docIdsWriter = new DocIdsWriter(maxPointsInLeafNode, version); } @Override diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java index 2af93ef5ca04..9e090b92f336 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/BKDWriter.java @@ -85,7 +85,8 @@ public class BKDWriter implements Closeable { public static final int VERSION_SELECTIVE_INDEXING = 6; public static final int VERSION_LOW_CARDINALITY_LEAVES = 7; public static final int VERSION_META_FILE = 9; - public static final int VERSION_CURRENT = VERSION_META_FILE; + public static final int VERSION_VECTORIZED_DOCID = 10; + public static final int VERSION_CURRENT = VERSION_VECTORIZED_DOCID; /** Number of splits before we compute the exact bounding box of an inner node.
*/ private static final int SPLITS_BEFORE_EXACT_BOUNDS = 4; @@ -103,6 +104,7 @@ public class BKDWriter implements Closeable { final TrackingDirectoryWrapper tempDir; final String tempFileNamePrefix; final double maxMBSortInHeap; + final int version; final byte[] scratchDiff; final byte[] scratch; @@ -139,6 +141,28 @@ public BKDWriter( BKDConfig config, double maxMBSortInHeap, long totalPointCount) { + this( + maxDoc, + tempDir, + tempFileNamePrefix, + config, + maxMBSortInHeap, + totalPointCount, + BKDWriter.VERSION_CURRENT); + } + + public BKDWriter( + int maxDoc, + Directory tempDir, + String tempFileNamePrefix, + BKDConfig config, + double maxMBSortInHeap, + long totalPointCount, + int version) { + if (version < VERSION_START || version > VERSION_CURRENT) { + throw new IllegalArgumentException("Version out of range: " + version); + } + this.version = version; verifyParams(maxMBSortInHeap, totalPointCount); // We use tracking dir to deal with removing files on exception, so each place that // creates temp files doesn't need crazy try/finally/success logic: @@ -165,7 +189,7 @@ public BKDWriter( // Maximum number of points we hold in memory at any time maxPointsSortInHeap = (int) ((maxMBSortInHeap * 1024 * 1024) / (config.bytesPerDoc())); - docIdsWriter = new DocIdsWriter(config.maxPointsInLeafNode()); + docIdsWriter = new DocIdsWriter(config.maxPointsInLeafNode(), version); // Finally, we must be able to hold at least the leaf node in heap during build: if (maxPointsSortInHeap < config.maxPointsInLeafNode()) { throw new IllegalArgumentException( @@ -1245,7 +1269,7 @@ private void writeIndex( byte[] packedIndex, long dataStartFP) throws IOException { - CodecUtil.writeHeader(metaOut, CODEC_NAME, VERSION_CURRENT); + CodecUtil.writeHeader(metaOut, CODEC_NAME, version); metaOut.writeVInt(config.numDims()); metaOut.writeVInt(config.numIndexDims()); metaOut.writeVInt(countPerLeaf); diff --git a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java index 805d7079b981..759940416bd2 100644 --- a/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java +++ b/lucene/core/src/java/org/apache/lucene/util/bkd/DocIdsWriter.java @@ -28,7 +28,8 @@ import org.apache.lucene.util.IntsRef; import org.apache.lucene.util.LongsRef; -final class DocIdsWriter { +/** Public for JMH benchmarks. */ +public final class DocIdsWriter { private static final byte CONTINUOUS_IDS = (byte) -2; private static final byte BITSET_IDS = (byte) -1; @@ -38,7 +39,10 @@ final class DocIdsWriter { // These signs are legacy and should no longer be used on the write side.
private static final byte LEGACY_DELTA_VINT = (byte) 0; + private static final int[] BATCHES = new int[] {512, 128}; + private final int[] scratch; + private final LongsRef scratchLongs = new LongsRef(); /** @@ -52,16 +56,19 @@ final class DocIdsWriter { */ private final IntsRef scratchIntsRef = new IntsRef(); + private final int version; + { // This is here to not rely on the default constructor of IntsRef to set offset to 0 scratchIntsRef.offset = 0; } - DocIdsWriter(int maxPointsInLeaf) { - scratch = new int[maxPointsInLeaf]; + public DocIdsWriter(int maxPointsInLeaf, int version) { + this.scratch = new int[maxPointsInLeaf]; + this.version = version; } - void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOException { + public void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOException { // docs can be sorted either when all docs in a block have the same value // or when a segment is sorted boolean strictlySorted = true; @@ -101,44 +108,45 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx scratch[i] = docIds[start + i] - min; } out.writeVInt(min); - final int halfLen = count >>> 1; - for (int i = 0; i < halfLen; ++i) { - scratch[i] = scratch[halfLen + i] | (scratch[i] << 16); - } - for (int i = 0; i < halfLen; i++) { - out.writeInt(scratch[i]); - } - if ((count & 1) == 1) { - out.writeShort((short) scratch[count - 1]); + if (version < BKDWriter.VERSION_VECTORIZED_DOCID) { + writeInts16(0, scratch, count, out); + } else { + int k = 0; + for (int batchSize : BATCHES) { + for (int bound = count - batchSize + 1; k < bound; k += batchSize) { + writeInts16(k, scratch, batchSize, out); + } + } + for (; k < count; k++) { + out.writeShort((short) scratch[k]); + } } } else { if (max <= 0xFFFFFF) { out.writeByte(BPV_24); - // write them the same way we are reading them. 
- int i; - for (i = 0; i < count - 7; i += 8) { - int doc1 = docIds[start + i]; - int doc2 = docIds[start + i + 1]; - int doc3 = docIds[start + i + 2]; - int doc4 = docIds[start + i + 3]; - int doc5 = docIds[start + i + 4]; - int doc6 = docIds[start + i + 5]; - int doc7 = docIds[start + i + 6]; - int doc8 = docIds[start + i + 7]; - long l1 = (doc1 & 0xffffffL) << 40 | (doc2 & 0xffffffL) << 16 | ((doc3 >>> 8) & 0xffffL); - long l2 = - (doc3 & 0xffL) << 56 - | (doc4 & 0xffffffL) << 32 - | (doc5 & 0xffffffL) << 8 - | ((doc6 >> 16) & 0xffL); - long l3 = (doc6 & 0xffffL) << 48 | (doc7 & 0xffffffL) << 24 | (doc8 & 0xffffffL); - out.writeLong(l1); - out.writeLong(l2); - out.writeLong(l3); - } - for (; i < count; ++i) { - out.writeShort((short) (docIds[start + i] >>> 8)); - out.writeByte((byte) docIds[start + i]); + if (version < BKDWriter.VERSION_VECTORIZED_DOCID) { + writeScalarInts24(docIds, start, count, out); + } else { + int k = 0; + for (int batchSize : BATCHES) { + for (int bound = count - batchSize + 1; k < bound; k += batchSize) { + final int quarterLen = batchSize >>> 2; + final int quarterLen3 = quarterLen * 3; + for (int i = k; i < k + quarterLen3; i++) { + scratch[i] = docIds[i + start] << 8; + } + for (int i = k; i < k + quarterLen; i++) { + final int longIdx = i + quarterLen3 + start; + scratch[i] |= docIds[longIdx] >>> 16; + scratch[i + quarterLen] |= (docIds[longIdx] >>> 8) & 0xFF; + scratch[i + quarterLen * 2] |= docIds[longIdx] & 0xFF; + } + for (int i = k; i < k + quarterLen3; i++) { + out.writeInt(scratch[i]); + } + } + } + writeScalarInts24(docIds, start + k, count - k, out); } } else { out.writeByte(BPV_32); @@ -149,6 +157,49 @@ void writeDocIds(int[] docIds, int start, int count, DataOutput out) throws IOEx } } + private static void writeInts16(int k, int[] scratch, int count, DataOutput out) + throws IOException { + final int halfLen = count >> 1; + for (int i = k; i < k + halfLen; i++) { + scratch[i] = scratch[halfLen + i] | (scratch[i] << 16); + } + for (int i = k; i < k + halfLen; i++) { + out.writeInt(scratch[i]); + } + if ((count & 1) == 1) { + out.writeShort((short) scratch[k + count - 1]); + } + } + + private static void writeScalarInts24(int[] docIds, int start, int count, DataOutput out) + throws IOException { + int i; + for (i = 0; i < count - 7; i += 8) { + int doc1 = docIds[start + i]; + int doc2 = docIds[start + i + 1]; + int doc3 = docIds[start + i + 2]; + int doc4 = docIds[start + i + 3]; + int doc5 = docIds[start + i + 4]; + int doc6 = docIds[start + i + 5]; + int doc7 = docIds[start + i + 6]; + int doc8 = docIds[start + i + 7]; + long l1 = (doc1 & 0xffffffL) << 40 | (doc2 & 0xffffffL) << 16 | ((doc3 >>> 8) & 0xffffL); + long l2 = + (doc3 & 0xffL) << 56 + | (doc4 & 0xffffffL) << 32 + | (doc5 & 0xffffffL) << 8 + | ((doc6 >> 16) & 0xffL); + long l3 = (doc6 & 0xffffL) << 48 | (doc7 & 0xffffffL) << 24 | (doc8 & 0xffffffL); + out.writeLong(l1); + out.writeLong(l2); + out.writeLong(l3); + } + for (; i < count; ++i) { + out.writeShort((short) (docIds[start + i] >>> 8)); + out.writeByte((byte) docIds[start + i]); + } + } + private static void writeIdsAsBitSet(int[] docIds, int start, int count, DataOutput out) throws IOException { int min = docIds[start]; @@ -183,7 +234,7 @@ private static void writeIdsAsBitSet(int[] docIds, int start, int count, DataOut } /** Read {@code count} integers into {@code docIDs}. 
*/ - void readInts(IndexInput in, int count, int[] docIDs) throws IOException { + public void readInts(IndexInput in, int count, int[] docIDs) throws IOException { final int bpv = in.readByte(); switch (bpv) { case CONTINUOUS_IDS: @@ -193,10 +244,18 @@ void readInts(IndexInput in, int count, int[] docIDs) throws IOException { readBitSet(in, count, docIDs); break; case DELTA_BPV_16: - readDelta16(in, count, docIDs); + if (version < BKDWriter.VERSION_VECTORIZED_DOCID) { + readDelta16Legacy(in, count, docIDs); + } else { + readDelta16(in, count, docIDs); + } break; case BPV_24: - readInts24(in, count, docIDs); + if (version < BKDWriter.VERSION_VECTORIZED_DOCID) { + readScalarInts24(in, count, docIDs, 0); + } else { + readInts24(in, count, docIDs); + } break; case BPV_32: readInts32(in, count, docIDs); break; @@ -248,9 +307,9 @@ private void readBitSet(IndexInput in, int count, int[] docIDs) throws IOExcepti assert pos == count : "pos: " + pos + ", count: " + count; } - private static void readDelta16(IndexInput in, int count, int[] docIDs) throws IOException { + private static void readDelta16Legacy(IndexInput in, int count, int[] docIDs) throws IOException { final int min = in.readVInt(); - final int halfLen = count >>> 1; + final int halfLen = count >> 1; in.readInts(docIDs, 0, halfLen); for (int i = 0; i < halfLen; ++i) { int l = docIDs[i]; @@ -262,9 +321,67 @@ private static void readDelta16(IndexInput in, int count, int[] docIDs) throws I } } - private static void readInts24(IndexInput in, int count, int[] docIDs) throws IOException { + private static void readDelta16(IndexInput in, int count, int[] docIds) throws IOException { + final int min = in.readVInt(); + int k = 0; + for (int bound = count - 511; k < bound; k += 512) { + in.readInts(docIds, k, 256); + // Can be inlined to make offsets consistent so that the loop gets auto-vectorized. + inner16(k, docIds, 256, min); + } + for (int bound = count - 127; k < bound; k += 128) { + in.readInts(docIds, k, 64); + inner16(k, docIds, 64, min); + } + for (; k < count; k++) { + docIds[k] = Short.toUnsignedInt(in.readShort()) + min; + } + } + + private static void inner16(int k, int[] docIds, int half, int min) { + for (int i = k, to = k + half; i < to; ++i) { + final int l = docIds[i]; + docIds[i] = (l >>> 16) + min; + docIds[i + half] = (l & 0xFFFF) + min; + } + } + + private void readInts24(IndexInput in, int count, int[] docIDs) throws IOException { + int k = 0; + for (int bound = count - 511; k < bound; k += 512) { + in.readInts(scratch, k, 384); + shift(k, docIDs, scratch, 384); + // Can be inlined to make offsets consistent so that the loop gets auto-vectorized.
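+ // Layout of each 512-doc block (mirroring the write side): 384 ints carry one docId each in + // their high 24 bits, extracted by shift above, while their three low bytes are recombined + // by remainder24 into the remaining 128 docIds.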
+ remainder24(k, docIDs, scratch, 128, 256, 384); + } + for (int bound = count - 127; k < bound; k += 128) { + in.readInts(scratch, k, 96); + shift(k, docIDs, scratch, 96); + remainder24(k, docIDs, scratch, 32, 64, 96); + } + readScalarInts24(in, count - k, docIDs, k); + } + + private static void shift(int k, int[] docIds, int[] scratch, int halfAndQuarter) { + for (int i = k, to = k + halfAndQuarter; i < to; i++) { + docIds[i] = scratch[i] >>> 8; + } + } + + private static void remainder24( + int k, int[] docIds, int[] scratch, int quarter, int half, int halfAndQuarter) { + for (int i = k, to = k + quarter; i < to; i++) { + docIds[i + halfAndQuarter] = + ((scratch[i] & 0xFF) << 16) + | ((scratch[i + quarter] & 0xFF) << 8) + | (scratch[i + half] & 0xFF); + } + } + + private static void readScalarInts24(IndexInput in, int count, int[] docIDs, int offset) + throws IOException { int i; - for (i = 0; i < count - 7; i += 8) { + for (i = offset; i < offset + count - 7; i += 8) { long l1 = in.readLong(); long l2 = in.readLong(); long l3 = in.readLong(); docIDs[i] = (int) (l1 >>> 40); docIDs[i + 1] = (int) (l1 >>> 16) & 0xffffff; docIDs[i + 2] = (int) (((l1 & 0xffff) << 8) | (l2 >>> 56)); docIDs[i + 3] = (int) (l2 >>> 32) & 0xffffff; docIDs[i + 4] = (int) (l2 >>> 8) & 0xffffff; docIDs[i + 5] = (int) (((l2 & 0xff) << 16) | (l3 >>> 48)); docIDs[i + 6] = (int) (l3 >>> 24) & 0xffffff; docIDs[i + 7] = (int) l3 & 0xffffff; } - for (; i < count; ++i) { + for (; i < offset + count; ++i) { docIDs[i] = (Short.toUnsignedInt(in.readShort()) << 8) | Byte.toUnsignedInt(in.readByte()); } } @@ -290,7 +407,8 @@ private static void readInts32(IndexInput in, int count, int[] docIDs) throws IO * Read {@code count} integers and feed the result directly to {@link * IntersectVisitor#visit(int)}. */ - void readInts(IndexInput in, int count, IntersectVisitor visitor) throws IOException { + public void readInts(IndexInput in, int count, IntersectVisitor visitor, int[] buffer) + throws IOException { final int bpv = in.readByte(); switch (bpv) { case CONTINUOUS_IDS: @@ -300,10 +418,18 @@ void readInts(IndexInput in, int count, IntersectVisitor visitor) throws IOExcep readBitSet(in, count, visitor); break; case DELTA_BPV_16: - readDelta16(in, count, visitor); + if (version < BKDWriter.VERSION_VECTORIZED_DOCID) { + readDelta16Legacy(in, count, visitor); + } else { + readDelta16(in, count, visitor); + } break; case BPV_24: - readInts24(in, count, visitor); + if (version < BKDWriter.VERSION_VECTORIZED_DOCID) { + readScalarInts24(in, count, visitor); + } else { + readInts24(in, count, visitor, buffer); + } break; case BPV_32: readInts32(in, count, visitor); break; @@ -348,8 +474,25 @@ private void readDelta16(IndexInput in, int count, IntersectVisitor visitor) thr visitor.visit(scratchIntsRef); } - private void readInts24(IndexInput in, int count, IntersectVisitor visitor) throws IOException { - readInts24(in, count, scratch); + private void readDelta16Legacy(IndexInput in, int count, IntersectVisitor visitor) + throws IOException { + readDelta16Legacy(in, count, scratch); + scratchIntsRef.ints = scratch; + scratchIntsRef.length = count; + visitor.visit(scratchIntsRef); + } + + private void readInts24(IndexInput in, int count, IntersectVisitor visitor, int[] buffer) + throws IOException { + readInts24(in, count, buffer); + scratchIntsRef.ints = buffer; + scratchIntsRef.length = count; + visitor.visit(scratchIntsRef); + } + + private void readScalarInts24(IndexInput in, int count, IntersectVisitor visitor) + throws IOException { + readScalarInts24(in, count, scratch, 0); scratchIntsRef.ints = scratch; scratchIntsRef.length = count; visitor.visit(scratchIntsRef); diff --git
a/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90PointsFormatV0.java b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90PointsFormatV0.java new file mode 100644 index 000000000000..fd27eef04889 --- /dev/null +++ b/lucene/core/src/test/org/apache/lucene/codecs/lucene90/TestLucene90PointsFormatV0.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.lucene.codecs.lucene90; + +import org.apache.lucene.codecs.Codec; +import org.apache.lucene.codecs.PointsFormat; +import org.apache.lucene.tests.codecs.asserting.AssertingCodec; +import org.apache.lucene.tests.index.BasePointsFormatTestCase; + +public class TestLucene90PointsFormatV0 extends BasePointsFormatTestCase { + + @Override + protected Codec getCodec() { + return new AssertingCodec() { + @Override + public PointsFormat pointsFormat() { + return new Lucene90PointsFormat(Lucene90PointsFormat.VERSION_START); + } + }; + } +} diff --git a/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java b/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java index 086eb460e56e..9d25c3527b7c 100644 --- a/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java +++ b/lucene/core/src/test/org/apache/lucene/util/bkd/TestDocIdsWriter.java @@ -36,6 +36,9 @@ public class TestDocIdsWriter extends LuceneTestCase { + private static final int[] VERSIONS = + new int[] {BKDWriter.VERSION_META_FILE, BKDWriter.VERSION_CURRENT}; + public void testRandom() throws Exception { int numIters = atLeast(100); try (Directory dir = newDirectory()) { @@ -113,7 +116,9 @@ public void testContinuousIds() throws Exception { private void test(Directory dir, int[] ints) throws Exception { final long len; - DocIdsWriter docIdsWriter = new DocIdsWriter(ints.length); + // It is hard to get BPV24-encoded docs in TestLuceneXXPointsFormat, so test bwc here as well. + final int version = VERSIONS[random().nextInt(VERSIONS.length)]; + DocIdsWriter docIdsWriter = new DocIdsWriter(ints.length, version); try (IndexOutput out = dir.createOutput("tmp", IOContext.DEFAULT)) { docIdsWriter.writeDocIds(ints, 0, ints.length, out); len = out.getFilePointer(); @@ -149,7 +154,8 @@ public void visit(int docID, byte[] packedValue) throws IOException { public Relation compare(byte[] minPackedValue, byte[] maxPackedValue) { throw new UnsupportedOperationException(); } - }); + }, + new int[ints.length]); assertArrayEquals(ints, read); assertEquals(len, in.getFilePointer()); }