Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions lucene/CHANGES.txt
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ Improvements
* GITHUB#14239: Hunspell's option to tolerate affix rule count mismatches was
improved to tolerate more instances of this problem. (Robert Muir)

* GITHUB#14213: Allow indexing a stored-only StoredField directly from a DataInput. (Tim Brooks)

Optimizations
---------------------

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldDataInput;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput;
Expand Down Expand Up @@ -290,7 +291,7 @@ private static void readField(DataInput in, StoredFieldVisitor visitor, FieldInf
switch (bits & TYPE_MASK) {
case BYTE_ARR:
int length = in.readVInt();
visitor.binaryField(info, in, length);
visitor.binaryField(info, new StoredFieldDataInput(in, length));
break;
case STRING:
visitor.stringField(info, in.readString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,8 @@
import org.apache.lucene.index.FieldInfo;
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.StoredFieldDataInput;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.util.Accountable;
import org.apache.lucene.util.BytesRef;

Expand Down Expand Up @@ -73,10 +73,11 @@ public void finishDocument() throws IOException {}
/** Writes a stored double value. */
public abstract void writeField(FieldInfo info, double value) throws IOException;

/** Writes a stored binary value from a {@link DataInput} and a {@code length}. */
public void writeField(FieldInfo info, DataInput value, int length) throws IOException {
/** Writes a stored binary value from a {@link StoredFieldDataInput}. */
public void writeField(FieldInfo info, StoredFieldDataInput value) throws IOException {
int length = value.length();
final byte[] bytes = new byte[length];
value.readBytes(bytes, 0, length);
value.getDataInput().readBytes(bytes, 0, length);
writeField(info, new BytesRef(bytes, 0, length));
}

Expand Down Expand Up @@ -191,8 +192,8 @@ public MergeVisitor(MergeState mergeState, int readerIndex) {
}

@Override
public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException {
writeField(remap(fieldInfo), value, length);
public void binaryField(FieldInfo fieldInfo, StoredFieldDataInput value) throws IOException {
writeField(remap(fieldInfo), value);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
import org.apache.lucene.index.FieldInfos;
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldDataInput;
import org.apache.lucene.index.StoredFieldVisitor;
import org.apache.lucene.store.AlreadyClosedException;
import org.apache.lucene.store.ByteArrayDataInput;
Expand Down Expand Up @@ -261,7 +262,7 @@ private static void readField(DataInput in, StoredFieldVisitor visitor, FieldInf
switch (bits & TYPE_MASK) {
case BYTE_ARR:
int length = in.readVInt();
visitor.binaryField(info, in, length);
visitor.binaryField(info, new StoredFieldDataInput(in, length));
break;
case STRING:
visitor.stringField(info, in.readString());
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,9 +34,9 @@
import org.apache.lucene.index.IndexFileNames;
import org.apache.lucene.index.MergeState;
import org.apache.lucene.index.SegmentInfo;
import org.apache.lucene.index.StoredFieldDataInput;
import org.apache.lucene.store.ByteBuffersDataInput;
import org.apache.lucene.store.ByteBuffersDataOutput;
import org.apache.lucene.store.DataInput;
import org.apache.lucene.store.DataOutput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IOContext;
Expand Down Expand Up @@ -311,12 +311,13 @@ public void writeField(FieldInfo info, BytesRef value) throws IOException {
}

@Override
public void writeField(FieldInfo info, DataInput value, int length) throws IOException {
public void writeField(FieldInfo info, StoredFieldDataInput value) throws IOException {
int length = value.getLength();
++numStoredFieldsInDoc;
final long infoAndBits = (((long) info.number) << TYPE_BITS) | BYTE_ARR;
bufferedDocs.writeVLong(infoAndBits);
bufferedDocs.writeVInt(length);
bufferedDocs.copyBytes(value, length);
bufferedDocs.copyBytes(value.getDataInput(), length);
}

@Override
Expand Down
3 changes: 3 additions & 0 deletions lucene/core/src/java/org/apache/lucene/document/Field.java
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
import org.apache.lucene.index.IndexOptions;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.IndexableFieldType;
import org.apache.lucene.index.StoredFieldDataInput;
import org.apache.lucene.util.BytesRef;

/**
Expand Down Expand Up @@ -619,6 +620,8 @@ public StoredValue storedValue() {
return new StoredValue((double) fieldsData);
} else if (fieldsData instanceof BytesRef) {
return new StoredValue((BytesRef) fieldsData);
} else if (fieldsData instanceof StoredFieldDataInput) {
return new StoredValue((StoredFieldDataInput) fieldsData);
} else if (fieldsData instanceof String) {
return new StoredValue((String) fieldsData);
} else {
Expand Down
16 changes: 16 additions & 0 deletions lucene/core/src/java/org/apache/lucene/document/StoredField.java
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
package org.apache.lucene.document;

import org.apache.lucene.index.IndexReader; // javadocs
import org.apache.lucene.index.StoredFieldDataInput;
import org.apache.lucene.search.IndexSearcher; // javadocs
import org.apache.lucene.util.BytesRef;

Expand Down Expand Up @@ -105,6 +106,21 @@ public StoredField(String name, BytesRef value) {
super(name, value, TYPE);
}

/**
* Create a stored-only field with the given data input value. The supplied {@link
* StoredFieldDataInput} is kept by reference as this field's data; it is not copied here.
*
* @param name field name
* @param value data input, together with its length, that supplies the binary content
* @throws IllegalArgumentException if the field name or value is null.
*/
public StoredField(String name, StoredFieldDataInput value) {
super(name, TYPE);
// This constructor performs its own null check on the value before storing it.
if (value == null) {
throw new IllegalArgumentException("store field data input must not be null");
}
fieldsData = value;
}

/**
* Create a stored-only field with the given string value.
*
Expand Down
28 changes: 27 additions & 1 deletion lucene/core/src/java/org/apache/lucene/document/StoredValue.java
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@

import java.util.Objects;
import org.apache.lucene.index.IndexableField;
import org.apache.lucene.index.StoredFieldDataInput;
import org.apache.lucene.util.BytesRef;

/**
Expand All @@ -39,6 +40,8 @@ public enum Type {
DOUBLE,
/** Type of binary values. */
BINARY,
/** Type of data input values. */
DATA_INPUT,
/** Type of string values. */
STRING;
}
Expand All @@ -48,6 +51,7 @@ public enum Type {
private long longValue;
private float floatValue;
private double doubleValue;
private StoredFieldDataInput dataInput;
private BytesRef binaryValue;
private String stringValue;

Expand Down Expand Up @@ -81,7 +85,13 @@ public StoredValue(BytesRef value) {
binaryValue = Objects.requireNonNull(value);
}

/** Ctor for binary values. */
/** Creates a stored value of type {@link Type#DATA_INPUT} wrapping the given, non-null input. */
public StoredValue(StoredFieldDataInput value) {
  // A null argument aborts construction, so the order of the two assignments is unobservable.
  dataInput = Objects.requireNonNull(value);
  type = Type.DATA_INPUT;
}

/** Ctor for string values. */
public StoredValue(String value) {
type = Type.STRING;
stringValue = Objects.requireNonNull(value);
Expand Down Expand Up @@ -132,6 +142,14 @@ public void setBinaryValue(BytesRef value) {
binaryValue = Objects.requireNonNull(value);
}

/**
 * Replaces the data input held by this value.
 *
 * @param value the new data input; must not be null
 * @throws IllegalArgumentException if this value is not of type {@link Type#DATA_INPUT}
 * @throws NullPointerException if {@code value} is null
 */
public void setDataInputValue(StoredFieldDataInput value) {
  if (type == Type.DATA_INPUT) {
    dataInput = Objects.requireNonNull(value);
    return;
  }
  throw new IllegalArgumentException("Cannot set a data input value on a " + type + " value");
}

/** Set a string value. */
public void setStringValue(String value) {
if (type != Type.STRING) {
Expand Down Expand Up @@ -180,6 +198,14 @@ public BytesRef getBinaryValue() {
return binaryValue;
}

/**
 * Returns the data input held by this value.
 *
 * @return the data input
 * @throws IllegalArgumentException if this value is not of type {@link Type#DATA_INPUT}
 */
public StoredFieldDataInput getDataInputValue() {
  if (type == Type.DATA_INPUT) {
    return dataInput;
  }
  throw new IllegalArgumentException("Cannot get a data input value on a " + type + " value");
}

/** Retrieve a string value. */
public String getStringValue() {
if (type != Type.STRING) {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -138,8 +138,8 @@ private static class CopyVisitor extends StoredFieldVisitor {
}

@Override
public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException {
writer.writeField(fieldInfo, value, length);
public void binaryField(FieldInfo fieldInfo, StoredFieldDataInput value) throws IOException {
writer.writeField(fieldInfo, value);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.lucene.index;

import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.DataInput;

/**
 * A fixed size DataInput which includes the length of the input. For use as a StoredField.
 *
 * @param in the data input
 * @param length the number of bytes to read from the data input
 * @lucene.experimental
 */
public record StoredFieldDataInput(DataInput in, int length) {

  /**
   * Creates a StoredFieldDataInput from a ByteArrayDataInput, covering the bytes between its
   * current position and its end.
   *
   * @param byteArrayDataInput the input to wrap; reading starts at its current position
   */
  public StoredFieldDataInput(ByteArrayDataInput byteArrayDataInput) {
    // ByteArrayDataInput#length() is the end offset of the readable window rather than the count
    // of unread bytes, so subtract the current position. For an input positioned at 0 this is the
    // same value as length().
    this(byteArrayDataInput, byteArrayDataInput.length() - byteArrayDataInput.getPosition());
  }

  /** Returns the data input; equivalent to the record accessor {@link #in()}. */
  public DataInput getDataInput() {
    return in;
  }

  /** Returns the length of the data input; equivalent to the record accessor {@link #length()}. */
  public int getLength() {
    return length;
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,6 @@
import java.io.IOException;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.DocumentStoredFieldVisitor;
import org.apache.lucene.store.DataInput;

/**
* Expert: provides a low-level means of accessing the stored field values in an index. See {@link
Expand All @@ -41,15 +40,17 @@ public abstract class StoredFieldVisitor {
protected StoredFieldVisitor() {}

/**
* Expert: Process a binary field directly from the {@link DataInput}. Implementors of this method
* must read {@code length} bytes from the given {@link DataInput}. The default implementation
* reads all byes in a newly created byte array and calls {@link #binaryField(FieldInfo, byte[])}.
* Expert: Process a binary field directly from the {@link StoredFieldDataInput}. Implementors of
* this method must read {@code StoredFieldDataInput#length} bytes from the given {@link
* StoredFieldDataInput}. The default implementation reads all bytes in a newly created byte array
* and calls {@link #binaryField(FieldInfo, byte[])}.
*
* @param value newly allocated byte array with the binary contents.
* @param value the stored field data input.
*/
public void binaryField(FieldInfo fieldInfo, DataInput value, int length) throws IOException {
public void binaryField(FieldInfo fieldInfo, StoredFieldDataInput value) throws IOException {
int length = value.length();
final byte[] data = new byte[length];
value.readBytes(data, 0, length);
value.getDataInput().readBytes(data, 0, value.getLength());
binaryField(fieldInfo, data);
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,9 @@ void writeField(FieldInfo info, StoredValue value) throws IOException {
case BINARY:
writer.writeField(info, value.getBinaryValue());
break;
case DATA_INPUT:
writer.writeField(info, value.getDataInputValue());
break;
case STRING:
writer.writeField(info, value.getStringValue());
break;
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@

import java.nio.charset.StandardCharsets;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.StoredFieldDataInput;
import org.apache.lucene.store.ByteArrayDataInput;
import org.apache.lucene.store.Directory;
import org.apache.lucene.tests.index.RandomIndexWriter;
import org.apache.lucene.tests.util.LuceneTestCase;
Expand Down Expand Up @@ -71,4 +73,48 @@ public void testBinaryFieldInIndex() throws Exception {
reader.close();
dir.close();
}

/**
 * Indexes a stored-only binary field supplied as a {@link StoredFieldDataInput} next to a stored
 * string field, then reads the document back and checks that both values round-trip.
 */
public void testBinaryFieldFromDataInputInIndex() throws Exception {
  FieldType ft = new FieldType();
  ft.setStored(true);
  byte[] byteArray = binaryValStored.getBytes(StandardCharsets.UTF_8);
  StoredFieldDataInput storedFieldDataInput =
      new StoredFieldDataInput(new ByteArrayDataInput(byteArray));
  StoredField binaryFldStored = new StoredField("binaryStored", storedFieldDataInput);
  Field stringFldStored = new Field("stringStored", binaryValStored, ft);

  Document doc = new Document();
  doc.add(binaryFldStored);
  doc.add(stringFldStored);

  /* test for field count */
  assertEquals(2, doc.getFields().size());

  /* add the doc to a ram index */
  Directory dir = newDirectory();
  RandomIndexWriter writer = new RandomIndexWriter(random(), dir);
  writer.addDocument(doc);

  /* open a reader and fetch the document */
  IndexReader reader = writer.getReader();
  Document docFromReader = reader.storedFields().document(0);
  assertNotNull(docFromReader);

  /* fetch the binary stored field and compare its content with the original one */
  BytesRef bytes = docFromReader.getBinaryValue("binaryStored");
  assertNotNull(bytes);
  String binaryFldStoredTest =
      new String(bytes.bytes, bytes.offset, bytes.length, StandardCharsets.UTF_8);
  // assertEquals reports both values on mismatch, unlike assertTrue(a.equals(b)).
  assertEquals(binaryValStored, binaryFldStoredTest);

  /* fetch the string field and compare its content with the original one */
  String stringFldStoredTest = docFromReader.get("stringStored");
  // A missing field now fails the assertion instead of throwing a NullPointerException.
  assertEquals(binaryValStored, stringFldStoredTest);

  writer.close();
  reader.close();
  dir.close();
}
}
Loading