Skip to content

Commit

Permalink
add json path constructor to term (#2367)
Browse files Browse the repository at this point in the history
  • Loading branch information
PSeitz authored Apr 22, 2024
1 parent 1417eaf commit 047da20
Show file tree
Hide file tree
Showing 5 changed files with 57 additions and 65 deletions.
38 changes: 9 additions & 29 deletions src/core/json_utils.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ use rustc_hash::FxHashMap;

use crate::postings::{IndexingContext, IndexingPosition, PostingsWriter};
use crate::schema::document::{ReferenceValue, ReferenceValueLeaf, Value};
use crate::schema::{Field, Type};
use crate::schema::Type;
use crate::time::format_description::well_known::Rfc3339;
use crate::time::{OffsetDateTime, UtcOffset};
use crate::tokenizer::TextAnalyzer;
Expand Down Expand Up @@ -349,44 +349,24 @@ pub(crate) fn encode_column_name(
path.into()
}

/// Builds a JSON term for `json_field` from a sequence of path segments.
///
/// Each item yielded by `paths` is pushed as one segment into a
/// `JsonPathWriter` created with the `expand_dots_enabled` flag. Once all
/// segments are pushed the path is terminated, and its bytes are appended to a
/// fresh term of type `Type::Json`.
///
/// The returned term carries no value yet; callers append one afterwards.
pub fn term_from_json_paths<'a>(
    json_field: Field,
    paths: impl Iterator<Item = &'a str>,
    expand_dots_enabled: bool,
) -> Term {
    let mut path_writer = JsonPathWriter::with_expand_dots(expand_dots_enabled);
    paths.for_each(|segment| path_writer.push(segment));
    path_writer.set_end();

    let encoded_path = path_writer.as_str().as_bytes();
    let mut json_term = Term::with_type_and_field(Type::Json, json_field);
    json_term.append_bytes(encoded_path);
    json_term
}

#[cfg(test)]
mod tests {
use super::split_json_path;
use crate::json_utils::term_from_json_paths;
use crate::schema::Field;
use crate::Term;

#[test]
fn test_json_writer() {
let field = Field::from_field_id(1);

let mut term = term_from_json_paths(field, ["attributes", "color"].into_iter(), false);
let mut term = Term::from_field_json_path(field, "attributes.color", false);
term.append_type_and_str("red");
assert_eq!(
format!("{:?}", term),
"Term(field=1, type=Json, path=attributes.color, type=Str, \"red\")"
);

let mut term = term_from_json_paths(
field,
["attributes", "dimensions", "width"].into_iter(),
false,
);
let mut term = Term::from_field_json_path(field, "attributes.dimensions.width", false);
term.append_type_and_fast_value(400i64);
assert_eq!(
format!("{:?}", term),
Expand All @@ -397,7 +377,7 @@ mod tests {
#[test]
fn test_string_term() {
let field = Field::from_field_id(1);
let mut term = term_from_json_paths(field, ["color"].into_iter(), false);
let mut term = Term::from_field_json_path(field, "color", false);
term.append_type_and_str("red");

assert_eq!(term.serialized_term(), b"\x00\x00\x00\x01jcolor\x00sred")
Expand All @@ -406,7 +386,7 @@ mod tests {
#[test]
fn test_i64_term() {
let field = Field::from_field_id(1);
let mut term = term_from_json_paths(field, ["color"].into_iter(), false);
let mut term = Term::from_field_json_path(field, "color", false);
term.append_type_and_fast_value(-4i64);

assert_eq!(
Expand All @@ -418,7 +398,7 @@ mod tests {
#[test]
fn test_u64_term() {
let field = Field::from_field_id(1);
let mut term = term_from_json_paths(field, ["color"].into_iter(), false);
let mut term = Term::from_field_json_path(field, "color", false);
term.append_type_and_fast_value(4u64);

assert_eq!(
Expand All @@ -430,7 +410,7 @@ mod tests {
#[test]
fn test_f64_term() {
let field = Field::from_field_id(1);
let mut term = term_from_json_paths(field, ["color"].into_iter(), false);
let mut term = Term::from_field_json_path(field, "color", false);
term.append_type_and_fast_value(4.0f64);
assert_eq!(
term.serialized_term(),
Expand All @@ -441,7 +421,7 @@ mod tests {
#[test]
fn test_bool_term() {
let field = Field::from_field_id(1);
let mut term = term_from_json_paths(field, ["color"].into_iter(), false);
let mut term = Term::from_field_json_path(field, "color", false);
term.append_type_and_fast_value(true);
assert_eq!(
term.serialized_term(),
Expand Down
5 changes: 2 additions & 3 deletions src/core/tests.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
use crate::collector::Count;
use crate::directory::{RamDirectory, WatchCallback};
use crate::indexer::{LogMergePolicy, NoMergePolicy};
use crate::json_utils::term_from_json_paths;
use crate::query::TermQuery;
use crate::schema::{Field, IndexRecordOption, Schema, INDEXED, STRING, TEXT};
use crate::tokenizer::TokenizerManager;
Expand Down Expand Up @@ -417,7 +416,7 @@ fn test_non_text_json_term_freq() {
let segment_reader = searcher.segment_reader(0u32);
let inv_idx = segment_reader.inverted_index(field).unwrap();

let mut term = term_from_json_paths(field, ["tenant_id"].iter().cloned(), false);
let mut term = Term::from_field_json_path(field, "tenant_id", false);
term.append_type_and_fast_value(75u64);

let postings = inv_idx
Expand Down Expand Up @@ -451,7 +450,7 @@ fn test_non_text_json_term_freq_bitpacked() {
let segment_reader = searcher.segment_reader(0u32);
let inv_idx = segment_reader.inverted_index(field).unwrap();

let mut term = term_from_json_paths(field, ["tenant_id"].iter().cloned(), false);
let mut term = Term::from_field_json_path(field, "tenant_id", false);
term.append_type_and_fast_value(75u64);

let mut postings = inv_idx
Expand Down
37 changes: 17 additions & 20 deletions src/indexer/segment_writer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -498,7 +498,6 @@ mod tests {
use crate::collector::{Count, TopDocs};
use crate::directory::RamDirectory;
use crate::fastfield::FastValue;
use crate::json_utils::term_from_json_paths;
use crate::postings::TermInfo;
use crate::query::{PhraseQuery, QueryParser};
use crate::schema::document::Value;
Expand Down Expand Up @@ -647,9 +646,8 @@ mod tests {

let mut term_stream = term_dict.stream().unwrap();

let term_from_path = |paths: &[&str]| -> Term {
term_from_json_paths(json_field, paths.iter().cloned(), false)
};
let term_from_path =
|path: &str| -> Term { Term::from_field_json_path(json_field, path, false) };

fn set_fast_val<T: FastValue>(val: T, mut term: Term) -> Term {
term.append_type_and_fast_value(val);
Expand All @@ -660,23 +658,22 @@ mod tests {
term
}

let term = term_from_path(&["bool"]);
let term = term_from_path("bool");
assert!(term_stream.advance());
assert_eq!(
term_stream.key(),
set_fast_val(true, term).serialized_value_bytes()
);

let term = term_from_path(&["complexobject", "field.with.dot"]);

let term = term_from_path("complexobject.field\\.with\\.dot");
assert!(term_stream.advance());
assert_eq!(
term_stream.key(),
set_fast_val(1i64, term).serialized_value_bytes()
);

// Date
let term = term_from_path(&["date"]);
let term = term_from_path("date");

assert!(term_stream.advance());
assert_eq!(
Expand All @@ -691,65 +688,65 @@ mod tests {
);

// Float
let term = term_from_path(&["float"]);
let term = term_from_path("float");
assert!(term_stream.advance());
assert_eq!(
term_stream.key(),
set_fast_val(-0.2f64, term).serialized_value_bytes()
);

// Number In Array
let term = term_from_path(&["my_arr"]);
let term = term_from_path("my_arr");
assert!(term_stream.advance());
assert_eq!(
term_stream.key(),
set_fast_val(2i64, term).serialized_value_bytes()
);

let term = term_from_path(&["my_arr"]);
let term = term_from_path("my_arr");
assert!(term_stream.advance());
assert_eq!(
term_stream.key(),
set_fast_val(3i64, term).serialized_value_bytes()
);

let term = term_from_path(&["my_arr"]);
let term = term_from_path("my_arr");
assert!(term_stream.advance());
assert_eq!(
term_stream.key(),
set_fast_val(4i64, term).serialized_value_bytes()
);

// El in Array
let term = term_from_path(&["my_arr", "my_key"]);
let term = term_from_path("my_arr.my_key");
assert!(term_stream.advance());
assert_eq!(
term_stream.key(),
set_str("tokens", term).serialized_value_bytes()
);
let term = term_from_path(&["my_arr", "my_key"]);
let term = term_from_path("my_arr.my_key");
assert!(term_stream.advance());
assert_eq!(
term_stream.key(),
set_str("two", term).serialized_value_bytes()
);

// Signed
let term = term_from_path(&["signed"]);
let term = term_from_path("signed");
assert!(term_stream.advance());
assert_eq!(
term_stream.key(),
set_fast_val(-2i64, term).serialized_value_bytes()
);

let term = term_from_path(&["toto"]);
let term = term_from_path("toto");
assert!(term_stream.advance());
assert_eq!(
term_stream.key(),
set_str("titi", term).serialized_value_bytes()
);
// Unsigned
let term = term_from_path(&["unsigned"]);
let term = term_from_path("unsigned");
assert!(term_stream.advance());
assert_eq!(
term_stream.key(),
Expand All @@ -776,7 +773,7 @@ mod tests {
let searcher = reader.searcher();
let segment_reader = searcher.segment_reader(0u32);
let inv_index = segment_reader.inverted_index(json_field).unwrap();
let mut term = term_from_json_paths(json_field, ["mykey"].into_iter(), false);
let mut term = Term::from_field_json_path(json_field, "mykey", false);
term.append_type_and_str("token");
let term_info = inv_index.get_term_info(&term).unwrap().unwrap();
assert_eq!(
Expand Down Expand Up @@ -815,7 +812,7 @@ mod tests {
let searcher = reader.searcher();
let segment_reader = searcher.segment_reader(0u32);
let inv_index = segment_reader.inverted_index(json_field).unwrap();
let mut term = term_from_json_paths(json_field, ["mykey"].into_iter(), false);
let mut term = Term::from_field_json_path(json_field, "mykey", false);
term.append_type_and_str("two tokens");
let term_info = inv_index.get_term_info(&term).unwrap().unwrap();
assert_eq!(
Expand Down Expand Up @@ -856,7 +853,7 @@ mod tests {
let reader = index.reader().unwrap();
let searcher = reader.searcher();

let term = term_from_json_paths(json_field, ["mykey", "field"].into_iter(), false);
let term = Term::from_field_json_path(json_field, "mykey.field", false);

let mut hello_term = term.clone();
hello_term.append_type_and_str("hello");
Expand Down
14 changes: 3 additions & 11 deletions src/query/query_parser/query_parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,9 +11,7 @@ use rustc_hash::FxHashMap;

use super::logical_ast::*;
use crate::index::Index;
use crate::json_utils::{
convert_to_fast_value_and_append_to_json_term, split_json_path, term_from_json_paths,
};
use crate::json_utils::convert_to_fast_value_and_append_to_json_term;
use crate::query::range_query::{is_type_valid_for_fastfield_range_query, RangeQuery};
use crate::query::{
AllQuery, BooleanQuery, BoostQuery, EmptyQuery, FuzzyTermQuery, Occur, PhrasePrefixQuery,
Expand Down Expand Up @@ -966,14 +964,8 @@ fn generate_literals_for_json_object(
let index_record_option = text_options.index_option();
let mut logical_literals = Vec::new();

let paths = split_json_path(json_path);
let get_term_with_path = || {
term_from_json_paths(
field,
paths.iter().map(|el| el.as_str()),
json_options.is_expand_dots_enabled(),
)
};
let get_term_with_path =
|| Term::from_field_json_path(field, json_path, json_options.is_expand_dots_enabled());

// Try to convert the phrase to a fast value
if let Some(term) = convert_to_fast_value_and_append_to_json_term(get_term_with_path(), phrase)
Expand Down
28 changes: 26 additions & 2 deletions src/schema/term.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@ use std::{fmt, str};

use columnar::{MonotonicallyMappableToU128, MonotonicallyMappableToU64};
use common::json_path_writer::{JSON_END_OF_PATH, JSON_PATH_SEGMENT_SEP_STR};
use common::JsonPathWriter;

use super::date_time_options::DATE_TIME_PRECISION_INDEXED;
use super::Field;
use crate::fastfield::FastValue;
use crate::json_utils::split_json_path;
use crate::schema::{Facet, Type};
use crate::DateTime;

Expand All @@ -33,6 +35,28 @@ impl Term {
Term(data)
}

/// Builds a JSON [`Term`] for `field` that points at `json_path`.
///
/// `json_path` is a dot-separated path into a JSON object: given
/// `{"k8s": {"node": {"id": 5}}}`, the nested value is addressed via
/// `k8s.node.id`.
///
/// If a key itself contains a dot and `expand_dots_enabled` is not set, that
/// dot has to be escaped with a backslash: given `{"k8s.node": {"id": 5}}`,
/// the value is addressed via `k8s\.node.id`.
///
/// The returned term contains the field, the JSON type and the encoded path
/// only; a value can be appended afterwards (e.g. with
/// `append_type_and_str` or `append_type_and_fast_value`).
pub fn from_field_json_path(field: Field, json_path: &str, expand_dots_enabled: bool) -> Term {
    // Split first, so that escaped dots stay inside their segment.
    let segments = split_json_path(json_path);

    let mut path_writer = JsonPathWriter::with_expand_dots(expand_dots_enabled);
    segments
        .iter()
        .for_each(|segment| path_writer.push(segment));
    path_writer.set_end();

    let mut json_term = Term::with_type_and_field(Type::Json, field);
    json_term.append_bytes(path_writer.as_str().as_bytes());
    json_term
}

pub(crate) fn with_type_and_field(typ: Type, field: Field) -> Term {
let mut term = Self::with_capacity(8);
term.set_field_and_type(field, typ);
Expand Down Expand Up @@ -165,7 +189,7 @@ impl Term {
/// This is used in JSON type to append a fast value after the path.
///
/// It will not clear existing bytes.
pub(crate) fn append_type_and_fast_value<T: FastValue>(&mut self, val: T) {
pub fn append_type_and_fast_value<T: FastValue>(&mut self, val: T) {
self.0.push(T::to_type().to_code());
let value = if T::to_type() == Type::Date {
DateTime::from_u64(val.to_u64())
Expand All @@ -181,7 +205,7 @@ impl Term {
/// This is used in JSON type to append a str after the path.
///
/// It will not clear existing bytes.
pub(crate) fn append_type_and_str(&mut self, val: &str) {
pub fn append_type_and_str(&mut self, val: &str) {
self.0.push(Type::Str.to_code());
self.0.extend(val.as_bytes().as_ref());
}
Expand Down

0 comments on commit 047da20

Please sign in to comment.