Skip to content

Commit

Permalink
add variant inverted cases
Browse files Browse the repository at this point in the history
  • Loading branch information
amorynan committed Sep 5, 2024
1 parent 83e12a8 commit 8c133e7
Show file tree
Hide file tree
Showing 5 changed files with 338 additions and 0 deletions.
6 changes: 6 additions & 0 deletions be/src/vec/functions/array/function_array_index.h
Original file line number Diff line number Diff line change
Expand Up @@ -200,6 +200,12 @@ class FunctionArrayIndex : public IFunction {

/// Overrides IFunction::execute_impl and forwards the call to _execute_dispatch().
///
/// When the "array_contains.skip_expr" debug point is active and there is at
/// least one input row, the call is short-circuited with an INTERNAL_ERROR
/// status instead of being dispatched.
///
/// @param context           function context (not used directly here).
/// @param block             block passed through to _execute_dispatch().
/// @param arguments         argument column positions, passed through unchanged.
/// @param result            result column position, passed through unchanged.
/// @param input_rows_count  number of rows in this call; also reported in the
///                          debug-point error message.
/// @return the status of _execute_dispatch(), or the debug-point error.
Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                    size_t result, size_t input_rows_count) const override {
    DBUG_EXECUTE_IF("array_contains.skip_expr", {
        if (input_rows_count > 0) {
            // The message has a "{}" placeholder, so the row count must be
            // supplied; otherwise the placeholder is never filled in.
            return Status::Error<ErrorCode::INTERNAL_ERROR>(
                    "array_contains.skip_expr is enabled, but here has input_rows_count {}",
                    input_rows_count);
        }
    });
    return _execute_dispatch(block, arguments, result, input_rows_count);
}

Expand Down

Large diffs are not rendered by default.

Large diffs are not rendered by default.

Original file line number Diff line number Diff line change
@@ -0,0 +1,92 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Setup suite: (re)creates the two variant tables carrying inverted indexes
// (one with nullable variant columns, one with NOT NULL variant columns) and
// stream-loads the same CSV file into both of them.
suite("load") {

    // Drop the table that is actually created below, so a rerun starts from a
    // clean table instead of silently reusing the previous one.
    sql """ DROP TABLE IF EXISTS test_variant_inverted_index_null; """

    // Nullable variant columns: plain inverted index, english parser, unicode parser.
    sql """ CREATE TABLE IF NOT EXISTS test_variant_inverted_index_null
(
col_int_undef_signed_not_null int not null,
col_variant variant null,
col_variant_parser_eng variant null,
col_variant_parser_uni variant null,
INDEX col_variant_idx (`col_variant`) USING INVERTED,
INDEX col_variant_parser_eng_idx (`col_variant_parser_eng`) USING INVERTED PROPERTIES("parser" = "english"),
INDEX col_variant_parser_uni_idx (`col_variant_parser_uni`) USING INVERTED PROPERTIES("parser" = "unicode"),
) engine=olap
UNIQUE KEY(col_int_undef_signed_not_null)
distributed by hash(col_int_undef_signed_not_null)
properties("enable_unique_key_merge_on_write" = "true", "replication_num" = "1"); """

    // Same layout as above, but every variant column is NOT NULL.
    sql """ DROP TABLE IF EXISTS test_variant_inverted_index_not_null; """
    sql """ CREATE TABLE IF NOT EXISTS test_variant_inverted_index_not_null
(
col_int_undef_signed_not_null int not null,
col_variant variant not null,
col_variant_parser_eng variant not null,
col_variant_parser_uni variant not null,
INDEX col_variant_idx (`col_variant`) USING INVERTED,
INDEX col_variant_parser_eng_idx (`col_variant_parser_eng`) USING INVERTED PROPERTIES("parser" = "english"),
INDEX col_variant_parser_uni_idx (`col_variant_parser_uni`) USING INVERTED PROPERTIES("parser" = "unicode"),
) UNIQUE KEY(col_int_undef_signed_not_null)
distributed by hash(col_int_undef_signed_not_null)
properties("enable_unique_key_merge_on_write" = "true", "replication_num" = "1"); """

    // Stream-load helper. Each argument is forwarded as the HTTP header of the
    // same meaning; file_name is the data file to upload into table_name.
    def load_data = {table_name, strip_flag, read_flag, format_flag, exprs, json_root, json_paths,
                     where_expr, fuzzy_flag, column_sep, file_name ->
        streamLoad {
            table table_name

            // set http request header params
            set 'read_json_by_line', read_flag
            set 'strip_outer_array', strip_flag
            set 'format', format_flag
            set 'columns', exprs
            set 'jsonpaths', json_paths
            set 'json_root', json_root
            set 'where', where_expr
            set 'fuzzy_parse', fuzzy_flag
            set 'column_separator', column_sep
            set 'max_filter_ratio', '0.6'
            file file_name // data file to import
            time 10000 // limit inflight 10s

            // Declaring a check callback replaces the default check, so every
            // condition that matters has to be asserted here.
            check { result, exception, startTime, endTime ->
                if (exception != null) {
                    throw exception
                }
                log.info("Stream load result: ${result}".toString())
                def json = parseJson(result)
                assertEquals("success", json.Status.toLowerCase())
                // Every row must be accounted for as loaded, unselected or filtered.
                assertEquals(json.NumberTotalRows, json.NumberLoadedRows + json.NumberUnselectedRows + json.NumberFilteredRows)
                assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
            }
        }
    }

    // load data: table_name, strip_flag, read_flag, format_flag, exprs, json_root, json_paths,
    //            where_expr, fuzzy_flag, column_sep, file_name
    def file="test_variant_inverted_index.csv"
    load_data.call("test_variant_inverted_index_null", "false", "false", "csv", null, null, null, null, null, "\t", file)
    load_data.call("test_variant_inverted_index_not_null", "false", "false", "csv", null, null, null, null, null, "\t", file)
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,145 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

// Query suite over test_variant_inverted_index_null: each query runs while the
// "match.invert_index_not_support_execute_match" debug point is enabled on all BEs.
suite("test_variant_query", "nonConcurrent"){

    def debugPointName = "match.invert_index_not_support_execute_match"

    // Runs `queryBody` with the debug point enabled on every BE and always
    // disables it afterwards, whether or not the query succeeded.
    def withDebugPoint = { Closure queryBody ->
        try {
            GetDebugPoint().enableDebugPointForAllBEs(debugPointName)
            queryBody.call()
        } finally {
            GetDebugPoint().disableDebugPointForAllBEs(debugPointName)
        }
    }

    // case1. MATCH on a simple string sub-column of each variant column
    withDebugPoint {
        qt_sql_string1 """ select * from test_variant_inverted_index_null where col_variant['cell_type'] match 'markdown' order by col_int_undef_signed_not_null""";
    }
    withDebugPoint {
        qt_sql_string2 """ select * from test_variant_inverted_index_null where col_variant_parser_eng['cell_type'] match 'markdown' order by col_int_undef_signed_not_null""";
    }
    withDebugPoint {
        qt_sql_string3 """ select * from test_variant_inverted_index_null where col_variant_parser_uni['cell_type'] match 'markdown' order by col_int_undef_signed_not_null""";
    }

    // case2. IN predicate on a simple int sub-column of each variant column
    withDebugPoint {
        qt_sql_int1 """ select * from test_variant_inverted_index_null where col_variant['execution_count'] in (7,8) order by col_int_undef_signed_not_null""";
    }
    withDebugPoint {
        qt_sql_int2 """ select * from test_variant_inverted_index_null where col_variant_parser_eng['execution_count'] in (7,8) order by col_int_undef_signed_not_null""";
    }
    withDebugPoint {
        qt_sql_int3 """ select * from test_variant_inverted_index_null where col_variant_parser_uni['execution_count'] in (7,8) order by col_int_undef_signed_not_null""";
    }

    // case3. array_contains on an array<string> sub-column of each variant column
    withDebugPoint {
        qt_sql_as1 """ select * from test_variant_inverted_index_null where array_contains(cast(col_variant['source'] as array<string>), "test_data.head()") order by col_int_undef_signed_not_null""";
    }
    withDebugPoint {
        qt_sql_as2 """ select * from test_variant_inverted_index_null where array_contains(cast(col_variant_parser_eng['source'] as array<string>), "test_data.head()") order by col_int_undef_signed_not_null""";
    }
    withDebugPoint {
        qt_sql_as3 """ select * from test_variant_inverted_index_null where array_contains(cast(col_variant_parser_uni['source'] as array<string>), "test_data.head()") order by col_int_undef_signed_not_null""";
    }

    // The cases below stay disabled: extracting an element of an array of
    // objects from a variant is not yet supported with the inverted index, and
    // the inverted index cannot serve a function call on the left side of MATCH, e.g.
    // mysql> select * from test_variant_inverted_index_null where json_extract(col_variant['outputs'], '$.[1].output_type') match 'execute_result';
    // ERROR 1105 (HY000): errCode = 2, detailMessage = Only support match left operand is SlotRef, right operand is Literal. But meet expression (json_extract(cast(col_variant['outputs']#12 as VARCHAR(65533)), '$.[1].output_type') MATCH_ANY 'execute_result')
    //
    // // case4. array<object> case in variant
    // // case4.1 array<object> for simple string case
    // withDebugPoint {
    //     sql """ select * from test_variant_inverted_index_null where col_variant['output'][0]['output_type'] match 'execute_result' order by col_int_undef_signed_not_null""";
    // }
    // withDebugPoint {
    //     sql """ select * from test_variant_inverted_index_null where col_variant_parser_eng['output'][0]['output_type'] match 'mark' order by col_int_undef_signed_not_null""";
    // }
    // withDebugPoint {
    //     sql """ select * from test_variant_inverted_index_null where col_variant_parser_uni['output'][0]['output_type'] match 'mark' order by col_int_undef_signed_not_null""";
    // }
    //
    // // case4.2 array<object> for simple int case
    // withDebugPoint {
    //     sql """ select * from test_variant_inverted_index_null where col_variant['output'][0]['execution_count'] in (7,8) order by col_int_undef_signed_not_null""";
    // }
    // withDebugPoint {
    //     sql """ select * from test_variant_inverted_index_null where col_variant_parser_eng['output'][0]['execution_count'] in (7,8) order by col_int_undef_signed_not_null""";
    // }
    // withDebugPoint {
    //     sql """ select * from test_variant_inverted_index_null where col_variant_parser_uni['output'][0]['execution_count'] in (7,8) order by col_int_undef_signed_not_null""";
    // }
    //
    // // case4.3 array<object> for array<string> case
    // withDebugPoint {
    //     sql """ select * from test_variant_inverted_index_null where array_contains(cast(col_variant['output'][0]['data']['text/html'] as array<string>), "<td>i loooooooovvvvvveee my kindle not that the dx...</td>\\n") order by col_int_undef_signed_not_null""";
    // }

}

0 comments on commit 8c133e7

Please sign in to comment.