20
20
#include < fmt/format.h>
21
21
#include < gen_cpp/Exprs_types.h>
22
22
#include < gen_cpp/Metrics_types.h>
23
+ #include < gen_cpp/Opcodes_types.h>
23
24
#include < gen_cpp/PaloInternalService_types.h>
24
25
#include < gen_cpp/PlanNodes_types.h>
26
+ #include < glog/logging.h>
25
27
28
+ #include < algorithm>
26
29
#include < boost/iterator/iterator_facade.hpp>
27
30
#include < iterator>
28
31
#include < map>
32
+ #include < ranges>
29
33
#include < tuple>
34
+ #include < unordered_map>
30
35
#include < utility>
31
36
32
37
#include " common/compiler_util.h" // IWYU pragma: keep
33
38
#include " common/config.h"
34
39
#include " common/logging.h"
35
40
#include " common/object_pool.h"
41
+ #include " common/status.h"
36
42
#include " io/cache/block/block_file_cache_profile.h"
37
43
#include " runtime/descriptors.h"
38
44
#include " runtime/runtime_state.h"
39
45
#include " runtime/types.h"
46
+ #include " util/runtime_profile.h"
40
47
#include " vec/aggregate_functions/aggregate_function.h"
41
48
#include " vec/columns/column.h"
42
49
#include " vec/columns/column_nullable.h"
67
74
#include " vec/exec/scan/vscan_node.h"
68
75
#include " vec/exprs/vexpr.h"
69
76
#include " vec/exprs/vexpr_context.h"
77
+ #include " vec/exprs/vexpr_fwd.h"
70
78
#include " vec/exprs/vslot_ref.h"
71
79
#include " vec/functions/function.h"
72
80
#include " vec/functions/function_string.h"
@@ -161,6 +169,8 @@ Status VFileScanner::prepare(
161
169
ADD_TIMER_WITH_LEVEL (_parent->_scanner_profile , " FileScannerPreFilterTimer" , 1 );
162
170
_convert_to_output_block_timer = ADD_TIMER_WITH_LEVEL (
163
171
_parent->_scanner_profile , " FileScannerConvertOuputBlockTime" , 1 );
172
+ _runtime_filter_partition_prune_timer = ADD_TIMER_WITH_LEVEL (
173
+ _parent->_scanner_profile , " FileScannerRuntimeFilterPartitionPruningTime" , 1 );
164
174
_empty_file_counter =
165
175
ADD_COUNTER_WITH_LEVEL (_parent->_scanner_profile , " EmptyFileNum" , TUnit::UNIT, 1 );
166
176
_not_found_file_counter = ADD_COUNTER_WITH_LEVEL (_parent->_scanner_profile ,
@@ -169,6 +179,9 @@ Status VFileScanner::prepare(
169
179
ADD_COUNTER_WITH_LEVEL (_parent->_scanner_profile , " FileNumber" , TUnit::UNIT, 1 );
170
180
_has_fully_rf_file_counter = ADD_COUNTER_WITH_LEVEL (_parent->_scanner_profile ,
171
181
" HasFullyRfFileNumber" , TUnit::UNIT, 1 );
182
+ _runtime_filter_partition_pruned_range_counter =
183
+ ADD_COUNTER_WITH_LEVEL (_parent->_scanner_profile ,
184
+ " RuntimeFilterPartitionPrunedRangeNum" , TUnit::UNIT, 1 );
172
185
} else {
173
186
_get_block_timer =
174
187
ADD_TIMER_WITH_LEVEL (_local_state->scanner_profile (), " FileScannerGetBlockTime" , 1 );
@@ -184,6 +197,8 @@ Status VFileScanner::prepare(
184
197
" FileScannerPreFilterTimer" , 1 );
185
198
_convert_to_output_block_timer = ADD_TIMER_WITH_LEVEL (
186
199
_local_state->scanner_profile (), " FileScannerConvertOuputBlockTime" , 1 );
200
+ _runtime_filter_partition_prune_timer = ADD_TIMER_WITH_LEVEL (
201
+ _local_state->scanner_profile (), " FileScannerRuntimeFilterPartitionPruningTime" , 1 );
187
202
_empty_file_counter = ADD_COUNTER_WITH_LEVEL (_local_state->scanner_profile (),
188
203
" EmptyFileNum" , TUnit::UNIT, 1 );
189
204
_not_found_file_counter = ADD_COUNTER_WITH_LEVEL (_local_state->scanner_profile (),
@@ -192,6 +207,9 @@ Status VFileScanner::prepare(
192
207
TUnit::UNIT, 1 );
193
208
_has_fully_rf_file_counter = ADD_COUNTER_WITH_LEVEL (_local_state->scanner_profile (),
194
209
" HasFullyRfFileNumber" , TUnit::UNIT, 1 );
210
+ _runtime_filter_partition_pruned_range_counter =
211
+ ADD_COUNTER_WITH_LEVEL (_local_state->scanner_profile (),
212
+ " RuntimeFilterPartitionPrunedRangeNum" , TUnit::UNIT, 1 );
195
213
}
196
214
197
215
_file_cache_statistics.reset (new io::FileCacheStatistics ());
@@ -231,6 +249,113 @@ Status VFileScanner::prepare(
231
249
return Status::OK ();
232
250
}
233
251
252
+ // check if the expr is a partition pruning expr
253
+ bool VFileScanner::_check_partition_prune_expr (const VExprSPtr& expr) {
254
+ if (expr->is_slot_ref ()) {
255
+ auto * slot_ref = static_cast <VSlotRef*>(expr.get ());
256
+ return _partition_slot_index_map.find (slot_ref->slot_id ()) !=
257
+ _partition_slot_index_map.end ();
258
+ }
259
+ if (expr->is_literal ()) {
260
+ return true ;
261
+ }
262
+ return std::ranges::all_of (expr->children (), [this ](const auto & child) {
263
+ return _check_partition_prune_expr (child);
264
+ });
265
+ }
266
+
267
+ void VFileScanner::_init_runtime_filter_partition_prune_ctxs () {
268
+ _runtime_filter_partition_prune_ctxs.clear ();
269
+ for (auto & conjunct : _conjuncts) {
270
+ auto impl = conjunct->root ()->get_impl ();
271
+ // If impl is not null, which means this a conjuncts from runtime filter.
272
+ auto expr = impl ? impl : conjunct->root ();
273
+ if (_check_partition_prune_expr (expr)) {
274
+ _runtime_filter_partition_prune_ctxs.emplace_back (conjunct);
275
+ }
276
+ }
277
+ }
278
+
279
+ void VFileScanner::_init_runtime_filter_partition_prune_block () {
280
+ // init block with empty column
281
+ for (auto const * slot_desc : _real_tuple_desc->slots ()) {
282
+ if (!slot_desc->need_materialize ()) {
283
+ // should be ignored from reading
284
+ continue ;
285
+ }
286
+ _runtime_filter_partition_prune_block.insert (
287
+ ColumnWithTypeAndName (slot_desc->get_empty_mutable_column (),
288
+ slot_desc->get_data_type_ptr (), slot_desc->col_name ()));
289
+ }
290
+ }
291
+
292
+ Status VFileScanner::_process_runtime_filters_partition_prune (bool & can_filter_all) {
293
+ SCOPED_TIMER (_runtime_filter_partition_prune_timer);
294
+ if (_runtime_filter_partition_prune_ctxs.empty () || _partition_col_descs.empty ()) {
295
+ return Status::OK ();
296
+ }
297
+ size_t partition_value_column_size = 1 ;
298
+
299
+ // 1. Get partition key values to string columns.
300
+ std::unordered_map<SlotId, MutableColumnPtr> parititon_slot_id_to_column;
301
+ for (auto const & partition_col_desc : _partition_col_descs) {
302
+ const auto & [partition_value, partition_slot_desc] = partition_col_desc.second ;
303
+ auto test_serde = partition_slot_desc->get_data_type_ptr ()->get_serde ();
304
+ auto partition_value_column = partition_slot_desc->get_data_type_ptr ()->create_column ();
305
+ auto * col_ptr = static_cast <IColumn*>(partition_value_column.get ());
306
+ Slice slice (partition_value.data (), partition_value.size ());
307
+ int num_deserialized = 0 ;
308
+ RETURN_IF_ERROR (test_serde->deserialize_column_from_fixed_json (
309
+ *col_ptr, slice, partition_value_column_size, &num_deserialized, {}));
310
+ parititon_slot_id_to_column[partition_slot_desc->id ()] = std::move (partition_value_column);
311
+ }
312
+
313
+ // 2. Fill _runtime_filter_partition_prune_block from the partition column, then execute conjuncts and filter block.
314
+ // 2.1 Fill _runtime_filter_partition_prune_block from the partition column to match the conjuncts executing.
315
+ size_t index = 0 ;
316
+ bool first_column_filled = false ;
317
+ for (auto const * slot_desc : _real_tuple_desc->slots ()) {
318
+ if (!slot_desc->need_materialize ()) {
319
+ // should be ignored from reading
320
+ continue ;
321
+ }
322
+ if (parititon_slot_id_to_column.find (slot_desc->id ()) !=
323
+ parititon_slot_id_to_column.end ()) {
324
+ auto data_type = slot_desc->get_data_type_ptr ();
325
+ auto partition_value_column = std::move (parititon_slot_id_to_column[slot_desc->id ()]);
326
+ if (data_type->is_nullable ()) {
327
+ _runtime_filter_partition_prune_block.insert (
328
+ index , ColumnWithTypeAndName (
329
+ ColumnNullable::create (
330
+ std::move (partition_value_column),
331
+ ColumnUInt8::create (partition_value_column_size, 0 )),
332
+ data_type, slot_desc->col_name ()));
333
+ } else {
334
+ _runtime_filter_partition_prune_block.insert (
335
+ index , ColumnWithTypeAndName (std::move (partition_value_column), data_type,
336
+ slot_desc->col_name ()));
337
+ }
338
+ if (index == 0 ) {
339
+ first_column_filled = true ;
340
+ }
341
+ }
342
+ index ++;
343
+ }
344
+
345
+ // 2.2 Execute conjuncts.
346
+ if (!first_column_filled) {
347
+ // VExprContext.execute has an optimization, the filtering is executed when block->rows() > 0
348
+ // The following process may be tricky and time-consuming, but we have no other way.
349
+ _runtime_filter_partition_prune_block.get_by_position (0 ).column ->assume_mutable ()->resize (
350
+ partition_value_column_size);
351
+ }
352
+ IColumn::Filter result_filter (_runtime_filter_partition_prune_block.rows (), 1 );
353
+ RETURN_IF_ERROR (VExprContext::execute_conjuncts (_runtime_filter_partition_prune_ctxs, nullptr ,
354
+ &_runtime_filter_partition_prune_block,
355
+ &result_filter, &can_filter_all));
356
+ return Status::OK ();
357
+ }
358
+
234
359
Status VFileScanner::_process_conjuncts_for_dict_filter () {
235
360
_slot_id_to_filter_conjuncts.clear ();
236
361
_not_single_slot_filter_conjuncts.clear ();
@@ -294,6 +419,11 @@ Status VFileScanner::open(RuntimeState* state) {
294
419
RETURN_IF_ERROR (_split_source->get_next (&_first_scan_range, &_current_range));
295
420
if (_first_scan_range) {
296
421
RETURN_IF_ERROR (_init_expr_ctxes ());
422
+ if (_state->query_options ().enable_runtime_filter_partition_prune &&
423
+ !_partition_slot_index_map.empty ()) {
424
+ _init_runtime_filter_partition_prune_ctxs ();
425
+ _init_runtime_filter_partition_prune_block ();
426
+ }
297
427
} else {
298
428
// there's no scan range in split source. stop scanner directly.
299
429
_scanner_eof = true ;
@@ -775,6 +905,29 @@ Status VFileScanner::_get_next_reader() {
775
905
const TFileRangeDesc& range = _current_range;
776
906
_current_range_path = range.path ;
777
907
908
+ if (!_partition_slot_descs.empty ()) {
909
+ // we need get partition columns first for runtime filter partition pruning
910
+ RETURN_IF_ERROR (_generate_parititon_columns ());
911
+
912
+ if (_state->query_options ().enable_runtime_filter_partition_prune ) {
913
+ // if enable_runtime_filter_partition_prune is true, we need to check whether this range can be filtered out
914
+ // by runtime filter partition prune
915
+ if (_push_down_conjuncts.size () < _conjuncts.size ()) {
916
+ // there are new runtime filters, need to re-init runtime filter partition pruning ctxs
917
+ _init_runtime_filter_partition_prune_ctxs ();
918
+ }
919
+
920
+ bool can_filter_all = false ;
921
+ RETURN_IF_ERROR (_process_runtime_filters_partition_prune (can_filter_all));
922
+ if (can_filter_all) {
923
+ // this range can be filtered out by runtime filter partition pruning
924
+ // so we need to skip this range
925
+ COUNTER_UPDATE (_runtime_filter_partition_pruned_range_counter, 1 );
926
+ continue ;
927
+ }
928
+ }
929
+ }
930
+
778
931
// create reader for specific format
779
932
Status init_status;
780
933
// for compatibility, if format_type is not set in range, use the format type of params
@@ -1018,7 +1171,8 @@ Status VFileScanner::_get_next_reader() {
1018
1171
_missing_cols.clear ();
1019
1172
RETURN_IF_ERROR (_cur_reader->get_columns (&_name_to_col_type, &_missing_cols));
1020
1173
_cur_reader->set_push_down_agg_type (_get_push_down_agg_type ());
1021
- RETURN_IF_ERROR (_generate_fill_columns ());
1174
+ RETURN_IF_ERROR (_generate_missing_columns ());
1175
+ RETURN_IF_ERROR (_cur_reader->set_fill_columns (_partition_col_descs, _missing_col_descs));
1022
1176
if (VLOG_NOTICE_IS_ON && !_missing_cols.empty () && _is_load) {
1023
1177
fmt::memory_buffer col_buf;
1024
1178
for (auto & col : _missing_cols) {
@@ -1048,10 +1202,8 @@ Status VFileScanner::_get_next_reader() {
1048
1202
return Status::OK ();
1049
1203
}
1050
1204
1051
- Status VFileScanner::_generate_fill_columns () {
1205
+ Status VFileScanner::_generate_parititon_columns () {
1052
1206
_partition_col_descs.clear ();
1053
- _missing_col_descs.clear ();
1054
-
1055
1207
const TFileRangeDesc& range = _current_range;
1056
1208
if (range.__isset .columns_from_path && !_partition_slot_descs.empty ()) {
1057
1209
for (const auto & slot_desc : _partition_slot_descs) {
@@ -1072,7 +1224,11 @@ Status VFileScanner::_generate_fill_columns() {
1072
1224
}
1073
1225
}
1074
1226
}
1227
+ return Status::OK ();
1228
+ }
1075
1229
1230
+ Status VFileScanner::_generate_missing_columns () {
1231
+ _missing_col_descs.clear ();
1076
1232
if (!_missing_cols.empty ()) {
1077
1233
for (auto slot_desc : _real_tuple_desc->slots ()) {
1078
1234
if (!slot_desc->is_materialized ()) {
@@ -1090,8 +1246,7 @@ Status VFileScanner::_generate_fill_columns() {
1090
1246
_missing_col_descs.emplace (slot_desc->col_name (), it->second );
1091
1247
}
1092
1248
}
1093
-
1094
- return _cur_reader->set_fill_columns (_partition_col_descs, _missing_col_descs);
1249
+ return Status::OK ();
1095
1250
}
1096
1251
1097
1252
Status VFileScanner::_init_expr_ctxes () {
0 commit comments