Skip to content

Commit

Permalink
[feature-wip](array-type) add function array_union/array_except/array…
Browse files Browse the repository at this point in the history
…_intersect (#10781)

Add array_union/array_except/array_intersect function.
  • Loading branch information
xy720 authored Jul 22, 2022
1 parent 9d21b21 commit 3744321
Show file tree
Hide file tree
Showing 18 changed files with 1,160 additions and 2 deletions.
3 changes: 3 additions & 0 deletions be/src/vec/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,9 @@ set(VEC_FILES
functions/array/function_arrays_overlap.cpp
functions/array/function_array_distinct.cpp
functions/array/function_array_remove.cpp
functions/array/function_array_union.cpp
functions/array/function_array_except.cpp
functions/array/function_array_intersect.cpp
exprs/table_function/vexplode_json_array.cpp
functions/math.cpp
functions/function_bitmap.cpp
Expand Down
73 changes: 73 additions & 0 deletions be/src/vec/functions/array/function_array_binary.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#pragma once

#include "vec/columns/column_array.h"
#include "vec/data_types/data_type_array.h"
#include "vec/data_types/data_type_number.h"
#include "vec/functions/array/function_array_utils.h"
#include "vec/functions/function.h"
#include "vec/functions/function_helpers.h"

namespace doris::vectorized {

// Generic wrapper for functions whose arguments are two arrays of the same element type.
//
// Template parameters:
//   Impl - supplies the behavior through two static members used below:
//            Impl::get_return_type(arguments)               -> result data type
//            Impl::execute(res_ptr, left_data, right_data)  -> Status, fills res_ptr
//   Name - supplies the function name through a static `name` member.
template <typename Impl, typename Name>
class FunctionArrayBinary : public IFunction {
public:
    static constexpr auto name = Name::name;
    static FunctionPtr create() { return std::make_shared<FunctionArrayBinary>(); }
    String get_name() const override { return name; }
    bool is_variadic() const override { return false; }
    size_t get_number_of_arguments() const override { return 2; }

    // Returns the result type chosen by Impl.
    // The DCHECKs state the preconditions: both arguments are arrays and have equal types.
    DataTypePtr get_return_type_impl(const DataTypes& arguments) const override {
        DCHECK(is_array(arguments[0])) << arguments[0]->get_name();
        DCHECK(is_array(arguments[1])) << arguments[1]->get_name();
        DCHECK(arguments[0]->equals(*arguments[1]))
                << "data type " << arguments[0]->get_name() << " not equal with "
                << arguments[1]->get_name();
        return Impl::get_return_type(arguments);
    }

    // Runs Impl::execute over the two argument columns of `block`.
    //
    // On success the column at position `result` is replaced with the produced column.
    // Returns a RuntimeError when either argument cannot be extracted as an array column,
    // otherwise returns whatever Status Impl::execute reported.
    Status execute_impl(FunctionContext* context, Block& block, const ColumnNumbers& arguments,
                        size_t result, size_t input_rows_count) override {
        auto left_column =
                block.get_by_position(arguments[0]).column->convert_to_full_column_if_const();
        auto right_column =
                block.get_by_position(arguments[1]).column->convert_to_full_column_if_const();

        // extract array column
        ColumnArrayExecutionData left_data;
        ColumnArrayExecutionData right_data;
        if (!extract_column_array_info(*left_column, left_data) ||
            !extract_column_array_info(*right_column, right_data)) {
            // The error message is formatted only on this failure path, so successful
            // calls do not pay for building a string that would be thrown away.
            return Status::RuntimeError(
                    fmt::format("execute failed, unsupported types for function {}({}, {})",
                                get_name(), block.get_by_position(arguments[0]).type->get_name(),
                                block.get_by_position(arguments[1]).type->get_name()));
        }

        ColumnPtr res_ptr = nullptr;
        Status ret = Impl::execute(res_ptr, left_data, right_data);
        if (ret == Status::OK()) {
            block.replace_by_position(result, std::move(res_ptr));
        }
        return ret;
    }
};

} // namespace doris::vectorized
79 changes: 79 additions & 0 deletions be/src/vec/functions/array/function_array_except.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "vec/functions/array/function_array_binary.h"
#include "vec/functions/array/function_array_set.h"
#include "vec/functions/simple_function_factory.h"

namespace doris::vectorized {

// Carries the function name string passed as the Name parameter of FunctionArrayBinary.
struct NameArrayExcept {
    static constexpr const char* name = "array_except";
};

// Per-element decision logic for array_except.
//
// Every element seen from either array is remembered in `set`; an element coming from
// the left array is reported for output only the first time it is seen at all.
template <typename Set, typename Element>
struct ExceptAction {
    // True once a null element has been seen (from either array).
    bool null_flag = false;
    // True if result_set has null element. Never read or written by this action itself.
    bool result_null_flag = false;
    // True if it should execute the left array first.
    static constexpr auto execute_left_column_first = false;

    // Handle a null element.
    // Returns true when this null element should be put into the result column:
    // only for a left-array null, and only if no null was seen before it.
    template <bool is_left>
    bool apply_null() {
        const bool first_null = !null_flag;
        null_flag = true;
        return is_left && first_null;
    }

    // Handle a non-null element.
    // Returns true when this element should be put into the result column:
    // only for a left-array element that `set` did not already contain.
    // `result_set` is part of the common action signature and is not used here.
    template <bool is_left>
    bool apply(Set& set, Set& result_set, const Element& elem) {
        if (set.find(elem)) {
            return false;
        }
        set.insert(elem);
        return is_left;
    }
};

// The array_except function: the two-array wrapper FunctionArrayBinary instantiated with
// the EXCEPT set-operation implementation and the name carried by NameArrayExcept.
using FunctionArrayExcept =
FunctionArrayBinary<ArraySetImpl<SetOperation::EXCEPT>, NameArrayExcept>;

// Registers FunctionArrayExcept with the given function factory.
void register_function_array_except(SimpleFunctionFactory& factory) {
factory.register_function<FunctionArrayExcept>();
}

} // namespace doris::vectorized
79 changes: 79 additions & 0 deletions be/src/vec/functions/array/function_array_intersect.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

#include "vec/functions/array/function_array_binary.h"
#include "vec/functions/array/function_array_set.h"
#include "vec/functions/simple_function_factory.h"

namespace doris::vectorized {

// Carries the function name string passed as the Name parameter of FunctionArrayBinary.
struct NameArrayIntersect {
    static constexpr const char* name = "array_intersect";
};

// Per-element decision logic for array_intersect.
//
// Right-array elements are collected into `set`; a left-array element is reported for
// output when `set` contains it and it has not been reported before (tracked in
// `result_set`).
template <typename Set, typename Element>
struct IntersectAction {
    // True once a null element has been seen in the right array.
    bool null_flag = false;
    // True once a null element from the left array has been handled.
    bool result_null_flag = false;
    // True if it should execute the left array first.
    static constexpr auto execute_left_column_first = false;

    // Handle a null element.
    // Returns true when this null element should be put into the result column:
    // only for the first left-array null, and only if a right-array null was recorded.
    template <bool is_left>
    bool apply_null() {
        if constexpr (!is_left) {
            null_flag = true;
            return false;
        } else {
            if (result_null_flag) {
                return false;
            }
            result_null_flag = true;
            return null_flag;
        }
    }

    // Handle a non-null element.
    // Returns true when this element should be put into the result column.
    template <bool is_left>
    bool apply(Set& set, Set& result_set, const Element& elem) {
        if constexpr (!is_left) {
            // Right array: just remember the element.
            if (!set.find(elem)) {
                set.insert(elem);
            }
            return false;
        } else {
            // Left array: emit each common element once.
            const bool emit = set.find(elem) && !result_set.find(elem);
            if (emit) {
                result_set.insert(elem);
            }
            return emit;
        }
    }
};

// The array_intersect function: the two-array wrapper FunctionArrayBinary instantiated with
// the INTERSECT set-operation implementation and the name carried by NameArrayIntersect.
using FunctionArrayIntersect =
FunctionArrayBinary<ArraySetImpl<SetOperation::INTERSECT>, NameArrayIntersect>;

// Registers FunctionArrayIntersect with the given function factory.
void register_function_array_intersect(SimpleFunctionFactory& factory) {
factory.register_function<FunctionArrayIntersect>();
}

} // namespace doris::vectorized
6 changes: 6 additions & 0 deletions be/src/vec/functions/array/function_array_register.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,9 @@ void register_function_array_distinct(SimpleFunctionFactory&);
void register_function_array_remove(SimpleFunctionFactory&);
void register_function_array_sort(SimpleFunctionFactory&);
void register_function_arrays_overlap(SimpleFunctionFactory&);
void register_function_array_union(SimpleFunctionFactory&);
void register_function_array_except(SimpleFunctionFactory&);
void register_function_array_intersect(SimpleFunctionFactory&);

void register_function_array(SimpleFunctionFactory& factory) {
register_function_array_element(factory);
Expand All @@ -40,6 +43,9 @@ void register_function_array(SimpleFunctionFactory& factory) {
register_function_array_remove(factory);
register_function_array_sort(factory);
register_function_arrays_overlap(factory);
register_function_array_union(factory);
register_function_array_except(factory);
register_function_array_intersect(factory);
}

} // namespace doris::vectorized
Loading

0 comments on commit 3744321

Please sign in to comment.