Skip to content

Commit

Permalink
update ut
Browse files Browse the repository at this point in the history
  • Loading branch information
amorynan committed Dec 30, 2024
1 parent 036c0f2 commit 8019768
Show file tree
Hide file tree
Showing 2 changed files with 260 additions and 20 deletions.
223 changes: 203 additions & 20 deletions be/test/vec/data_types/data_type_array_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,8 +98,88 @@ class DataTypeArrayTest : public CommonDataTypeTest {
array_float, array_double, array_ipv4, array_ipv6, array_date,
array_datetime, array_datev2, array_datetimev2, array_varchar, array_decimal,
array_decimal64, array_decimal128, array_decimal256};
// array<array<tinyint>>
BaseInputTypeSet array_array_tinyint = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::Int8};
// array<array<smallint>>
BaseInputTypeSet array_array_smallint = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::Int16};
// array<array<int>>
BaseInputTypeSet array_array_int = {TypeIndex::Array, TypeIndex::Array, TypeIndex::Int32};
// array<array<bigint>>
BaseInputTypeSet array_array_bigint = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::Int64};
// array<array<largeint>>
BaseInputTypeSet array_array_largeint = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::Int128};
// array<array<float>>
BaseInputTypeSet array_array_float = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::Float32};
// array<array<double>>
BaseInputTypeSet array_array_double = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::Float64};
// array<array<ipv4>>
BaseInputTypeSet array_array_ipv4 = {TypeIndex::Array, TypeIndex::Array, TypeIndex::IPv4};
// array<array<ipv6>>
BaseInputTypeSet array_array_ipv6 = {TypeIndex::Array, TypeIndex::Array, TypeIndex::IPv6};
// array<array<date>>
BaseInputTypeSet array_array_date = {TypeIndex::Array, TypeIndex::Array, TypeIndex::Date};
// array<array<datetime>>
BaseInputTypeSet array_array_datetime = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::DateTime};
// array<array<datev2>>
BaseInputTypeSet array_array_datev2 = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::DateV2};
// array<array<datetimev2>>
BaseInputTypeSet array_array_datetimev2 = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::DateTimeV2};
// array<array<varchar>>
BaseInputTypeSet array_array_varchar = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::String};
// array<array<decimal32(9, 5)>> UT
BaseInputTypeSet array_array_decimal = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::Decimal32};
// array<array<decimal64(18, 9)>> UT
BaseInputTypeSet array_array_decimal64 = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::Decimal64};
// array<array<decimal128(38, 20)>> UT
BaseInputTypeSet array_array_decimal128 = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::Decimal128V3};
// array<array<decimal256(76, 40)>> UT
BaseInputTypeSet array_array_decimal256 = {TypeIndex::Array, TypeIndex::Array,
TypeIndex::Decimal256};
// array<map<char,double>>
BaseInputTypeSet array_map_char_double = {TypeIndex::Array, TypeIndex::Map,
TypeIndex::String, TypeIndex::Float64};
// test_array_map<datetime,decimal<76,56>>.csv
BaseInputTypeSet array_map_datetime_decimal = {TypeIndex::Array, TypeIndex::Map,
TypeIndex::DateTime, TypeIndex::Decimal256};
// test_array_map<ipv4,ipv6>.csv
BaseInputTypeSet array_map_ipv4_ipv6 = {TypeIndex::Array, TypeIndex::Map, TypeIndex::IPv4,
TypeIndex::IPv6};
// test_array_map<largeInt,string>.csv
BaseInputTypeSet array_map_largeint_string = {TypeIndex::Array, TypeIndex::Map,
TypeIndex::Int128, TypeIndex::String};
// array<struct<f1:int,f2:date,f3:decimal,f4:string,f5:double,f6:ipv4,f7:ipv6>>
BaseInputTypeSet array_struct = {
TypeIndex::Array, TypeIndex::Struct, TypeIndex::Int32,
TypeIndex::Date, TypeIndex::Decimal32, TypeIndex::String,
TypeIndex::Float64, TypeIndex::IPv4, TypeIndex::IPv6};

array_descs.reserve(array_typeIndex.size());
std::vector<BaseInputTypeSet> array_array_typeIndex = {
array_array_tinyint, array_array_smallint, array_array_int,
array_array_bigint, array_array_largeint, array_array_float,
array_array_double, array_array_ipv4, array_array_ipv6,
array_array_date, array_array_datetime, array_array_datev2,
array_array_datetimev2, array_array_varchar, array_array_decimal,
array_array_decimal64, array_array_decimal128, array_array_decimal256};
std::vector<BaseInputTypeSet> array_map_typeIndex = {
array_map_char_double, array_map_datetime_decimal, array_map_ipv4_ipv6,
array_map_largeint_string};
std::vector<BaseInputTypeSet> array_struct_typeIndex = {array_struct};

array_descs.reserve(array_typeIndex.size() + array_array_typeIndex.size() +
array_map_typeIndex.size() + array_struct_typeIndex.size());
for (int i = 0; i < array_typeIndex.size(); i++) {
array_descs.push_back(ut_type::UTDataTypeDescs());
InputTypeSet input_types {};
Expand All @@ -108,12 +188,67 @@ class DataTypeArrayTest : public CommonDataTypeTest {
EXPECT_EQ(input_types[1].type(), &typeid(Nullable)) << "nested type is not nullable";
EXPECT_TRUE(parse_ut_data_type(input_types, array_descs[i]));
}
for (int i = 0; i < array_array_typeIndex.size(); i++) {
array_descs.push_back(ut_type::UTDataTypeDescs());
InputTypeSet input_types {};
input_types.push_back(array_array_typeIndex[i][0]);
input_types.push_back(Nullable {static_cast<TypeIndex>(array_array_typeIndex[i][1])});
input_types.push_back(Nullable {static_cast<TypeIndex>(array_array_typeIndex[i][2])});
EXPECT_EQ(input_types[1].type(), &typeid(Nullable)) << "nested type is not nullable";
EXPECT_EQ(input_types[2].type(), &typeid(Nullable)) << "nested type is not nullable";
EXPECT_TRUE(parse_ut_data_type(input_types, array_descs[i + array_typeIndex.size()]));
}

for (int i = 0; i < array_map_typeIndex.size(); i++) {
array_descs.push_back(ut_type::UTDataTypeDescs());
InputTypeSet input_types {};
input_types.push_back(array_map_typeIndex[i][0]); // array
input_types.push_back(
Nullable {static_cast<TypeIndex>(array_map_typeIndex[i][1])}); // map
input_types.push_back(
Nullable {static_cast<TypeIndex>(array_map_typeIndex[i][2])}); // key
input_types.push_back(
Nullable {static_cast<TypeIndex>(array_map_typeIndex[i][3])}); // val
EXPECT_EQ(input_types[1].type(), &typeid(Nullable)) << "nested type is not nullable";
EXPECT_EQ(input_types[2].type(), &typeid(Nullable)) << "nested type is not nullable";
EXPECT_TRUE(parse_ut_data_type(
input_types,
array_descs[i + array_typeIndex.size() + array_array_typeIndex.size()]));
}

for (int i = 0; i < array_struct_typeIndex.size(); i++) {
array_descs.push_back(ut_type::UTDataTypeDescs());
InputTypeSet input_types {};
input_types.push_back(array_struct_typeIndex[i][0]); // arr
input_types.push_back(
Nullable {static_cast<TypeIndex>(array_struct_typeIndex[i][1])}); // struct
input_types.push_back(
Nullable {static_cast<TypeIndex>(array_struct_typeIndex[i][2])}); // f1
input_types.push_back(
Nullable {static_cast<TypeIndex>(array_struct_typeIndex[i][3])}); // f2
input_types.push_back(
Nullable {static_cast<TypeIndex>(array_struct_typeIndex[i][4])}); // f3
input_types.push_back(
Nullable {static_cast<TypeIndex>(array_struct_typeIndex[i][5])}); // f4
input_types.push_back(
Nullable {static_cast<TypeIndex>(array_struct_typeIndex[i][6])}); // f5
input_types.push_back(
Nullable {static_cast<TypeIndex>(array_struct_typeIndex[i][7])}); // f6
input_types.push_back(
Nullable {static_cast<TypeIndex>(array_struct_typeIndex[i][8])}); // f7

EXPECT_EQ(input_types[1].type(), &typeid(Nullable)) << "nested type is not nullable";
EXPECT_TRUE(parse_ut_data_type(
input_types,
array_descs[i + array_typeIndex.size() + array_array_typeIndex.size() +
array_map_typeIndex.size()]));
}

// step2. create a column_array for each data type according to its descriptor,
// then load the data from the matching csv file into that column_array
EXPECT_EQ(array_descs.size(), data_files.size());
for (int i = 0; i < array_typeIndex.size(); i++) {
for (int i = 0; i < array_descs.size(); i++) {
auto& desc = array_descs[i];
auto& data_file = data_files[i];
// first is array type
Expand All @@ -135,24 +270,48 @@ class DataTypeArrayTest : public CommonDataTypeTest {
}

std::string data_file_dir = "regression-test/data/nereids_function_p0/array/";
vector<string> data_files = {data_file_dir + "test_array_tinyint.csv",
data_file_dir + "test_array_smallint.csv",
data_file_dir + "test_array_int.csv",
data_file_dir + "test_array_bigint.csv",
data_file_dir + "test_array_largeint.csv",
data_file_dir + "test_array_float.csv",
data_file_dir + "test_array_double.csv",
data_file_dir + "test_array_ipv4.csv",
data_file_dir + "test_array_ipv6.csv",
data_file_dir + "test_array_date.csv",
data_file_dir + "test_array_datetime.csv",
data_file_dir + "test_array_date.csv",
data_file_dir + "test_array_datetimev2(6).csv",
data_file_dir + "test_array_varchar(65535).csv",
data_file_dir + "test_array_decimalv3(7,4).csv",
data_file_dir + "test_array_decimalv3(16,10).csv",
data_file_dir + "test_array_decimalv3(38,30).csv",
data_file_dir + "test_array_decimalv3(76,56).csv"};

vector<string> data_files = {
// array-scalar
data_file_dir + "test_array_tinyint.csv", data_file_dir + "test_array_smallint.csv",
data_file_dir + "test_array_int.csv", data_file_dir + "test_array_bigint.csv",
data_file_dir + "test_array_largeint.csv", data_file_dir + "test_array_float.csv",
data_file_dir + "test_array_double.csv", data_file_dir + "test_array_ipv4.csv",
data_file_dir + "test_array_ipv6.csv", data_file_dir + "test_array_date.csv",
data_file_dir + "test_array_datetime.csv", data_file_dir + "test_array_date.csv",
data_file_dir + "test_array_datetimev2(6).csv",
data_file_dir + "test_array_varchar(65535).csv",
data_file_dir + "test_array_decimalv3(7,4).csv",
data_file_dir + "test_array_decimalv3(16,10).csv",
data_file_dir + "test_array_decimalv3(38,30).csv",
data_file_dir + "test_array_decimalv3(76,56).csv",
// array-array
data_file_dir + "test_array_array_tinyint.csv",
data_file_dir + "test_array_array_smallint.csv",
data_file_dir + "test_array_array_int.csv",
data_file_dir + "test_array_array_bigint.csv",
data_file_dir + "test_array_array_largeint.csv",
data_file_dir + "test_array_array_float.csv",
data_file_dir + "test_array_array_double.csv",
data_file_dir + "test_array_array_ipv4.csv",
data_file_dir + "test_array_array_ipv6.csv",
data_file_dir + "test_array_array_date.csv",
data_file_dir + "test_array_array_datetime.csv",
data_file_dir + "test_array_array_date.csv",
data_file_dir + "test_array_array_datetimev2(5).csv",
data_file_dir + "test_array_array_varchar(65535).csv",
data_file_dir + "test_array_array_decimalv3(1,0).csv",
data_file_dir + "test_array_array_decimalv3(27,9).csv",
data_file_dir + "test_array_array_decimalv3(38,30).csv",
data_file_dir + "test_array_array_decimalv3(76,56).csv",
// array-map
data_file_dir + "test_array_map<char,double>.csv",
data_file_dir + "test_array_map<datetime,decimal<76,56>>.csv",
data_file_dir + "test_array_map<ipv4,ipv6>.csv",
data_file_dir + "test_array_map<largeInt,string>.csv",
// array-struct
data_file_dir + "test_array_struct.csv"};

vector<ut_type::UTDataTypeDescs> array_descs; // array<> descs matrix
MutableColumns array_columns; // column_array list
DataTypes array_types;
Expand Down Expand Up @@ -265,6 +424,7 @@ TEST_F(DataTypeArrayTest, SerdeHiveTextAndJsonFormatTest) {
}

// Protobuf-format serde check for the array fixture: passes the fixture's
// array_columns together with serdes to the shared
// CommonDataTypeSerdeTest::assert_pb_format helper, which performs the assertions.
TEST_F(DataTypeArrayTest, SerdePbTest) {
// NOTE(review): the original note here read "fix serde pb for read decimal64 not
// support" - presumably protobuf reads of decimal64 were unsupported and had to be
// fixed for this test to pass; confirm whether that fix has landed.
CommonDataTypeSerdeTest::assert_pb_format(array_columns, serdes);
}

Expand All @@ -283,15 +443,38 @@ TEST_F(DataTypeArrayTest, SerdeArrowTest) {
for (int i = 0; i < 17; i++) {
array_cols.push_back(array_columns[i]->get_ptr());
}
for (int i = 18; i < 35; i++) {
array_cols.push_back(array_columns[i]->get_ptr());
}
array_cols.push_back(array_columns[36]->get_ptr());
DataTypes types;
for (int i = 0; i < 17; i++) {
types.push_back(array_types[i]);
}
for (int i = 18; i < 35; i++) {
types.push_back(array_types[i]);
}
types.push_back(array_types[36]);
DataTypeSerDeSPtrs serde;
for (int i = 0; i < 17; i++) {
serde.push_back(serdes[i]);
}
for (int i = 18; i < 35; i++) {
serde.push_back(serdes[i]);
}
serde.push_back(serdes[36]);
CommonDataTypeSerdeTest::assert_arrow_format(array_cols, serde, types);
{
for (int i = 38; i < 41; ++i) {
MutableColumns error_cols;
error_cols.push_back(array_columns[i]->get_ptr());
DataTypeSerDeSPtrs serde1;
serde1.push_back(serdes[i]);
DataTypes typ;
typ.push_back(array_types[i]);
EXPECT_ANY_THROW(CommonDataTypeSerdeTest::assert_arrow_format(error_cols, serde1, typ));
}
}
}

//================== datatype for array ut test ==================
Expand Down
57 changes: 57 additions & 0 deletions be/test/vec/function/function_test_util.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@
#include "vec/data_types/data_type_ipv4.h"
#include "vec/data_types/data_type_ipv6.h"
#include "vec/data_types/data_type_jsonb.h"
#include "vec/data_types/data_type_map.h"
#include "vec/data_types/data_type_string.h"
#include "vec/data_types/data_type_struct.h"
#include "vec/data_types/data_type_time_v2.h"
#include "vec/exprs/table_function/table_function.h"
#include "vec/runtime/vdatetime_value.h"
Expand Down Expand Up @@ -189,9 +191,61 @@ size_t type_index_to_data_type(const std::vector<AnyType>& input_types, size_t i
return ret;
}
desc.children.push_back(sub_desc.type_desc);
if (sub_desc.is_nullable) {
sub_type = make_nullable(sub_type);
}
type = std::make_shared<DataTypeArray>(sub_type);
return ret + 1;
}
case TypeIndex::Map: {
    // MAP is followed by two entries in input_types: the key type, then the value type.
    desc.type = doris::PrimitiveType::TYPE_MAP;

    // Key: the entry right after the Map marker.
    ut_type::UTDataTypeDesc k_desc;
    DataTypePtr k_type = nullptr;
    ++index;
    const size_t key_ret = type_index_to_data_type(input_types, index, k_desc, k_type);
    if (key_ret <= 0) {
        return key_ret;
    }

    // Value: the entry one position after the key.
    ut_type::UTDataTypeDesc v_desc;
    DataTypePtr v_type = nullptr;
    ++index;
    const size_t value_ret = type_index_to_data_type(input_types, index, v_desc, v_type);
    if (value_ret <= 0) {
        return value_ret;
    }

    // Record key and value descriptors as children, in that order.
    desc.children.push_back(k_desc.type_desc);
    desc.children.push_back(v_desc.type_desc);

    // Wrap each side in Nullable only when its descriptor asked for it.
    type = std::make_shared<DataTypeMap>(k_desc.is_nullable ? make_nullable(k_type) : k_type,
                                         v_desc.is_nullable ? make_nullable(v_type) : v_type);

    // NOTE(review): the result is built from the value's return only; the key's
    // return is not added - confirm callers do not rely on a total consumed count.
    return value_ret + 1;
}
case TypeIndex::Struct: {
    // STRUCT: every remaining entry of input_types (from the one after the Struct
    // marker to the end) is parsed as one field of the struct, in order.
    desc.type = doris::PrimitiveType::TYPE_STRUCT;
    ++index;
    size_t ret = 0;
    DataTypes sub_types;
    while (index < input_types.size()) {
        ut_type::UTDataTypeDesc sub_desc;
        DataTypePtr sub_type = nullptr;
        ret = type_index_to_data_type(input_types, index, sub_desc, sub_type);
        if (ret <= 0) {
            return ret;
        }
        desc.children.push_back(sub_desc.type_desc);
        if (sub_desc.is_nullable) {
            sub_type = make_nullable(sub_type);
        }
        // Every field belongs to the struct whether or not it is nullable. The push
        // used to sit inside the nullable branch, which silently dropped non-nullable
        // fields from the DataTypeStruct while desc.children still listed them.
        sub_types.push_back(sub_type);
        // NOTE(review): advances by one entry per field regardless of `ret` -
        // confirm a field never spans more than one input_types entry.
        ++index;
    }
    type = std::make_shared<DataTypeStruct>(sub_types);
    // NOTE(review): the result is derived from the last field's return only, not
    // from the number of fields parsed - confirm callers do not rely on a total
    // consumed count.
    return ret + 1;
}
case TypeIndex::Nullable: {
++index;
size_t ret = type_index_to_data_type(input_types, index, ut_desc, type);
Expand Down Expand Up @@ -220,6 +274,9 @@ bool parse_ut_data_type(const std::vector<AnyType>& input_types, ut_type::UTData
}
size_t res = type_index_to_data_type(input_types, i, desc, desc.data_type);
if (res <= 0) {
std::cout << "return error, res:" << res << ", i:" << i
<< ", input_types.size():" << input_types.size()
<< "desc : " << desc.type_desc.debug_string() << std::endl;
return false;
}
if (desc.is_nullable) {
Expand Down

0 comments on commit 8019768

Please sign in to comment.