From ab18b648d01d76fb37d32b67a059025f92f6ccd1 Mon Sep 17 00:00:00 2001 From: daidai Date: Mon, 27 Jan 2025 17:43:57 +0800 Subject: [PATCH 1/2] [enchement](schema change)Standardize the behavior after a table schema change. --- .../vec/exec/format/column_type_convert.cpp | 181 +- be/src/vec/exec/format/column_type_convert.h | 410 +++- be/src/vec/exec/format/orc/vorc_reader.cpp | 61 +- .../format/parquet/parquet_column_convert.cpp | 9 +- .../format/parquet/parquet_column_convert.h | 2 +- .../create_preinstalled_scripts/run75.hql | 515 +++++ .../orc_schema_change/origin_file.orc | Bin 0 -> 2533 bytes .../parquet_schema_change/origin_file.parquet | Bin 0 -> 3651 bytes .../hive/test_hive_schema_change_orc.out | 1611 ++++++++++++++++ .../hive/test_hive_schema_change_parquet.out | 1653 +++++++++++++++++ .../hive/test_hive_schema_change_orc.groovy | 1034 +++++++++++ .../test_hive_schema_change_parquet.groovy | 1034 +++++++++++ 12 files changed, 6395 insertions(+), 115 deletions(-) create mode 100644 docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run75.hql create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_schema_change/origin_file.orc create mode 100644 docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/parquet_table/parquet_schema_change/origin_file.parquet create mode 100644 regression-test/data/external_table_p0/hive/test_hive_schema_change_orc.out create mode 100644 regression-test/data/external_table_p0/hive/test_hive_schema_change_parquet.out create mode 100644 regression-test/suites/external_table_p0/hive/test_hive_schema_change_orc.groovy create mode 100644 regression-test/suites/external_table_p0/hive/test_hive_schema_change_parquet.groovy diff --git a/be/src/vec/exec/format/column_type_convert.cpp b/be/src/vec/exec/format/column_type_convert.cpp index 0442158b690c39..28ac8f098e1f60 100644 --- a/be/src/vec/exec/format/column_type_convert.cpp +++ b/be/src/vec/exec/format/column_type_convert.cpp @@ -19,8 +19,17 @@ namespace doris::vectorized::converter { +const std::set SafeCastString::FALSE_VALUES = {"false", "off", "no", "0", + ""}; + +#define FOR_LOGICAL_INTEGER_TYPES(M) \ + M(TYPE_TINYINT) \ + M(TYPE_SMALLINT) \ + M(TYPE_INT) \ + M(TYPE_BIGINT) \ + M(TYPE_LARGEINT) + #define FOR_LOGICAL_NUMERIC_TYPES(M) \ - M(TYPE_BOOLEAN) \ M(TYPE_TINYINT) \ M(TYPE_SMALLINT) \ M(TYPE_INT) \ @@ -30,7 +39,6 @@ namespace doris::vectorized::converter { M(TYPE_DOUBLE) #define FOR_LOGICAL_DECIMAL_TYPES(M) \ - M(TYPE_DECIMALV2) \ M(TYPE_DECIMAL32) \ M(TYPE_DECIMAL64) \ M(TYPE_DECIMAL128I) \ @@ -126,46 +134,70 @@ static std::unique_ptr _numeric_converter(const TypeDescrip PrimitiveType src_primitive_type = src_type.type; PrimitiveType dst_primitive_type = remove_nullable(dst_type)->get_type_as_type_descriptor().type; - switch (src_primitive_type) { -#define DISPATCH(SRC_PTYPE) \ - case SRC_PTYPE: { \ - switch (dst_primitive_type) { \ - case TYPE_BOOLEAN: \ - return std::make_unique>(); \ + + switch (dst_primitive_type) { +#define DISPATCH(DST_PTYPE) \ + case DST_PTYPE: { \ + switch (src_primitive_type) { \ case TYPE_TINYINT: \ - return std::make_unique>(); \ + return std::make_unique>(); \ case TYPE_SMALLINT: \ - return std::make_unique>(); \ + return std::make_unique>(); \ case TYPE_INT: \ - return std::make_unique>(); \ + return std::make_unique>(); \ case TYPE_BIGINT: \ - return std::make_unique>(); \ + return std::make_unique>(); \ case TYPE_LARGEINT: \ - return std::make_unique>(); \ - case TYPE_FLOAT: \ - return std::make_unique>(); \ - case TYPE_DOUBLE: \ - return std::make_unique>(); \ + return std::make_unique>(); \ default: \ return std::make_unique(src_type, dst_type); \ } \ } - FOR_LOGICAL_NUMERIC_TYPES(DISPATCH) + FOR_LOGICAL_INTEGER_TYPES(DISPATCH) +#undef DISPATCH + + case TYPE_FLOAT: { + switch (src_primitive_type) { +#define DISPATCH(SRC_PTYPE) \ + case SRC_PTYPE: { \ + return std::make_unique>(); \ + } + FOR_LOGICAL_INTEGER_TYPES(DISPATCH) +#undef DISPATCH + default: + return std::make_unique(src_type, dst_type); + } + } + + case TYPE_DOUBLE: { + switch (src_primitive_type) { +#define DISPATCH(SRC_PTYPE) \ + case SRC_PTYPE: { \ + return std::make_unique>(); \ + } + FOR_LOGICAL_NUMERIC_TYPES(DISPATCH) #undef DISPATCH + default: + return std::make_unique(src_type, dst_type); + } + } default: return std::make_unique(src_type, dst_type); } } +template static std::unique_ptr _to_string_converter(const TypeDescriptor& src_type, const DataTypePtr& dst_type) { PrimitiveType src_primitive_type = src_type.type; // numeric type to string, using native std::to_string - if (_is_numeric_type(src_primitive_type)) { + if (src_primitive_type == TYPE_BOOLEAN) { + return std::make_unique(); + } else if (_is_numeric_type(src_primitive_type)) { switch (src_primitive_type) { #define DISPATCH(SRC_PTYPE) \ case SRC_PTYPE: \ - return std::make_unique>(); + return std::make_unique>(); FOR_LOGICAL_NUMERIC_TYPES(DISPATCH) #undef DISPATCH default: @@ -195,14 +227,16 @@ static std::unique_ptr _to_string_converter(const TypeDescr return std::make_unique(src_type, dst_type); } +template static std::unique_ptr _from_string_converter(const TypeDescriptor& src_type, const DataTypePtr& dst_type) { PrimitiveType dst_primitive_type = remove_nullable(dst_type)->get_type_as_type_descriptor().type; switch (dst_primitive_type) { -#define DISPATCH(DST_PTYPE) \ - case DST_PTYPE: \ - return std::make_unique>(remove_nullable(dst_type)); +#define DISPATCH(DST_PTYPE) \ + case DST_PTYPE: \ + return std::make_unique>( \ + remove_nullable(dst_type)); FOR_ALL_LOGICAL_TYPES(DISPATCH) #undef DISPATCH default: @@ -216,24 +250,26 @@ static std::unique_ptr _numeric_to_decimal_converter( PrimitiveType dst_primitive_type = remove_nullable(dst_type)->get_type_as_type_descriptor().type; int scale = remove_nullable(dst_type)->get_scale(); + int precision = remove_nullable(dst_type)->get_precision(); switch (src_primitive_type) { -#define DISPATCH(SRC_PTYPE) \ - case SRC_PTYPE: { \ - switch (dst_primitive_type) { \ - case TYPE_DECIMALV2: \ - return std::make_unique>(scale); \ - case TYPE_DECIMAL32: \ - return std::make_unique>(scale); \ - case TYPE_DECIMAL64: \ - return std::make_unique>(scale); \ - case TYPE_DECIMAL128I: \ - return std::make_unique>( \ - scale); \ - case TYPE_DECIMAL256: \ - return std::make_unique>(scale); \ - default: \ - return std::make_unique(src_type, dst_type); \ - } \ +#define DISPATCH(SRC_PTYPE) \ + case SRC_PTYPE: { \ + switch (dst_primitive_type) { \ + case TYPE_DECIMAL32: \ + return std::make_unique>( \ + precision, scale); \ + case TYPE_DECIMAL64: \ + return std::make_unique>( \ + precision, scale); \ + case TYPE_DECIMAL128I: \ + return std::make_unique>( \ + precision, scale); \ + case TYPE_DECIMAL256: \ + return std::make_unique>( \ + precision, scale); \ + default: \ + return std::make_unique(src_type, dst_type); \ + } \ } FOR_LOGICAL_NUMERIC_TYPES(DISPATCH) #undef DISPATCH @@ -252,8 +288,6 @@ static std::unique_ptr _decimal_to_numeric_converter( #define DISPATCH(DST_PTYPE) \ case DST_PTYPE: { \ switch (src_primitive_type) { \ - case TYPE_DECIMALV2: \ - return std::make_unique>(scale); \ case TYPE_DECIMAL32: \ return std::make_unique>(scale); \ case TYPE_DECIMAL64: \ @@ -274,18 +308,57 @@ static std::unique_ptr _decimal_to_numeric_converter( } } -std::unique_ptr ColumnTypeConverter::get_converter( - const TypeDescriptor& src_type, const DataTypePtr& dst_type) { +static std::unique_ptr _decimal_converter(const TypeDescriptor& src_type, + const DataTypePtr& dst_type) { + int from_precision = src_type.precision; + int from_scale = src_type.scale; + int to_precision = remove_nullable(dst_type)->get_precision(); + int to_scale = remove_nullable(dst_type)->get_scale(); + + if (from_scale == to_scale && from_precision == to_precision) { + return std::make_unique(); + } + PrimitiveType src_primitive_type = src_type.type; PrimitiveType dst_primitive_type = remove_nullable(dst_type)->get_type_as_type_descriptor().type; - if (src_primitive_type == dst_primitive_type) { - return std::make_unique(); + switch (dst_primitive_type) { +#define DISPATCH(DST_PTYPE) \ + case DST_PTYPE: { \ + switch (src_primitive_type) { \ + case TYPE_DECIMAL32: \ + return std::make_unique>( \ + from_precision, from_scale, to_precision, to_scale); \ + case TYPE_DECIMAL64: \ + return std::make_unique>( \ + from_precision, from_scale, to_precision, to_scale); \ + case TYPE_DECIMAL128I: \ + return std::make_unique>( \ + from_precision, from_scale, to_precision, to_scale); \ + case TYPE_DECIMAL256: \ + return std::make_unique>( \ + from_precision, from_scale, to_precision, to_scale); \ + default: \ + return std::make_unique(src_type, dst_type); \ + } \ } - if (is_string_type(src_primitive_type) && is_string_type(dst_primitive_type)) { - return std::make_unique(); + FOR_LOGICAL_DECIMAL_TYPES(DISPATCH) +#undef DISPATCH + default: + return std::make_unique(src_type, dst_type); } +} + +std::unique_ptr ColumnTypeConverter::get_converter( + const TypeDescriptor& src_type, const DataTypePtr& dst_type, FileFormat file_format) { + PrimitiveType src_primitive_type = src_type.type; + PrimitiveType dst_primitive_type = + remove_nullable(dst_type)->get_type_as_type_descriptor().type; if (_is_decimal_type(src_primitive_type) && _is_decimal_type(dst_primitive_type)) { + return _decimal_converter(src_type, dst_type); + } + + if (src_primitive_type == dst_primitive_type) { return std::make_unique(); } @@ -298,13 +371,21 @@ std::unique_ptr ColumnTypeConverter::get_converter( // change to string type // example: decimal -> string if (is_string_type(dst_primitive_type)) { - return _to_string_converter(src_type, dst_type); + if (file_format == ORC) { + return _to_string_converter(src_type, dst_type); + } else { + return _to_string_converter(src_type, dst_type); + } } // string type to other type // example: string -> date if (is_string_type(src_primitive_type)) { - return _from_string_converter(src_type, dst_type); + if (file_format == ORC) { + return _from_string_converter(src_type, dst_type); + } else { + return _from_string_converter(src_type, dst_type); + } } // date to datetime, datetime to date diff --git a/be/src/vec/exec/format/column_type_convert.h b/be/src/vec/exec/format/column_type_convert.h index d4a8186549ab1d..3052d32aa1a84b 100644 --- a/be/src/vec/exec/format/column_type_convert.h +++ b/be/src/vec/exec/format/column_type_convert.h @@ -26,12 +26,30 @@ namespace doris::vectorized::converter { +enum FileFormat { COMMON, ORC, PARQUET }; + template -constexpr bool is_decimal_type_const() { +constexpr bool is_decimal_type() { return type == TYPE_DECIMALV2 || type == TYPE_DECIMAL32 || type == TYPE_DECIMAL64 || type == TYPE_DECIMAL128I || type == TYPE_DECIMAL256; } +template +constexpr bool is_integer_type() { + return type == TYPE_INT || type == TYPE_TINYINT || type == TYPE_SMALLINT || + type == TYPE_BIGINT || type == TYPE_LARGEINT; +} + +template +constexpr bool is_real_type() { + return type == TYPE_FLOAT || type == TYPE_DOUBLE; +} + +template +constexpr bool is_numeric_type() { + return is_integer_type() || is_real_type(); +} + /** * Unified schema change interface for all format readers: * @@ -55,7 +73,8 @@ class ColumnTypeConverter { * @param dst_type column type from FE planner(the changed column type) */ static std::unique_ptr get_converter(const TypeDescriptor& src_type, - const DataTypePtr& dst_type); + const DataTypePtr& dst_type, + FileFormat file_format); ColumnTypeConverter() = default; virtual ~ColumnTypeConverter() = default; @@ -123,11 +142,14 @@ class UnsupportedConverter : public ColumnTypeConverter { }; template -class NumericToNumericConverter : public ColumnTypeConverter { + requires(is_integer_type() && is_integer_type()) +class IntegerToIntegerConverter : public ColumnTypeConverter { +public: Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; - using DstCppType = typename PrimitiveTypeTraits::CppType; + using SrcCppType = typename PrimitiveTypeTraits::CppType; using DstColumnType = typename PrimitiveTypeTraits::ColumnType; + using DstCppType = typename PrimitiveTypeTraits::CppType; ColumnPtr from_col = remove_nullable(src_col); MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); @@ -137,29 +159,140 @@ class NumericToNumericConverter : public ColumnTypeConverter { to_col->resize(start_idx + rows); auto& data = static_cast(*to_col.get()).get_data(); for (int i = 0; i < rows; ++i) { + if constexpr (sizeof(DstCppType) < sizeof(SrcCppType)) { + SrcCppType src_value = src_data[i]; + if ((SrcCppType)std::numeric_limits::min() > src_value || + src_value > (SrcCppType)std::numeric_limits::max()) { + return Status::InternalError("Failed to cast value '{}' to {} column", + src_value, dst_col->get_name()); + } + } + data[start_idx + i] = static_cast(src_data[i]); } + return Status::OK(); + } +}; + +template + requires(is_numeric_type() && is_real_type()) +class NumericToFloatPointConverter : public ColumnTypeConverter { + static constexpr long MIN_EXACT_DOUBLE = -(1L << 52); // -2^52 + static constexpr long MAX_EXACT_DOUBLE = (1L << 52) - 1; // 2^52 - 1 + static constexpr long MIN_EXACT_FLOAT = -(1L << 23); // -2^23 + static constexpr long MAX_EXACT_FLOAT = (1L << 23) - 1; // 2^23 - 1 + + bool overflow(typename PrimitiveTypeTraits::CppType value) const { + if constexpr (DstPrimitiveType == TYPE_DOUBLE) { + return value < MIN_EXACT_DOUBLE || value > MAX_EXACT_DOUBLE; + } else if constexpr (DstPrimitiveType == TYPE_FLOAT) { + return value < MIN_EXACT_FLOAT || value > MAX_EXACT_FLOAT; + } + return true; // Default case, should not occur + } + +public: + Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { + using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; + using SrcCppType = typename PrimitiveTypeTraits::CppType; + using DstColumnType = typename PrimitiveTypeTraits::ColumnType; + using DstCppType = typename PrimitiveTypeTraits::CppType; + ColumnPtr from_col = remove_nullable(src_col); + MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + + NullMap* null_map = nullptr; + if (dst_col->is_nullable()) { + null_map = + &static_cast(dst_col.get())->get_null_map_data(); + } + + size_t rows = from_col->size(); + auto& src_data = static_cast(from_col.get())->get_data(); + size_t start_idx = to_col->size(); + to_col->resize(start_idx + rows); + auto& data = static_cast(*to_col.get()).get_data(); + for (int i = 0; i < rows; ++i) { + SrcCppType src_value = src_data[i]; + if constexpr (is_integer_type()) { + if (overflow(src_value)) { + if (null_map == nullptr) { + return Status::InternalError("Failed to cast value '{}' to {} column", + src_value, dst_col->get_name()); + } else { + (*null_map)[start_idx + i] = 1; + } + } + } + + data[start_idx + i] = static_cast(src_value); + } + return Status::OK(); + } +}; +class BooleanToStringConverter : public ColumnTypeConverter { +public: + Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { + using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; + ColumnPtr from_col = remove_nullable(src_col); + MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + + size_t rows = from_col->size(); + auto& src_data = static_cast(from_col.get())->get_data(); + auto& string_col = static_cast(*to_col.get()); + for (int i = 0; i < rows; ++i) { + std::string value = src_data[i] != 0 ? "TRUE" : "FALSE"; + string_col.insert_data(value.data(), value.size()); + } return Status::OK(); } }; -template +template + requires(is_numeric_type()) class NumericToStringConverter : public ColumnTypeConverter { +private: +public: Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; ColumnPtr from_col = remove_nullable(src_col); MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + NullMap* null_map = nullptr; + if (dst_col->is_nullable()) { + null_map = &reinterpret_cast(dst_col.get()) + ->get_null_map_data(); + } + size_t rows = from_col->size(); + size_t start_idx = to_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); auto& string_col = static_cast(*to_col.get()); for (int i = 0; i < rows; ++i) { - if constexpr (SrcPrimitiveType == TYPE_LARGEINT) { - string value = int128_to_string(src_data[i]); - string_col.insert_data(value.data(), value.size()); + if constexpr (SrcPrimitiveType == TYPE_FLOAT || SrcPrimitiveType == TYPE_DOUBLE) { + if (fileFormat == FileFormat::ORC && std::isnan(src_data[i])) { + if (null_map == nullptr) { + return Status::InternalError("Failed to cast value '{}' to {} column", + src_data[i], dst_col->get_name()); + } else { + (*null_map)[start_idx + i] = 1; + } + } + char buf[128]; + int strlen; + if constexpr (SrcPrimitiveType == TYPE_FLOAT) { + strlen = FastFloatToBuffer(src_data[i], buf); + } else { + strlen = FastDoubleToBuffer(src_data[i], buf); + } + string_col.insert_data(buf, strlen); } else { - string value = std::to_string(src_data[i]); + std::string value; + if constexpr (SrcPrimitiveType == TYPE_LARGEINT) { + value = int128_to_string(src_data[i]); + } else { + value = std::to_string(src_data[i]); + } string_col.insert_data(value.data(), value.size()); } } @@ -193,8 +326,9 @@ class DecimalToStringConverter : public ColumnTypeConverter { } }; -template +template class TimeToStringConverter : public ColumnTypeConverter { +public: Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcCppType = typename PrimitiveTypeTraits::CppType; using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; @@ -214,16 +348,33 @@ class TimeToStringConverter : public ColumnTypeConverter { } }; -template +template struct SafeCastString {}; template <> struct SafeCastString { + // Ref: https://github.com/apache/hive/blob/4df4d75bf1e16fe0af75aad0b4179c34c07fc975/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/primitive/PrimitiveObjectInspectorUtils.java#L559 + static const std::set FALSE_VALUES; static bool safe_cast_string(const char* startptr, const int buffer_size, PrimitiveTypeTraits::ColumnType::value_type* value) { - int32 cast_to_int = 0; - bool can_cast = safe_strto32(startptr, buffer_size, &cast_to_int); - *value = cast_to_int == 0 ? 0 : 1; + std::string str_value(startptr, buffer_size); + std::transform(str_value.begin(), str_value.end(), str_value.begin(), ::tolower); + bool is_false = (FALSE_VALUES.find(str_value) != FALSE_VALUES.end()); + *value = is_false ? 0 : 1; + return true; + } +}; + +//Apache Hive reads 0 as false, numeric string as true and non-numeric string as null for ORC file format +// https://github.com/apache/orc/blob/fb1c4cb9461d207db652fc253396e57640ed805b/java/core/src/java/org/apache/orc/impl/ConvertTreeReaderFactory.java#L567 +template <> +struct SafeCastString { + static bool safe_cast_string(const char* startptr, const int buffer_size, + PrimitiveTypeTraits::ColumnType::value_type* value) { + std::string str_value(startptr, buffer_size); + int64 cast_to_long = 0; + bool can_cast = safe_strto64(startptr, buffer_size, &cast_to_long); + *value = cast_to_long == 0 ? 0 : 1; return can_cast; } }; @@ -285,23 +436,34 @@ struct SafeCastString { } }; -template <> -struct SafeCastString { +template +struct SafeCastString { static bool safe_cast_string(const char* startptr, const int buffer_size, PrimitiveTypeTraits::ColumnType::value_type* value) { float cast_to_float = 0; bool can_cast = safe_strtof(std::string(startptr, buffer_size), &cast_to_float); + if (can_cast && fileFormat == ORC) { + // Apache Hive reads Float.NaN as null when coerced to varchar for ORC file format. + if (std::isnan(cast_to_float)) { + return false; + } + } *value = cast_to_float; return can_cast; } }; -template <> -struct SafeCastString { +template +struct SafeCastString { static bool safe_cast_string(const char* startptr, const int buffer_size, PrimitiveTypeTraits::ColumnType::value_type* value) { double cast_to_double = 0; bool can_cast = safe_strtod(std::string(startptr, buffer_size), &cast_to_double); + if (can_cast && fileFormat == ORC) { + if (std::isnan(cast_to_double)) { + return false; + } + } *value = cast_to_double; return can_cast; } @@ -357,7 +519,7 @@ struct SafeCastDecimalString { } }; -template +template class CastStringConverter : public ColumnTypeConverter { private: DataTypePtr _dst_type_desc; @@ -387,17 +549,21 @@ class CastStringConverter : public ColumnTypeConverter { DstCppType& value = data[start_idx + i]; auto string_value = string_col.get_data_at(i); bool can_cast = false; - if constexpr (is_decimal_type_const()) { + if constexpr (is_decimal_type()) { can_cast = SafeCastDecimalString::safe_cast_string( string_value.data, string_value.size, &value, _dst_type_desc->get_precision(), _dst_type_desc->get_scale()); } else if constexpr (DstPrimitiveType == TYPE_DATETIMEV2) { can_cast = SafeCastString::safe_cast_string( string_value.data, string_value.size, &value, _dst_type_desc->get_scale()); + } else if constexpr (DstPrimitiveType == TYPE_BOOLEAN && fileFormat == ORC) { + can_cast = SafeCastString::safe_cast_string( + string_value.data, string_value.size, &value); } else { can_cast = SafeCastString::safe_cast_string( string_value.data, string_value.size, &value); } + if (!can_cast) { if (null_map == nullptr) { return Status::InternalError("Failed to cast string '{}' to not null column", @@ -415,6 +581,7 @@ class CastStringConverter : public ColumnTypeConverter { // only support date & datetime v2 template class TimeV2Converter : public ColumnTypeConverter { +public: Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; using DstColumnType = typename PrimitiveTypeTraits::ColumnType; @@ -442,61 +609,95 @@ class TimeV2Converter : public ColumnTypeConverter { }; template + requires(is_numeric_type() && is_decimal_type()) class NumericToDecimalConverter : public ColumnTypeConverter { private: + int _precision; int _scale; public: - NumericToDecimalConverter(int scale) : _scale(scale) {} + NumericToDecimalConverter(int precision, int scale) : _precision(precision), _scale(scale) {} Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; + using SrcCppType = typename PrimitiveTypeTraits::CppType; using DstColumnType = typename PrimitiveTypeTraits::ColumnType; using DstNativeType = typename PrimitiveTypeTraits::ColumnType::value_type::NativeType; - using DstCppType = typename PrimitiveTypeTraits::ColumnType::value_type; + using DstDorisType = typename PrimitiveTypeTraits::ColumnType::value_type; ColumnPtr from_col = remove_nullable(src_col); MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + NullMap* null_map = nullptr; + if (dst_col->is_nullable()) { + null_map = &reinterpret_cast(dst_col.get()) + ->get_null_map_data(); + } + size_t rows = from_col->size(); auto& src_data = static_cast(from_col.get())->get_data(); size_t start_idx = to_col->size(); to_col->resize(start_idx + rows); auto& data = static_cast(*to_col.get()).get_data(); - int64_t scale_factor = 1; - if (_scale > DecimalV2Value::SCALE) { - scale_factor = common::exp10_i64(_scale - DecimalV2Value::SCALE); - } else if (_scale < DecimalV2Value::SCALE) { - scale_factor = common::exp10_i64(DecimalV2Value::SCALE - _scale); - } + + auto max_result = DataTypeDecimal::get_max_digits_number(_precision); + auto multiplier = DataTypeDecimal::get_scale_multiplier(_scale).value; for (int i = 0; i < rows; ++i) { - if constexpr (SrcPrimitiveType == TYPE_FLOAT || SrcPrimitiveType == TYPE_DOUBLE) { - DecimalV2Value decimal_value; - if constexpr (SrcPrimitiveType == TYPE_FLOAT) { - decimal_value.assign_from_float(src_data[i]); - } else { - decimal_value.assign_from_double(src_data[i]); + const SrcCppType& src_value = src_data[i]; + DstDorisType& res = data[start_idx + i]; + + if constexpr (is_integer_type()) { + if constexpr (sizeof(DstNativeType) < sizeof(SrcCppType)) { + if (src_value > std::numeric_limits::max() || + src_value < std::numeric_limits::min()) { + if (null_map == nullptr) { + return Status::InternalError("Failed to cast value '{}' to {} column", + src_data[i], dst_col->get_name()); + } else { + (*null_map)[start_idx + i] = 1; + } + } } - int128_t decimal_int128 = reinterpret_cast(decimal_value); - if (_scale > DecimalV2Value::SCALE) { - decimal_int128 *= scale_factor; - } else if (_scale < DecimalV2Value::SCALE) { - decimal_int128 /= scale_factor; + if (common::mul_overflow(static_cast(src_value), multiplier, + res.value)) { + if (null_map == nullptr) { + return Status::InternalError("Failed to cast value '{}' to {} column", + src_data[i], dst_col->get_name()); + } else { + (*null_map)[start_idx + i] = 1; + } + } else { + if (res.value > max_result.value || res.value < -max_result.value) { + if (null_map == nullptr) { + return Status::InternalError("Failed to cast value '{}' to {} column", + src_data[i], dst_col->get_name()); + } else { + (*null_map)[start_idx + i] = 1; + } + } } - auto& v = reinterpret_cast(data[start_idx + i]); - v = (DstNativeType)decimal_int128; } else { - data[start_idx + i] = DstCppType::from_int_frac(src_data[i], 0, _scale); + res = static_cast(src_value * multiplier); + if (UNLIKELY(!std::isfinite(src_value) || + src_value * multiplier > max_result.value || + src_value * multiplier < -max_result.value)) { + if (null_map == nullptr) { + return Status::InternalError("Failed to cast value '{}' to {} column", + src_data[i], dst_col->get_name()); + } else { + (*null_map)[start_idx + i] = 1; + } + } } } - return Status::OK(); } }; template + requires(is_numeric_type() && is_decimal_type()) class DecimalToNumericConverter : public ColumnTypeConverter { private: int _scale; @@ -506,6 +707,8 @@ class DecimalToNumericConverter : public ColumnTypeConverter { Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; + using SrcNativeType = + typename PrimitiveTypeTraits::ColumnType::value_type::NativeType; using DstColumnType = typename PrimitiveTypeTraits::ColumnType; using DstCppType = typename PrimitiveTypeTraits::CppType; @@ -518,13 +721,44 @@ class DecimalToNumericConverter : public ColumnTypeConverter { to_col->resize(start_idx + rows); auto& data = static_cast(*to_col.get()).get_data(); - int64_t scale_factor = common::exp10_i64(_scale); + NullMap* null_map = nullptr; + if (dst_col->is_nullable()) { + null_map = &reinterpret_cast(dst_col.get()) + ->get_null_map_data(); + } + + SrcNativeType scale_factor; + if constexpr (sizeof(SrcNativeType) <= sizeof(int)) { + scale_factor = common::exp10_i32(_scale); + } else if constexpr (sizeof(SrcNativeType) <= sizeof(int64)) { + scale_factor = common::exp10_i64(_scale); + } else if constexpr (sizeof(SrcNativeType) <= sizeof(__int128)) { + scale_factor = common::exp10_i128(_scale); + } else if constexpr (sizeof(SrcNativeType) <= sizeof(wide::Int256)) { + scale_factor = common::exp10_i256(_scale); + } + for (int i = 0; i < rows; ++i) { if constexpr (DstPrimitiveType == TYPE_FLOAT || DstPrimitiveType == TYPE_DOUBLE) { data[start_idx + i] = static_cast(src_data[i].value / (double)scale_factor); } else { - data[start_idx + i] = static_cast(src_data[i].value / scale_factor); + SrcNativeType tmp_value = src_data[i].value / scale_factor; + + if constexpr (sizeof(SrcNativeType) > sizeof(DstCppType)) { + if ((SrcNativeType)std::numeric_limits::min() > tmp_value || + tmp_value > (SrcNativeType)std::numeric_limits::max()) { + if (null_map == nullptr) { + return Status::InternalError("Failed to cast value '{}' to {} column", + src_data[i].to_string(_scale), + dst_col->get_name()); + } else { + (*null_map)[start_idx + i] = 1; + } + } + } + + data[start_idx + i] = static_cast(tmp_value); } } @@ -532,4 +766,90 @@ class DecimalToNumericConverter : public ColumnTypeConverter { } }; +template +class DecimalToDecimalConverter : public ColumnTypeConverter { +private: + int _from_precision; + int _from_scale; + int _to_precision; + int _to_scale; + +public: + DecimalToDecimalConverter(int from_precision, int from_scale, int to_precision, int to_scale) + : _from_precision(from_precision), + _from_scale(from_scale), + _to_precision(to_precision), + _to_scale(to_scale) {} + + Status convert(ColumnPtr& src_col, MutableColumnPtr& dst_col) override { + using SrcColumnType = typename PrimitiveTypeTraits::ColumnType; + using DstColumnType = typename PrimitiveTypeTraits::ColumnType; + using SrcNativeType = typename PrimitiveTypeTraits< + SrcDecimalPrimitiveType>::ColumnType::value_type::NativeType; + using DstNativeType = typename PrimitiveTypeTraits< + DstDecimalPrimitiveType>::ColumnType::value_type::NativeType; + using MaxNativeType = std::conditional_t<(sizeof(SrcNativeType) > sizeof(DstNativeType)), + SrcNativeType, DstNativeType>; + + auto max_result = + DataTypeDecimal>::get_max_digits_number(_to_precision); + bool narrow_integral = (_to_precision - _to_scale) < (_from_precision - _from_scale); + + ColumnPtr from_col = remove_nullable(src_col); + MutableColumnPtr to_col = remove_nullable(dst_col->get_ptr())->assume_mutable(); + + size_t rows = from_col->size(); + auto& src_data = static_cast(from_col.get())->get_data(); + size_t start_idx = to_col->size(); + to_col->resize(start_idx + rows); + auto& data = static_cast(*to_col.get()).get_data(); + + for (int i = 0; i < rows; ++i) { + SrcNativeType src_value = src_data[i].value; + DstNativeType& res_value = data[start_idx + i].value; + + if (_to_scale > _from_scale) { + const MaxNativeType multiplier = + DataTypeDecimal>::get_scale_multiplier(_to_scale - + _from_scale); + MaxNativeType res; + if (common::mul_overflow(src_value, multiplier, res)) { + return Status::InternalError("Failed to cast value '{}' to {} column", + src_data[i].to_string(_from_scale), + dst_col->get_name()); + } else { + if (res > max_result.value || res < -max_result.value) { + return Status::InternalError("Failed to cast value '{}' to {} column", + src_data[i].to_string(_from_scale), + dst_col->get_name()); + } else { + res_value = static_cast(res); + } + } + } else if (_to_scale == _from_scale) { + res_value = src_value; + if (narrow_integral && + (res_value > max_result.value || res_value < -max_result.value)) { + return Status::InternalError("Failed to cast value '{}' to {} column", + src_data[i].to_string(_from_scale), + dst_col->get_name()); + } + } else { + MaxNativeType multiplier = + DataTypeDecimal>::get_scale_multiplier(_from_scale - + _to_scale) + .value; + res_value = src_value / multiplier; + + if (src_value % multiplier != 0 || res_value > max_result.value || + res_value < -max_result.value) { + return Status::InternalError("Failed to cast value '{}' to {} column", + src_data[i].to_string(_from_scale), + dst_col->get_name()); + } + } + } + return Status::OK(); + } +}; } // namespace doris::vectorized::converter diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index a488bef7455071..45edeaa00d32f7 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -474,20 +474,42 @@ std::tuple convert_to_orc_literal(const orc::Type* type, const auto* value = literal_data.data; try { switch (type->getKind()) { - case orc::TypeKind::BOOLEAN: + case orc::TypeKind::BOOLEAN: { + if (primitive_type != TYPE_BOOLEAN) { + return std::make_tuple(false, orc::Literal(false)); + } return std::make_tuple(true, orc::Literal(bool(*((uint8_t*)value)))); + } case orc::TypeKind::BYTE: - return std::make_tuple(true, orc::Literal(int64_t(*((int8_t*)value)))); case orc::TypeKind::SHORT: - return std::make_tuple(true, orc::Literal(int64_t(*((int16_t*)value)))); case orc::TypeKind::INT: - return std::make_tuple(true, orc::Literal(int64_t(*((int32_t*)value)))); - case orc::TypeKind::LONG: + case orc::TypeKind::LONG: { + if constexpr (primitive_type == TYPE_TINYINT) { + return std::make_tuple(true, orc::Literal(int64_t(*((int8_t*)value)))); + } else if constexpr (primitive_type == TYPE_SMALLINT) { + return std::make_tuple(true, orc::Literal(int64_t(*((int16_t*)value)))); + } else if constexpr (primitive_type == TYPE_INT) { + return std::make_tuple(true, orc::Literal(int64_t(*((int32_t*)value)))); + } else if constexpr (primitive_type == TYPE_BIGINT) { + return std::make_tuple(true, orc::Literal(int64_t(*((int64_t*)value)))); + } + return std::make_tuple(false, orc::Literal(false)); + } return std::make_tuple(true, orc::Literal(*((int64_t*)value))); - case orc::TypeKind::FLOAT: - return std::make_tuple(true, orc::Literal(double(*((float*)value)))); - case orc::TypeKind::DOUBLE: - return std::make_tuple(true, orc::Literal(*((double*)value))); + case orc::TypeKind::FLOAT: { + if constexpr (primitive_type == TYPE_FLOAT) { + return std::make_tuple(true, orc::Literal(double(*((float*)value)))); + } else if constexpr (primitive_type == TYPE_DOUBLE) { + return std::make_tuple(true, orc::Literal(double(*((double*)value)))); + } + return std::make_tuple(false, orc::Literal(false)); + } + case orc::TypeKind::DOUBLE: { + if (primitive_type == TYPE_DOUBLE) { + return std::make_tuple(true, orc::Literal(*((double*)value))); + } + return std::make_tuple(false, orc::Literal(false)); + } case orc::TypeKind::STRING: [[fallthrough]]; case orc::TypeKind::BINARY: @@ -496,7 +518,11 @@ std::tuple convert_to_orc_literal(const orc::Type* type, // case orc::TypeKind::CHAR: // [[fallthrough]]; case orc::TypeKind::VARCHAR: { - return std::make_tuple(true, orc::Literal(literal_data.data, literal_data.size)); + if (primitive_type == TYPE_STRING || primitive_type == TYPE_CHAR || + primitive_type == TYPE_VARCHAR) { + return std::make_tuple(true, orc::Literal(literal_data.data, literal_data.size)); + } + return std::make_tuple(false, orc::Literal(false)); } case orc::TypeKind::DECIMAL: { int128_t decimal_value; @@ -508,8 +534,10 @@ std::tuple convert_to_orc_literal(const orc::Type* type, decimal_value = *((int32_t*)value); } else if constexpr (primitive_type == TYPE_DECIMAL64) { decimal_value = *((int64_t*)value); - } else { + } else if constexpr (primitive_type == TYPE_DECIMAL128I) { decimal_value = *((int128_t*)value); + } else { + return std::make_tuple(false, orc::Literal(false)); } return std::make_tuple(true, orc::Literal(orc::Int128(uint64_t(decimal_value >> 64), uint64_t(decimal_value)), @@ -523,12 +551,14 @@ std::tuple convert_to_orc_literal(const orc::Type* type, cctz::civil_day civil_date(date_v1.year(), date_v1.month(), date_v1.day()); day_offset = cctz::convert(civil_date, utc0).time_since_epoch().count() / (24 * 60 * 60); - } else { // primitive_type == TYPE_DATEV2 + } else if (primitive_type == TYPE_DATEV2) { const DateV2Value date_v2 = *reinterpret_cast*>(value); cctz::civil_day civil_date(date_v2.year(), date_v2.month(), date_v2.day()); day_offset = cctz::convert(civil_date, utc0).time_since_epoch().count() / (24 * 60 * 60); + } else { + return std::make_tuple(false, orc::Literal(false)); } return std::make_tuple(true, orc::Literal(orc::PredicateDataType::DATE, day_offset)); } @@ -545,7 +575,7 @@ std::tuple convert_to_orc_literal(const orc::Type* type, datetime_v1.minute(), datetime_v1.second()); seconds = cctz::convert(civil_seconds, utc0).time_since_epoch().count(); nanos = 0; - } else { // primitive_type == TYPE_DATETIMEV2 + } else if (primitive_type == TYPE_DATETIMEV2) { const DateV2Value datetime_v2 = *reinterpret_cast*>(value); cctz::civil_second civil_seconds(datetime_v2.year(), datetime_v2.month(), @@ -553,6 +583,8 @@ std::tuple convert_to_orc_literal(const orc::Type* type, datetime_v2.minute(), datetime_v2.second()); seconds = cctz::convert(civil_seconds, utc0).time_since_epoch().count(); nanos = datetime_v2.microsecond() * 1000; + } else { + return std::make_tuple(false, orc::Literal(false)); } return std::make_tuple(true, orc::Literal(seconds, nanos)); } @@ -1754,7 +1786,8 @@ Status OrcReader::_orc_column_to_doris_column(const std::string& col_name, Colum if (!_converters.contains(converter_key)) { std::unique_ptr converter = - converter::ColumnTypeConverter::get_converter(src_type, data_type); + converter::ColumnTypeConverter::get_converter(src_type, data_type, + converter::FileFormat::ORC); if (!converter->support()) { return Status::InternalError( "The column type of '{}' has changed and is not supported: ", col_name, diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp index 49636d809aa0d8..80baa4b7f05d36 100644 --- a/be/src/vec/exec/format/parquet/parquet_column_convert.cpp +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.cpp @@ -25,7 +25,6 @@ namespace doris::vectorized::parquet { const cctz::time_zone ConvertParams::utc0 = cctz::utc_time_zone(); #define FOR_LOGICAL_DECIMAL_TYPES(M) \ - M(TYPE_DECIMALV2) \ M(TYPE_DECIMAL32) \ M(TYPE_DECIMAL64) \ M(TYPE_DECIMAL128I) \ @@ -133,8 +132,8 @@ static void get_decimal_converter(FieldSchema* field_schema, TypeDescriptor src_ std::unique_ptr& physical_converter) { const tparquet::SchemaElement& parquet_schema = field_schema->parquet_schema; if (is_decimal(remove_nullable(dst_logical_type))) { - // using destination decimal type, avoid type and scale change - src_logical_type = remove_nullable(dst_logical_type)->get_type_as_type_descriptor(); + src_logical_type = create_decimal(parquet_schema.precision, parquet_schema.scale, false) + ->get_type_as_type_descriptor(); } tparquet::Type::type src_physical_type = parquet_schema.type; @@ -298,8 +297,8 @@ std::unique_ptr PhysicalToLogicalConverter::get_conv if (physical_converter->support()) { physical_converter->_convert_params = std::move(convert_params); - physical_converter->_logical_converter = - converter::ColumnTypeConverter::get_converter(src_logical_type, dst_logical_type); + physical_converter->_logical_converter = converter::ColumnTypeConverter::get_converter( + src_logical_type, dst_logical_type, converter::FileFormat::PARQUET); if (!physical_converter->_logical_converter->support()) { physical_converter.reset(new UnsupportedConverter( "Unsupported type change: " + diff --git a/be/src/vec/exec/format/parquet/parquet_column_convert.h b/be/src/vec/exec/format/parquet/parquet_column_convert.h index d35a69ff59c625..546aa8b8f6fda7 100644 --- a/be/src/vec/exec/format/parquet/parquet_column_convert.h +++ b/be/src/vec/exec/format/parquet/parquet_column_convert.h @@ -154,7 +154,7 @@ struct ConvertParams { * * Ultimate performance optimization: * 1. If process of (First => Second) is consistent, eg. from BYTE_ARRAY to string, no additional copies and conversions will be introduced; - * 2. If process of (Second => Third) is consistent, eg. from decimal(12, 4) to decimal(8, 2), no additional copies and conversions will be introduced; + * 2. If process of (Second => Third) is consistent, no additional copies and conversions will be introduced; * 3. Null map is share among all processes, no additional copies and conversions will be introduced in null map; * 4. Only create one physical column in physical conversion, and reused in each loop; * 5. Only create one logical column in logical conversion, and reused in each loop; diff --git a/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run75.hql b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run75.hql new file mode 100644 index 00000000000000..41db62fbaba961 --- /dev/null +++ b/docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_scripts/run75.hql @@ -0,0 +1,515 @@ +create database if not exists schema_change; +use schema_change; + +CREATE TABLE IF NOT EXISTS parquet_primitive_types_to_boolean ( + id INT, + bool_col BOOLEAN, + int_col BOOLEAN, + smallint_col BOOLEAN, + tinyint_col BOOLEAN, + bigint_col BOOLEAN, + float_col BOOLEAN, + double_col BOOLEAN, + string_col BOOLEAN, + char1_col BOOLEAN, + char2_col BOOLEAN, + varchar_col BOOLEAN, + date_col BOOLEAN, + timestamp_col BOOLEAN, + decimal1_col BOOLEAN, + decimal2_col BOOLEAN +) STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_schema_change'; + +CREATE TABLE IF NOT EXISTS parquet_primitive_types_to_bigint ( + id INT, + bool_col BIGINT, + int_col BIGINT, + smallint_col BIGINT, + tinyint_col BIGINT, + bigint_col BIGINT, + float_col BIGINT, + double_col BIGINT, + string_col BIGINT, + char1_col BIGINT, + char2_col BIGINT, + varchar_col BIGINT, + date_col BIGINT, + timestamp_col BIGINT, + decimal1_col BIGINT, + decimal2_col BIGINT +) STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_schema_change'; + +CREATE TABLE IF NOT EXISTS parquet_primitive_types_to_int ( + id INT, + bool_col INT, + int_col INT, + smallint_col INT, + tinyint_col INT, + bigint_col INT, + float_col INT, + double_col INT, + string_col INT, + char1_col INT, + char2_col INT, + varchar_col INT, + date_col INT, + timestamp_col INT, + decimal1_col INT, + decimal2_col INT +) STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_schema_change'; + +CREATE TABLE IF NOT EXISTS parquet_primitive_types_to_smallint ( + id INT, + bool_col SMALLINT, + int_col SMALLINT, + smallint_col SMALLINT, + tinyint_col SMALLINT, + bigint_col SMALLINT, + float_col SMALLINT, + double_col SMALLINT, + string_col SMALLINT, + char1_col SMALLINT, + char2_col SMALLINT, + varchar_col SMALLINT, + date_col SMALLINT, + timestamp_col SMALLINT, + decimal1_col SMALLINT, + decimal2_col SMALLINT +) STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_schema_change'; + +CREATE TABLE IF NOT EXISTS parquet_primitive_types_to_tinyint ( + id INT, + bool_col TINYINT, + int_col TINYINT, + smallint_col TINYINT, + tinyint_col TINYINT, + bigint_col TINYINT, + float_col TINYINT, + double_col TINYINT, + string_col TINYINT, + char1_col TINYINT, + char2_col TINYINT, + varchar_col TINYINT, + date_col TINYINT, + timestamp_col TINYINT, + decimal1_col TINYINT, + decimal2_col TINYINT +) STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_schema_change'; + +CREATE TABLE IF NOT EXISTS parquet_primitive_types_to_float ( + id INT, + bool_col FLOAT, + int_col FLOAT, + smallint_col FLOAT, + tinyint_col FLOAT, + bigint_col FLOAT, + float_col FLOAT, + double_col FLOAT, + string_col FLOAT, + char1_col FLOAT, + char2_col FLOAT, + varchar_col FLOAT, + date_col FLOAT, + timestamp_col FLOAT, + decimal1_col FLOAT, + decimal2_col FLOAT +) STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_schema_change'; + +CREATE TABLE IF NOT EXISTS parquet_primitive_types_to_double ( + id INT, + bool_col DOUBLE, + int_col DOUBLE, + smallint_col DOUBLE, + tinyint_col DOUBLE, + bigint_col DOUBLE, + float_col DOUBLE, + double_col DOUBLE, + string_col DOUBLE, + char1_col DOUBLE, + char2_col DOUBLE, + varchar_col DOUBLE, + date_col DOUBLE, + timestamp_col DOUBLE, + decimal1_col DOUBLE, + decimal2_col DOUBLE +) STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_schema_change'; + +CREATE TABLE IF NOT EXISTS parquet_primitive_types_to_string ( + id INT, + bool_col STRING, + int_col STRING, + smallint_col STRING, + tinyint_col STRING, + bigint_col STRING, + float_col STRING, + double_col STRING, + string_col STRING, + char1_col STRING, + char2_col STRING, + varchar_col STRING, + date_col STRING, + timestamp_col STRING, + decimal1_col STRING, + decimal2_col STRING +) STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_schema_change'; + +CREATE TABLE IF NOT EXISTS parquet_primitive_types_to_date ( + id INT, + bool_col DATE, + int_col DATE, + smallint_col DATE, + tinyint_col DATE, + bigint_col DATE, + float_col DATE, + double_col DATE, + string_col DATE, + char1_col DATE, + char2_col DATE, + varchar_col DATE, + date_col DATE, + timestamp_col DATE, + decimal1_col DATE, + decimal2_col DATE +) STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_schema_change'; + +CREATE TABLE IF NOT EXISTS parquet_primitive_types_to_timestamp ( + id INT, + bool_col TIMESTAMP, + int_col TIMESTAMP, + smallint_col TIMESTAMP, + tinyint_col TIMESTAMP, + bigint_col TIMESTAMP, + float_col TIMESTAMP, + double_col TIMESTAMP, + string_col TIMESTAMP, + char1_col TIMESTAMP, + char2_col TIMESTAMP, + varchar_col TIMESTAMP, + date_col TIMESTAMP, + timestamp_col TIMESTAMP, + decimal1_col TIMESTAMP, + decimal2_col TIMESTAMP +) STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_schema_change'; + + +CREATE TABLE IF NOT EXISTS parquet_primitive_types_to_decimal1 ( + id INT, + bool_col DECIMAL(20,5), + int_col DECIMAL(20,5), + smallint_col DECIMAL(20,5), + tinyint_col DECIMAL(20,5), + bigint_col DECIMAL(20,5), + float_col DECIMAL(20,5), + double_col DECIMAL(20,5), + string_col DECIMAL(20,5), + char1_col DECIMAL(20,5), + char2_col DECIMAL(20,5), + varchar_col DECIMAL(20,5), + date_col DECIMAL(20,5), + timestamp_col DECIMAL(20,5), + decimal1_col DECIMAL(20,5), + decimal2_col DECIMAL(20,5) +) STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_schema_change'; + +CREATE TABLE IF NOT EXISTS parquet_primitive_types_to_decimal2 ( + id INT, + bool_col DECIMAL(7,1), + int_col DECIMAL(7,1), + smallint_col DECIMAL(7,1), + tinyint_col DECIMAL(7,1), + bigint_col DECIMAL(7,1), + float_col DECIMAL(7,1), + double_col DECIMAL(7,1), + string_col DECIMAL(7,1), + char1_col DECIMAL(7,1), + char2_col DECIMAL(7,1), + varchar_col DECIMAL(7,1), + date_col DECIMAL(7,1), + timestamp_col DECIMAL(7,1), + decimal1_col DECIMAL(7,1), + decimal2_col DECIMAL(7,1) +) STORED AS PARQUET +LOCATION '/user/doris/preinstalled_data/parquet_table/parquet_schema_change'; + + + + +CREATE TABLE IF NOT EXISTS orc_primitive_types_to_boolean ( + id INT, + bool_col BOOLEAN, + int_col BOOLEAN, + smallint_col BOOLEAN, + tinyint_col BOOLEAN, + bigint_col BOOLEAN, + float_col BOOLEAN, + double_col BOOLEAN, + string_col BOOLEAN, + char1_col BOOLEAN, + char2_col BOOLEAN, + varchar_col BOOLEAN, + date_col BOOLEAN, + timestamp_col BOOLEAN, + decimal1_col BOOLEAN, + decimal2_col BOOLEAN +) STORED AS orc +LOCATION '/user/doris/preinstalled_data/orc_table/orc_schema_change'; + + +CREATE TABLE IF NOT EXISTS orc_primitive_types_to_bigint ( + id INT, + bool_col BIGINT, + int_col BIGINT, + smallint_col BIGINT, + tinyint_col BIGINT, + bigint_col BIGINT, + float_col BIGINT, + double_col BIGINT, + string_col BIGINT, + char1_col BIGINT, + char2_col BIGINT, + varchar_col BIGINT, + date_col BIGINT, + timestamp_col BIGINT, + decimal1_col BIGINT, + decimal2_col BIGINT +) STORED AS orc +LOCATION '/user/doris/preinstalled_data/orc_table/orc_schema_change'; + +CREATE TABLE IF NOT EXISTS orc_primitive_types_to_int ( + id INT, + bool_col INT, + int_col INT, + smallint_col INT, + tinyint_col INT, + bigint_col INT, + float_col INT, + double_col INT, + string_col INT, + char1_col INT, + char2_col INT, + varchar_col INT, + date_col INT, + timestamp_col INT, + decimal1_col INT, + decimal2_col INT +) STORED AS orc +LOCATION '/user/doris/preinstalled_data/orc_table/orc_schema_change'; + +CREATE TABLE IF NOT EXISTS orc_primitive_types_to_smallint ( + id INT, + bool_col SMALLINT, + int_col SMALLINT, + smallint_col SMALLINT, + tinyint_col SMALLINT, + bigint_col SMALLINT, + float_col SMALLINT, + double_col SMALLINT, + string_col SMALLINT, + char1_col SMALLINT, + char2_col SMALLINT, + varchar_col SMALLINT, + date_col SMALLINT, + timestamp_col SMALLINT, + decimal1_col SMALLINT, + decimal2_col SMALLINT +) STORED AS orc +LOCATION '/user/doris/preinstalled_data/orc_table/orc_schema_change'; + +CREATE TABLE IF NOT EXISTS orc_primitive_types_to_tinyint ( + id INT, + bool_col TINYINT, + int_col TINYINT, + smallint_col TINYINT, + tinyint_col TINYINT, + bigint_col TINYINT, + float_col TINYINT, + double_col TINYINT, + string_col TINYINT, + char1_col TINYINT, + char2_col TINYINT, + varchar_col TINYINT, + date_col TINYINT, + timestamp_col TINYINT, + decimal1_col TINYINT, + decimal2_col TINYINT +) STORED AS orc +LOCATION '/user/doris/preinstalled_data/orc_table/orc_schema_change'; + +CREATE TABLE IF NOT EXISTS orc_primitive_types_to_float ( + id INT, + bool_col FLOAT, + int_col FLOAT, + smallint_col FLOAT, + tinyint_col FLOAT, + bigint_col FLOAT, + float_col FLOAT, + double_col FLOAT, + string_col FLOAT, + char1_col FLOAT, + char2_col FLOAT, + varchar_col FLOAT, + date_col FLOAT, + timestamp_col FLOAT, + decimal1_col FLOAT, + decimal2_col FLOAT +) STORED AS orc +LOCATION '/user/doris/preinstalled_data/orc_table/orc_schema_change'; + +CREATE TABLE IF NOT EXISTS orc_primitive_types_to_double ( + id INT, + bool_col DOUBLE, + int_col DOUBLE, + smallint_col DOUBLE, + tinyint_col DOUBLE, + bigint_col DOUBLE, + float_col DOUBLE, + double_col DOUBLE, + string_col DOUBLE, + char1_col DOUBLE, + char2_col DOUBLE, + varchar_col DOUBLE, + date_col DOUBLE, + timestamp_col DOUBLE, + decimal1_col DOUBLE, + decimal2_col DOUBLE +) STORED AS orc +LOCATION '/user/doris/preinstalled_data/orc_table/orc_schema_change'; + +CREATE TABLE IF NOT EXISTS orc_primitive_types_to_string ( + id INT, + bool_col STRING, + int_col STRING, + smallint_col STRING, + tinyint_col STRING, + bigint_col STRING, + float_col STRING, + double_col STRING, + string_col STRING, + char1_col STRING, + char2_col STRING, + varchar_col STRING, + date_col STRING, + timestamp_col STRING, + decimal1_col STRING, + decimal2_col STRING +) STORED AS orc +LOCATION '/user/doris/preinstalled_data/orc_table/orc_schema_change'; + +CREATE TABLE IF NOT EXISTS orc_primitive_types_to_date ( + id INT, + bool_col DATE, + int_col DATE, + smallint_col DATE, + tinyint_col DATE, + bigint_col DATE, + float_col DATE, + double_col DATE, + string_col DATE, + char1_col DATE, + char2_col DATE, + varchar_col DATE, + date_col DATE, + timestamp_col DATE, + decimal1_col DATE, + decimal2_col DATE +) STORED AS orc +LOCATION '/user/doris/preinstalled_data/orc_table/orc_schema_change'; + +CREATE TABLE IF NOT EXISTS orc_primitive_types_to_timestamp ( + id INT, + bool_col TIMESTAMP, + int_col TIMESTAMP, + smallint_col TIMESTAMP, + tinyint_col TIMESTAMP, + bigint_col TIMESTAMP, + float_col TIMESTAMP, + double_col TIMESTAMP, + string_col TIMESTAMP, + char1_col TIMESTAMP, + char2_col TIMESTAMP, + varchar_col TIMESTAMP, + date_col TIMESTAMP, + timestamp_col TIMESTAMP, + decimal1_col TIMESTAMP, + decimal2_col TIMESTAMP +) STORED AS orc +LOCATION '/user/doris/preinstalled_data/orc_table/orc_schema_change'; + + +CREATE TABLE IF NOT EXISTS orc_primitive_types_to_decimal1 ( + id INT, + bool_col DECIMAL(20,5), + int_col DECIMAL(20,5), + smallint_col DECIMAL(20,5), + tinyint_col DECIMAL(20,5), + bigint_col DECIMAL(20,5), + float_col DECIMAL(20,5), + double_col DECIMAL(20,5), + string_col DECIMAL(20,5), + char1_col DECIMAL(20,5), + char2_col DECIMAL(20,5), + varchar_col DECIMAL(20,5), + date_col DECIMAL(20,5), + timestamp_col DECIMAL(20,5), + decimal1_col DECIMAL(20,5), + decimal2_col DECIMAL(20,5) +) STORED AS orc +LOCATION '/user/doris/preinstalled_data/orc_table/orc_schema_change'; + +CREATE TABLE IF NOT EXISTS orc_primitive_types_to_decimal2 ( + id INT, + bool_col DECIMAL(7,1), + int_col DECIMAL(7,1), + smallint_col DECIMAL(7,1), + tinyint_col DECIMAL(7,1), + bigint_col DECIMAL(7,1), + float_col DECIMAL(7,1), + double_col DECIMAL(7,1), + string_col DECIMAL(7,1), + char1_col DECIMAL(7,1), + char2_col DECIMAL(7,1), + varchar_col DECIMAL(7,1), + date_col DECIMAL(7,1), + timestamp_col DECIMAL(7,1), + decimal1_col DECIMAL(7,1), + decimal2_col DECIMAL(7,1) +) STORED AS orc +LOCATION '/user/doris/preinstalled_data/orc_table/orc_schema_change'; + + +MSCK REPAIR TABLE parquet_primitive_types_to_boolean; +MSCK REPAIR TABLE parquet_primitive_types_to_bigint; +MSCK REPAIR TABLE parquet_primitive_types_to_int; +MSCK REPAIR TABLE parquet_primitive_types_to_smallint; +MSCK REPAIR TABLE parquet_primitive_types_to_tinyint; +MSCK REPAIR TABLE parquet_primitive_types_to_float; +MSCK REPAIR TABLE parquet_primitive_types_to_double; +MSCK REPAIR TABLE parquet_primitive_types_to_string; +MSCK REPAIR TABLE parquet_primitive_types_to_date; +MSCK REPAIR TABLE parquet_primitive_types_to_timestamp; +MSCK REPAIR TABLE parquet_primitive_types_to_decimal1; +MSCK REPAIR TABLE parquet_primitive_types_to_decimal2; + +MSCK REPAIR TABLE orc_primitive_types_to_boolean; +MSCK REPAIR TABLE orc_primitive_types_to_bigint; +MSCK REPAIR TABLE orc_primitive_types_to_int; +MSCK REPAIR TABLE orc_primitive_types_to_smallint; +MSCK REPAIR TABLE orc_primitive_types_to_tinyint; +MSCK REPAIR TABLE orc_primitive_types_to_float; +MSCK REPAIR TABLE orc_primitive_types_to_double; +MSCK REPAIR TABLE orc_primitive_types_to_string; +MSCK REPAIR TABLE orc_primitive_types_to_date; +MSCK REPAIR TABLE orc_primitive_types_to_timestamp; +MSCK REPAIR TABLE orc_primitive_types_to_decimal1; +MSCK REPAIR TABLE orc_primitive_types_to_decimal2; \ No newline at end of file diff --git a/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_schema_change/origin_file.orc b/docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/orc_table/orc_schema_change/origin_file.orc new file mode 100644 index 0000000000000000000000000000000000000000..ab1af67fcf549c38f2f6c50d446f47a43b931ed1 GIT binary patch literal 2533 zcmZ8h3pkW%8-C|#W=w-IWk@n%h>RJOoYokc%5j`R4m%;z)+}nHAto};hf!fdv7~ey zivPEMHHfkaYj>|jcAJK#En92{e<{BI8+QNe`v2?t-uHU1_r9O!zMuDfzSlp{Lk9o? zmW;*`dB7T8QV1vj0C^3LNY=n1wW-uv1P{;wm?l3{@n)QTaE)vIJ#nH1^xnt zNu^LR5~7+FBdG{=;xXk4bR33umqJXr#k|(=nbk;7HLQw>1t{7Ml3;V9pYf+22%0O? z;?FPNH9iwygRgYuE7cC{)efF1<3tD=f+R3wFtD`Bif_k) z3ogUOan$kCDs_0m*7|R}n74ZPbHIYaS0G5~M)pMg_b?pOy)$}3+L2ky& zBLLyUE6monF*JK=tpku)%WlGh3@pi#s1qNOx;`eZ}d&>wZ9m z*FNW~$+`7kp>O~oC@TvdYANt&sMsw&9xMMC{WBjPil(GGx7=9?`>F6VJK0sqUe(D5 z;1jeF{o5}&Ezlff$3DrTW^`QGs*9#-nf_su(eD3X5ao3t?D?|HEAH2X^oSd?MVg0l z4Hvae{G9)yDs>*j%5H60t#Cu2{@fiG|z&AzjQJ7!Zo-;^s=T z9T!`SS-L@BEdX3pDiVR>B9HP{!_xpbvXDfyJ<2tsDCGBknnL!n>}|KPw%XZ+@|dR{ z8uWyJ$4ai({Za= zDSu^+feSoBOi`P(w-hXez zEt((m&20VfC^s>5I3uF(onBD>)2Y1!spir>uG052Gk;8X38FqOz8PBb-zl)0bLx3F z^SDkk-A2|i)Gd=9cZXhUUDMigI28+E^L_*h1>kUG9DztAoJWAe<<3P&7KIxdUomL< z78kMQ$*^U18Hac159_$_Etn-JYyVtkXKwzE?X^Ls4i1ZoTkeHW?bD*$A55*b4DNj~ z?<;(djj-kWc7&DdthY^^JL9UF5v_Gpvg1GF65eE**uS}RNzXvqn3lVtd(TbBGD&Bl zbMeueuaDoAI2JiG!)0KBm5K zwLQVx$i&x;ww6uqd_97H*d3leP~vKmHO`+IH=nFpn0j))ML1=XHI4++lOw(Ty*;{e zdH+a{$-UtoQ)3z{k0qjI*A>;|L=;JjB(5S>cAj@_PDK8NX*cb8&Ce(axrQPlFGlvV z@q&7^9j{t{)8z+dfHI@_Fk&%Z7SM~c*!2BH97Tej)j)A&G;O-)X;zi3#y`&3o1;*D zIS}+@quwdoDKmYxLR5g_Nj3-Tcs~CGW%IbMZJWMi(EjJ`wy3Xm-PKrOuUCaM#3f%e zQ&BEuPmoioCkr$FIdrn{U8g5S256G(vWOfOXy}q;8FsIhlFo#%D zZ1%m#_J+J(AyX>CwpQ(~kG0d~c@}#Y=kyi%$Ml_gH4?qk%HC!6P#;H6n6n{UP=z^tAREJgTS3Ai^OIn}a zJ2sZ>l+-l$+g!%;jORvCin9)F=aJFJ%@5op4>)YIsrfRzJS6v7+CB!2Q?V<>vM%80 z@K>{=hQ!b5*H7Loigv>lxV-GVLXZ+h{~C6~@uT2o;Htz+jh&6SP(-`{ry zc+cK(zFl@%*LUihywh8J>R(|cthH&~!xl;DTHjhv)>Y;ejc4tDiaw1p>bd8T$Jar| zkRr79u2AM-;e|}JSZ=;9mRqxJVT#pHI6cpBKOds+w+W7T%?jhwq9BK$o?7ZO> z>0p&P_lQJ0cAe$Kv84d>naR|3H8*~=+;+XZHkos|Gos}2EOcek+^D1`*rPGqrs7fi zz}b>^@heJu)U|8Vp#YP%R1tLkgn}o%^cMUwHu<=_?9<)X2L1~dpydhACBt@h${Wu+ zVMUF+t1V-#o>fT)TlQLq63*H>9a=oIb*ZUY(E;D53F=%ke9bEzwuP?c(*U xI)WGz{hanV$BtaG;D@MRyE=P5BXF%~Z3Id`zc<;tozt_m!_Lmk zoA-Y2&6{~MZ{4!>24_6M6N|WxP9b-=bz2EzOt&fcYphBdfuPcqLQ~W$K((RTQ43M0 zp*m2DP@Sm7s00S&S$vkHLQ!;uX=;^d!9YOv{?hNJfwPUbNu0KHI^D06lHK5!MwS$D z&^#(5%O*j%tR7nJeHj1d4U8{<$f4mJOwNN3S>W$Zrv-j?1VPiPDfDz2lq~*YA7kHN zN~ixDpG?zjfEW1I$igQY&7;ZU??8V)XaXNC$M~FR`LKdd@{j{$D*ZaSo(g#?4;3?+ z6`2x(+w>gu$$M1DgH-t2zrT6EZ2rtUZ)tmq$FJQbYp9S%sgPHxkY}mn@ou^wy9eW@ zyqg_?IcrOQKh2Z;k3Sr!YilbU`TMn>wxIs{S@*|v`|f;Df1_`re%sQqdf-1{_2XE* zFgZO=nlgQfDr+RpacHG7E;N(&fGGFsj(kK#8QJpfw%fzAUT84e3ny3I=qo>YuWqUH z=TEzr&Z<9n_OGf}4?a`hK;%_!vTj7dXYEZGCpMMrjp+n$u9sqDL$sqi5i_i=pBTuA zeuLNye?&B1KQ#ewaEcfpfq>Tt8llA$5d=+PXL$XAV2IoJ2?bvxFXx!0_*>q9_!d75 z7>NW!ks3NC21$g&5!y(Detw#*23gDkFjj5&D6ZS0iJlluLQ4=!oX_Xe0&U{1zyj@p zL0+NF&YoQ!dl#1gxJDIxjR=20lmo>w$?AbDu8mO5V#DVRN9f8R^CC6gNH`P>kbMC% zVWDK-iiL)cXie)kt{^+BpJ1Pj7#=gPX*<>l^G+%FB#&)cwqDrf4**j5Fc1nFhT(-h zMN5R0%NjSVpq+tH;YdEOIW0wx*_Lr#6|ZXKPIBC6BV(IZ zF}4wPXbodW*NRxvxo&NRZX+(;Mm(CMN*ciqIzRT7(t?8hD?h}lckmr1GTTyfo|Q7T zZlDq@1H~;t#%}CIKRkY^In%G-7CnH~H0-x{7Q9zU(Vk;Ln2t`4CdPO1T{+=kSDT7| zOY8QBU*30<69s#Jsi00{r*ccKIywG&)>YK)Mv&st9NYprTar7mSVLCW zx%TQs##PtE%!cCj`1aW5o|g7lN0e89SgGo2ycIiwj-$D&E3vtyD=|f%7Vk`1qmrJE zXd+>fDoxSiRJ?O%hBgmj0TgF*d}~(H0YXt*qAO}5Ifj@Qh~hb>!U!DN{5g*dG*(~% ztw2rf6=5=S*%%{jOWb7l;-Ml1& zZAFH_o>a6+#dBnOt&|_gvWH7eS({4=At8EP)sCy4!=)seeR*AraP)0BnmC`-;Dip( zd;~%Sf`AxSwP6(uLc94m=0TX4Z6U}pDlkr~+G*7@F`F=QpRznigPM7?u$*jU3z$Jg z8&o`l8aC{Sk5?YRTXWpJkbDck+?zq5UQx6wis#mxT0BU3q=(?@8RpL9+#o{utfHM& zJVP$w;GD-w-l564k7HOKzFDB%EbvUuC5)$ejU;64>O4(nF1KFxdNQ7hCB1a*FitVvvPrR$M!tf%2@>Ntd_r?>g6~2HG z4I_c_`4-0l%{6|XpE8xGQ4 Date: Thu, 30 Jan 2025 20:45:21 +0800 Subject: [PATCH 2/2] fix case. --- be/src/vec/exec/format/column_type_convert.cpp | 4 ++++ be/src/vec/exec/format/orc/vorc_reader.cpp | 1 - be/test/vec/exec/parquet/parquet_thrift_test.cpp | 6 +++--- 3 files changed, 7 insertions(+), 4 deletions(-) diff --git a/be/src/vec/exec/format/column_type_convert.cpp b/be/src/vec/exec/format/column_type_convert.cpp index 28ac8f098e1f60..6a705c4bddcee2 100644 --- a/be/src/vec/exec/format/column_type_convert.cpp +++ b/be/src/vec/exec/format/column_type_convert.cpp @@ -354,6 +354,10 @@ std::unique_ptr ColumnTypeConverter::get_converter( PrimitiveType src_primitive_type = src_type.type; PrimitiveType dst_primitive_type = remove_nullable(dst_type)->get_type_as_type_descriptor().type; + if (is_string_type(src_primitive_type) && is_string_type(dst_primitive_type)) { + return std::make_unique(); + } + if (_is_decimal_type(src_primitive_type) && _is_decimal_type(dst_primitive_type)) { return _decimal_converter(src_type, dst_type); } diff --git a/be/src/vec/exec/format/orc/vorc_reader.cpp b/be/src/vec/exec/format/orc/vorc_reader.cpp index 45edeaa00d32f7..00771b20dc2cec 100644 --- a/be/src/vec/exec/format/orc/vorc_reader.cpp +++ b/be/src/vec/exec/format/orc/vorc_reader.cpp @@ -495,7 +495,6 @@ std::tuple convert_to_orc_literal(const orc::Type* type, } return std::make_tuple(false, orc::Literal(false)); } - return std::make_tuple(true, orc::Literal(*((int64_t*)value))); case orc::TypeKind::FLOAT: { if constexpr (primitive_type == TYPE_FLOAT) { return std::make_tuple(true, orc::Literal(double(*((float*)value)))); diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp b/be/test/vec/exec/parquet/parquet_thrift_test.cpp index 3f3fa3b0f2eff1..c2c072d844ff07 100644 --- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp +++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp @@ -292,8 +292,8 @@ static doris::TupleDescriptor* create_tuple_desc( for (int i = 0; i < column_descs.size(); ++i) { TSlotDescriptor t_slot_desc; - if (column_descs[i].type == TYPE_DECIMALV2) { - t_slot_desc.__set_slotType(TypeDescriptor::create_decimalv2_type(27, 9).to_thrift()); + if (column_descs[i].type == TYPE_DECIMAL128I) { + t_slot_desc.__set_slotType(TypeDescriptor::create_decimalv3_type(27, 9).to_thrift()); } else { TypeDescriptor descriptor(column_descs[i].type); if (column_descs[i].precision >= 0 && column_descs[i].scale >= 0) { @@ -356,7 +356,7 @@ static void create_block(std::unique_ptr& block) { {"binary_col", TYPE_STRING, sizeof(StringRef), true}, // 64-bit-length, see doris::get_slot_size in primitive_type.cpp {"timestamp_col", TYPE_DATETIMEV2, sizeof(int128_t), true}, - {"decimal_col", TYPE_DECIMALV2, sizeof(DecimalV2Value), true}, + {"decimal_col", TYPE_DECIMAL128I, sizeof(Decimal128V3), true}, {"char_col", TYPE_CHAR, sizeof(StringRef), true}, {"varchar_col", TYPE_VARCHAR, sizeof(StringRef), true}, {"date_col", TYPE_DATEV2, sizeof(uint32_t), true},