Skip to content

feat: implement transform ResultType #132

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
99 changes: 92 additions & 7 deletions src/iceberg/transform_function.cc
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,27 @@ Result<ArrowArray> BucketTransform::Transform(const ArrowArray& input) {
}

Result<std::shared_ptr<Type>> BucketTransform::ResultType() const {
return NotImplemented("BucketTransform::result_type");
auto src_type = source_type();
if (!src_type) {
return NotSupported("null is not a valid input type for bucket transform");
}
switch (src_type->type_id()) {
case TypeId::kInt:
case TypeId::kLong:
case TypeId::kDecimal:
case TypeId::kDate:
case TypeId::kTime:
case TypeId::kTimestamp:
case TypeId::kTimestampTz:
case TypeId::kString:
case TypeId::kUuid:
case TypeId::kFixed:
case TypeId::kBinary:
return std::make_shared<IntType>();
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should we define a static variable to avoid creating a new object on every call, or add a singleton to the corresponding type? The same applies to the return values below.

Copy link
Collaborator Author

@zhjwpku zhjwpku Jun 30, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've always wanted to add a singleton for each primitive type. If we reach a consensus, I can create a separate PR to implement that. WDYT @wgtmac @lidavidm @gty404 @mapleFU @Fokko @raulcd

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

+1. Sorry - I had also meant to do this, but as you can see I haven't had the time recently.

default:
return NotSupported("{} is not a valid input type for bucket transform",
src_type->ToString());
}
}

TruncateTransform::TruncateTransform(std::shared_ptr<Type> const& source_type,
Expand All @@ -60,7 +80,21 @@ Result<ArrowArray> TruncateTransform::Transform(const ArrowArray& input) {
}

// Returns the type produced by the truncate transform.
//
// Truncation keeps the source type: when the source type is int, long,
// decimal, string or binary, that same type object is returned.
//
// Returns NotSupported when source_type() is null or when its type id is not
// one of the ids listed above.
Result<std::shared_ptr<Type>> TruncateTransform::ResultType() const {
  auto src_type = source_type();
  if (!src_type) {
    return NotSupported("null is not a valid input type for truncate transform");
  }
  switch (src_type->type_id()) {
    case TypeId::kInt:
    case TypeId::kLong:
    case TypeId::kDecimal:
    case TypeId::kString:
    case TypeId::kBinary:
      // The result has the same type as the input.
      return src_type;
    default:
      return NotSupported("{} is not a valid input type for truncate transform",
                          src_type->ToString());
  }
}

YearTransform::YearTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -71,7 +105,19 @@ Result<ArrowArray> YearTransform::Transform(const ArrowArray& input) {
}

// Returns the type produced by the year transform.
//
// For date, timestamp and timestamptz sources the result is a newly created
// IntType.
//
// Returns NotSupported when source_type() is null or when its type id is not
// one of the ids listed above.
Result<std::shared_ptr<Type>> YearTransform::ResultType() const {
  auto src_type = source_type();
  if (!src_type) {
    return NotSupported("null is not a valid input type for year transform");
  }
  switch (src_type->type_id()) {
    case TypeId::kDate:
    case TypeId::kTimestamp:
    case TypeId::kTimestampTz:
      return std::make_shared<IntType>();
    default:
      return NotSupported("{} is not a valid input type for year transform",
                          src_type->ToString());
  }
}

MonthTransform::MonthTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -82,7 +128,19 @@ Result<ArrowArray> MonthTransform::Transform(const ArrowArray& input) {
}

// Returns the type produced by the month transform.
//
// For date, timestamp and timestamptz sources the result is a newly created
// IntType.
//
// Returns NotSupported when source_type() is null or when its type id is not
// one of the ids listed above.
Result<std::shared_ptr<Type>> MonthTransform::ResultType() const {
  auto src_type = source_type();
  if (!src_type) {
    return NotSupported("null is not a valid input type for month transform");
  }
  switch (src_type->type_id()) {
    case TypeId::kDate:
    case TypeId::kTimestamp:
    case TypeId::kTimestampTz:
      return std::make_shared<IntType>();
    default:
      return NotSupported("{} is not a valid input type for month transform",
                          src_type->ToString());
  }
}

DayTransform::DayTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -93,7 +151,19 @@ Result<ArrowArray> DayTransform::Transform(const ArrowArray& input) {
}

// Returns the type produced by the day transform.
//
// For date, timestamp and timestamptz sources the result is a newly created
// DateType (unlike the year, month and hour transforms, which report IntType).
//
// Returns NotSupported when source_type() is null or when its type id is not
// one of the ids listed above.
Result<std::shared_ptr<Type>> DayTransform::ResultType() const {
  auto src_type = source_type();
  if (!src_type) {
    return NotSupported("null is not a valid input type for day transform");
  }
  switch (src_type->type_id()) {
    case TypeId::kDate:
    case TypeId::kTimestamp:
    case TypeId::kTimestampTz:
      return std::make_shared<DateType>();
    default:
      return NotSupported("{} is not a valid input type for day transform",
                          src_type->ToString());
  }
}

HourTransform::HourTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -104,7 +174,18 @@ Result<ArrowArray> HourTransform::Transform(const ArrowArray& input) {
}

// Returns the type produced by the hour transform.
//
// For timestamp and timestamptz sources the result is a newly created
// IntType. Date sources are not accepted here.
//
// Returns NotSupported when source_type() is null or when its type id is
// neither timestamp nor timestamptz.
Result<std::shared_ptr<Type>> HourTransform::ResultType() const {
  auto src_type = source_type();
  if (!src_type) {
    return NotSupported("null is not a valid input type for hour transform");
  }
  switch (src_type->type_id()) {
    case TypeId::kTimestamp:
    case TypeId::kTimestampTz:
      return std::make_shared<IntType>();
    default:
      return NotSupported("{} is not a valid input type for hour transform",
                          src_type->ToString());
  }
}

VoidTransform::VoidTransform(std::shared_ptr<Type> const& source_type)
Expand All @@ -115,7 +196,11 @@ Result<ArrowArray> VoidTransform::Transform(const ArrowArray& input) {
}

// Returns the type produced by the void transform.
//
// Any non-null source type is accepted and returned unchanged.
//
// Returns NotSupported when source_type() is null.
Result<std::shared_ptr<Type>> VoidTransform::ResultType() const {
  auto src_type = source_type();
  if (!src_type) {
    return NotSupported("null is not a valid input type for void transform");
  }
  return src_type;
}

} // namespace iceberg
77 changes: 77 additions & 0 deletions test/transform_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -117,4 +117,81 @@ TEST(TransformFromStringTest, NegativeCases) {
}
}

// Checks that every transform, once parsed from its string form and bound to
// a supported source type, reports the expected result type id.
TEST(TransformResultTypeTest, PositiveCases) {
  // One table row: transform string, type to bind it to, and the type whose
  // id ResultType() is expected to report.
  struct Case {
    std::string str;
    std::shared_ptr<Type> source_type;
    std::shared_ptr<Type> expected_result_type;
  };

  const std::vector<Case> cases = {
      {.str = "identity",
       .source_type = std::make_shared<StringType>(),
       .expected_result_type = std::make_shared<StringType>()},
      {.str = "year",
       .source_type = std::make_shared<TimestampType>(),
       .expected_result_type = std::make_shared<IntType>()},
      {.str = "month",
       .source_type = std::make_shared<TimestampType>(),
       .expected_result_type = std::make_shared<IntType>()},
      {.str = "day",
       .source_type = std::make_shared<TimestampType>(),
       .expected_result_type = std::make_shared<DateType>()},
      {.str = "hour",
       .source_type = std::make_shared<TimestampType>(),
       .expected_result_type = std::make_shared<IntType>()},
      {.str = "void",
       .source_type = std::make_shared<StringType>(),
       .expected_result_type = std::make_shared<StringType>()},
      {.str = "bucket[16]",
       .source_type = std::make_shared<StringType>(),
       .expected_result_type = std::make_shared<IntType>()},
      {.str = "truncate[32]",
       .source_type = std::make_shared<StringType>(),
       .expected_result_type = std::make_shared<StringType>()},
  };

  for (const auto& [text, source, expected] : cases) {
    // Step 1: the transform string must parse.
    auto parsed = TransformFromString(text);
    ASSERT_TRUE(parsed.has_value()) << "Failed to parse: " << text;

    // Step 2: binding to the source type must succeed.
    const auto bound = parsed.value()->Bind(source);
    ASSERT_TRUE(bound.has_value()) << "Failed to bind: " << text;

    // Step 3: the bound function must report the expected type id.
    auto actual = bound.value()->ResultType();
    ASSERT_TRUE(actual.has_value()) << "Failed to get result type for: " << text;
    EXPECT_EQ(actual.value()->type_id(), expected->type_id())
        << "Unexpected result type for: " << text;
  }
}

// Checks that ResultType() reports NotSupported when a transform is bound to
// a null source type or to a source type it does not accept.
TEST(TransformResultTypeTest, NegativeCases) {
  // One table row: transform string and the (invalid) type to bind it to.
  struct Case {
    std::string str;
    std::shared_ptr<Type> source_type;
  };

  const std::vector<Case> cases = {
      {.str = "identity", .source_type = nullptr},
      {.str = "year", .source_type = std::make_shared<StringType>()},
      {.str = "month", .source_type = std::make_shared<StringType>()},
      {.str = "day", .source_type = std::make_shared<StringType>()},
      {.str = "hour", .source_type = std::make_shared<StringType>()},
      {.str = "void", .source_type = nullptr},
      {.str = "bucket[16]", .source_type = std::make_shared<FloatType>()},
      {.str = "truncate[32]", .source_type = std::make_shared<DoubleType>()}};

  for (const auto& c : cases) {
    auto result = TransformFromString(c.str);
    ASSERT_TRUE(result.has_value()) << "Failed to parse: " << c.str;

    const auto& transform = result.value();
    auto transformPtr = transform->Bind(c.source_type);
    // Guard the value() access below: this test expects the failure to come
    // from ResultType(), so a failed bind must be reported as such rather
    // than dereferencing an error result.
    ASSERT_TRUE(transformPtr.has_value()) << "Failed to bind: " << c.str;

    auto result_type = transformPtr.value()->ResultType();
    ASSERT_THAT(result_type, IsError(ErrorKind::kNotSupported))
        << "Expected NotSupported for: " << c.str;
  }
}

} // namespace iceberg
Loading