Skip to content

refactor: add factory functions for primitive types #134

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 2 commits into from
Jul 7, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 7 additions & 17 deletions src/iceberg/expression/literal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -126,32 +126,22 @@ Literal::Literal(Value value, std::shared_ptr<PrimitiveType> type)
: value_(std::move(value)), type_(std::move(type)) {}

// Factory methods
Literal Literal::Boolean(bool value) {
return {Value{value}, std::make_shared<BooleanType>()};
}
Literal Literal::Boolean(bool value) { return {Value{value}, iceberg::boolean()}; }

Literal Literal::Int(int32_t value) {
return {Value{value}, std::make_shared<IntType>()};
}
Literal Literal::Int(int32_t value) { return {Value{value}, iceberg::int32()}; }

Literal Literal::Long(int64_t value) {
return {Value{value}, std::make_shared<LongType>()};
}
Literal Literal::Long(int64_t value) { return {Value{value}, iceberg::int64()}; }

Literal Literal::Float(float value) {
return {Value{value}, std::make_shared<FloatType>()};
}
Literal Literal::Float(float value) { return {Value{value}, iceberg::float32()}; }

Literal Literal::Double(double value) {
return {Value{value}, std::make_shared<DoubleType>()};
}
Literal Literal::Double(double value) { return {Value{value}, iceberg::float64()}; }

Literal Literal::String(std::string value) {
return {Value{std::move(value)}, std::make_shared<StringType>()};
return {Value{std::move(value)}, iceberg::string()};
}

Literal Literal::Binary(std::vector<uint8_t> value) {
return {Value{std::move(value)}, std::make_shared<BinaryType>()};
return {Value{std::move(value)}, iceberg::binary()};
}

Result<Literal> Literal::Deserialize(std::span<const uint8_t> data,
Expand Down
76 changes: 36 additions & 40 deletions src/iceberg/manifest_entry.h
Original file line number Diff line number Diff line change
Expand Up @@ -176,94 +176,90 @@ struct ICEBERG_EXPORT DataFile {
std::optional<int64_t> content_size_in_bytes;

inline static const SchemaField kContent = SchemaField::MakeRequired(
134, "content", std::make_shared<IntType>(),
134, "content", iceberg::int32(),
"Contents of the file: 0=data, 1=position deletes, 2=equality deletes");
inline static const SchemaField kFilePath = SchemaField::MakeRequired(
100, "file_path", std::make_shared<StringType>(), "Location URI with FS scheme");
inline static const SchemaField kFileFormat =
SchemaField::MakeRequired(101, "file_format", std::make_shared<IntType>(),
"File format name: avro, orc, or parquet");
100, "file_path", iceberg::string(), "Location URI with FS scheme");
inline static const SchemaField kFileFormat = SchemaField::MakeRequired(
101, "file_format", iceberg::int32(), "File format name: avro, orc, or parquet");
inline static const SchemaField kRecordCount = SchemaField::MakeRequired(
103, "record_count", std::make_shared<LongType>(), "Number of records in the file");
inline static const SchemaField kFileSize =
SchemaField::MakeRequired(104, "file_size_in_bytes", std::make_shared<LongType>(),
"Total file size in bytes");
103, "record_count", iceberg::int64(), "Number of records in the file");
inline static const SchemaField kFileSize = SchemaField::MakeRequired(
104, "file_size_in_bytes", iceberg::int64(), "Total file size in bytes");
inline static const SchemaField kColumnSizes = SchemaField::MakeOptional(
108, "column_sizes",
std::make_shared<MapType>(
SchemaField::MakeRequired(117, std::string(MapType::kKeyName),
std::make_shared<IntType>()),
iceberg::int32()),
SchemaField::MakeRequired(118, std::string(MapType::kValueName),
std::make_shared<LongType>())),
iceberg::int64())),
"Map of column id to total size on disk");
inline static const SchemaField kValueCounts = SchemaField::MakeOptional(
109, "value_counts",
std::make_shared<MapType>(
SchemaField::MakeRequired(119, std::string(MapType::kKeyName),
std::make_shared<IntType>()),
iceberg::int32()),
SchemaField::MakeRequired(120, std::string(MapType::kValueName),
std::make_shared<LongType>())),
iceberg::int64())),
"Map of column id to total count, including null and NaN");
inline static const SchemaField kNullValueCounts = SchemaField::MakeOptional(
110, "null_value_counts",
std::make_shared<MapType>(
SchemaField::MakeRequired(121, std::string(MapType::kKeyName),
std::make_shared<IntType>()),
iceberg::int32()),
SchemaField::MakeRequired(122, std::string(MapType::kValueName),
std::make_shared<LongType>())),
iceberg::int64())),
"Map of column id to null value count");
inline static const SchemaField kNanValueCounts = SchemaField::MakeOptional(
137, "nan_value_counts",
std::make_shared<MapType>(
SchemaField::MakeRequired(138, std::string(MapType::kKeyName),
std::make_shared<IntType>()),
iceberg::int32()),
SchemaField::MakeRequired(139, std::string(MapType::kValueName),
std::make_shared<LongType>())),
iceberg::int64())),
"Map of column id to number of NaN values in the column");
inline static const SchemaField kLowerBounds = SchemaField::MakeOptional(
125, "lower_bounds",
std::make_shared<MapType>(
SchemaField::MakeRequired(126, std::string(MapType::kKeyName),
std::make_shared<IntType>()),
iceberg::int32()),
SchemaField::MakeRequired(127, std::string(MapType::kValueName),
std::make_shared<BinaryType>())),
iceberg::binary())),
"Map of column id to lower bound");
inline static const SchemaField kUpperBounds = SchemaField::MakeOptional(
128, "upper_bounds",
std::make_shared<MapType>(
SchemaField::MakeRequired(129, std::string(MapType::kKeyName),
std::make_shared<IntType>()),
iceberg::int32()),
SchemaField::MakeRequired(130, std::string(MapType::kValueName),
std::make_shared<BinaryType>())),
iceberg::binary())),
"Map of column id to upper bound");
inline static const SchemaField kKeyMetadata =
SchemaField::MakeOptional(131, "key_metadata", std::make_shared<BinaryType>(),
"Encryption key metadata blob");
inline static const SchemaField kKeyMetadata = SchemaField::MakeOptional(
131, "key_metadata", iceberg::binary(), "Encryption key metadata blob");
inline static const SchemaField kSplitOffsets = SchemaField::MakeOptional(
132, "split_offsets",
std::make_shared<ListType>(SchemaField::MakeRequired(
133, std::string(ListType::kElementName), std::make_shared<LongType>())),
133, std::string(ListType::kElementName), iceberg::int64())),
"Splittable offsets");
inline static const SchemaField kEqualityIds = SchemaField::MakeOptional(
135, "equality_ids",
std::make_shared<ListType>(SchemaField::MakeRequired(
136, std::string(ListType::kElementName), std::make_shared<IntType>())),
136, std::string(ListType::kElementName), iceberg::int32())),
"Equality comparison field IDs");
inline static const SchemaField kSortOrderId = SchemaField::MakeOptional(
140, "sort_order_id", std::make_shared<IntType>(), "Sort order ID");
inline static const SchemaField kFirstRowId =
SchemaField::MakeOptional(142, "first_row_id", std::make_shared<LongType>(),
"Starting row ID to assign to new rows");
inline static const SchemaField kSortOrderId =
SchemaField::MakeOptional(140, "sort_order_id", iceberg::int32(), "Sort order ID");
inline static const SchemaField kFirstRowId = SchemaField::MakeOptional(
142, "first_row_id", iceberg::int64(), "Starting row ID to assign to new rows");
inline static const SchemaField kReferencedDataFile = SchemaField::MakeOptional(
143, "referenced_data_file", std::make_shared<StringType>(),
143, "referenced_data_file", iceberg::string(),
"Fully qualified location (URI with FS scheme) of a data file that all deletes "
"reference");
inline static const SchemaField kContentOffset =
SchemaField::MakeOptional(144, "content_offset", std::make_shared<LongType>(),
SchemaField::MakeOptional(144, "content_offset", iceberg::int64(),
"The offset in the file where the content starts");
inline static const SchemaField kContentSize = SchemaField::MakeOptional(
145, "content_size_in_bytes", std::make_shared<LongType>(),
"The length of referenced content stored in the file");
inline static const SchemaField kContentSize =
SchemaField::MakeOptional(145, "content_size_in_bytes", iceberg::int64(),
"The length of referenced content stored in the file");

static std::shared_ptr<StructType> Type(std::shared_ptr<StructType> partition_type);
};
Expand Down Expand Up @@ -293,13 +289,13 @@ struct ICEBERG_EXPORT ManifestEntry {
DataFile data_file;

inline static const SchemaField kStatus =
SchemaField::MakeRequired(0, "status", std::make_shared<IntType>());
SchemaField::MakeRequired(0, "status", iceberg::int32());
inline static const SchemaField kSnapshotId =
SchemaField::MakeOptional(1, "snapshot_id", std::make_shared<LongType>());
SchemaField::MakeOptional(1, "snapshot_id", iceberg::int64());
inline static const SchemaField kSequenceNumber =
SchemaField::MakeOptional(3, "sequence_number", std::make_shared<LongType>());
SchemaField::MakeOptional(3, "sequence_number", iceberg::int64());
inline static const SchemaField kFileSequenceNumber =
SchemaField::MakeOptional(4, "file_sequence_number", std::make_shared<LongType>());
SchemaField::MakeOptional(4, "file_sequence_number", iceberg::int64());

static std::shared_ptr<StructType> TypeFromPartitionType(
std::shared_ptr<StructType> partition_type);
Expand Down
56 changes: 25 additions & 31 deletions src/iceberg/manifest_list.h
Original file line number Diff line number Diff line change
Expand Up @@ -54,17 +54,15 @@ struct ICEBERG_EXPORT PartitionFieldSummary {
std::optional<std::vector<uint8_t>> upper_bound;

inline static const SchemaField kContainsNull =
SchemaField::MakeRequired(509, "contains_null", std::make_shared<BooleanType>(),
SchemaField::MakeRequired(509, "contains_null", iceberg::boolean(),
"True if any file has a null partition value");
inline static const SchemaField kContainsNaN =
SchemaField::MakeOptional(518, "contains_nan", std::make_shared<BooleanType>(),
SchemaField::MakeOptional(518, "contains_nan", iceberg::boolean(),
"True if any file has a nan partition value");
inline static const SchemaField kLowerBound =
SchemaField::MakeOptional(510, "lower_bound", std::make_shared<BinaryType>(),
"Partition lower bound for all files");
inline static const SchemaField kUpperBound =
SchemaField::MakeOptional(511, "upper_bound", std::make_shared<BinaryType>(),
"Partition upper bound for all files");
inline static const SchemaField kLowerBound = SchemaField::MakeOptional(
510, "lower_bound", iceberg::binary(), "Partition lower bound for all files");
inline static const SchemaField kUpperBound = SchemaField::MakeOptional(
511, "upper_bound", iceberg::binary(), "Partition upper bound for all files");

static const StructType& Type();
};
Expand Down Expand Up @@ -150,48 +148,44 @@ struct ICEBERG_EXPORT ManifestFile {
/// \brief Checks if this manifest file contains entries with DELETED status
bool has_deleted_files() const { return deleted_files_count.value_or(1) > 0; }

inline static const SchemaField kManifestPath =
SchemaField::MakeRequired(500, "manifest_path", std::make_shared<StringType>(),
"Location URI with FS scheme");
inline static const SchemaField kManifestPath = SchemaField::MakeRequired(
500, "manifest_path", iceberg::string(), "Location URI with FS scheme");
inline static const SchemaField kManifestLength = SchemaField::MakeRequired(
501, "manifest_length", std::make_shared<LongType>(), "Total file size in bytes");
501, "manifest_length", iceberg::int64(), "Total file size in bytes");
inline static const SchemaField kPartitionSpecId = SchemaField::MakeRequired(
502, "partition_spec_id", std::make_shared<IntType>(), "Spec ID used to write");
inline static const SchemaField kContent =
SchemaField::MakeOptional(517, "content", std::make_shared<IntType>(),
"Contents of the manifest: 0=data, 1=deletes");
502, "partition_spec_id", iceberg::int32(), "Spec ID used to write");
inline static const SchemaField kContent = SchemaField::MakeOptional(
517, "content", iceberg::int32(), "Contents of the manifest: 0=data, 1=deletes");
inline static const SchemaField kSequenceNumber =
SchemaField::MakeOptional(515, "sequence_number", std::make_shared<LongType>(),
SchemaField::MakeOptional(515, "sequence_number", iceberg::int64(),
"Sequence number when the manifest was added");
inline static const SchemaField kMinSequenceNumber =
SchemaField::MakeOptional(516, "min_sequence_number", std::make_shared<LongType>(),
SchemaField::MakeOptional(516, "min_sequence_number", iceberg::int64(),
"Lowest sequence number in the manifest");
inline static const SchemaField kAddedSnapshotId =
SchemaField::MakeRequired(503, "added_snapshot_id", std::make_shared<LongType>(),
"Snapshot ID that added the manifest");
inline static const SchemaField kAddedSnapshotId = SchemaField::MakeRequired(
503, "added_snapshot_id", iceberg::int64(), "Snapshot ID that added the manifest");
inline static const SchemaField kAddedFilesCount = SchemaField::MakeOptional(
504, "added_files_count", std::make_shared<IntType>(), "Added entry count");
504, "added_files_count", iceberg::int32(), "Added entry count");
inline static const SchemaField kExistingFilesCount = SchemaField::MakeOptional(
505, "existing_files_count", std::make_shared<IntType>(), "Existing entry count");
505, "existing_files_count", iceberg::int32(), "Existing entry count");
inline static const SchemaField kDeletedFilesCount = SchemaField::MakeOptional(
506, "deleted_files_count", std::make_shared<IntType>(), "Deleted entry count");
506, "deleted_files_count", iceberg::int32(), "Deleted entry count");
inline static const SchemaField kAddedRowsCount = SchemaField::MakeOptional(
512, "added_rows_count", std::make_shared<LongType>(), "Added rows count");
512, "added_rows_count", iceberg::int64(), "Added rows count");
inline static const SchemaField kExistingRowsCount = SchemaField::MakeOptional(
513, "existing_rows_count", std::make_shared<LongType>(), "Existing rows count");
513, "existing_rows_count", iceberg::int64(), "Existing rows count");
inline static const SchemaField kDeletedRowsCount = SchemaField::MakeOptional(
514, "deleted_rows_count", std::make_shared<LongType>(), "Deleted rows count");
514, "deleted_rows_count", iceberg::int64(), "Deleted rows count");
inline static const SchemaField kPartitions = SchemaField::MakeOptional(
507, "partitions",
std::make_shared<ListType>(SchemaField::MakeRequired(
508, std::string(ListType::kElementName),
std::make_shared<StructType>(PartitionFieldSummary::Type()))),
"Summary for each partition");
inline static const SchemaField kKeyMetadata =
SchemaField::MakeOptional(519, "key_metadata", std::make_shared<BinaryType>(),
"Encryption key metadata blob");
inline static const SchemaField kKeyMetadata = SchemaField::MakeOptional(
519, "key_metadata", iceberg::binary(), "Encryption key metadata blob");
inline static const SchemaField kFirstRowId = SchemaField::MakeOptional(
520, "first_row_id", std::make_shared<LongType>(),
520, "first_row_id", iceberg::int64(),
"Starting row ID to assign to new rows in ADDED data files");

static const StructType& Type();
Expand Down
Loading
Loading