Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
6adec04
[OAP] [6020 ] Spark sql avg agg function support decimal
liujiayi771 Aug 7, 2023
6ef6f4b
[oap ] Register merge extract companion agg functions without suffix
zhztheplayer Dec 29, 2023
c3a7641
Stream input row to hash table when addInput for left semi and anti join
liujiayi771 Nov 24, 2024
1ed72cb
[11067] Support scan filter for decimal in ORC
rui-mo Mar 14, 2025
63656d5
refactor: Move toValues from InPredicate.cpp to Filter.h
yingsu00 Mar 15, 2025
93e8c24
feat(connector): Support reading Iceberg split with equality deletes
yingsu00 May 1, 2024
400dbe7
Support insert data into iceberg table.
PingLiuPing Oct 3, 2025
4ae41e8
Fix read iceberg table decimal column error
PingLiuPing Aug 19, 2025
c07c745
Add iceberg partition transforms.
PingLiuPing Jul 1, 2025
1ad57aa
Add NaN statistics to parquet writer.
PingLiuPing Sep 4, 2025
681b37a
Collect Iceberg data file statistics in dwio.
PingLiuPing Jul 28, 2025
2b5fa82
Fix incorrect min max stats when the column value are infinity or -in…
PingLiuPing Aug 26, 2025
d86757d
Integrate Iceberg data file statistics and adding unit test.
PingLiuPing Sep 1, 2025
7f248a3
Support write field_id to parquet metadata SchemaElement.
PingLiuPing Sep 5, 2025
a4bb434
Implement iceberg sort order
PingLiuPing May 30, 2025
00fc0c4
Add clustered Iceberg writer mode.
PingLiuPing Sep 1, 2025
004731b
Support struct schema evolution matching by name
rui-mo Mar 18, 2025
bdc3fe3
Allow reading integers into smaller-range types
rui-mo Sep 18, 2025
627db07
fix: Fix smj result mismatch issue in semi, anit and full outer join
zhouyuan Sep 4, 2025
716949e
adding daily tests
zhouyuan Jul 3, 2025
404ed3e
fix: remove website folder to bypass the security issues
zhouyuan Jul 9, 2025
38509b6
Fix Spark timestamp_seconds function
rui-mo Sep 10, 2025
6a3434f
Separate iceberg from Hive
yingsu00 Jun 30, 2025
c816dce
Connector refactor changes By Naveen
nmahadevuni Sep 25, 2025
537a70c
Iceberg refactor to get tests working: Naveen
nmahadevuni Oct 1, 2025
b682b36
Remove IcebergDataSink references
nmahadevuni Oct 6, 2025
e4aa8b6
Delete old hive/iceberg connector
nmahadevuni Oct 8, 2025
7134709
refactor(iceberg): Remove old iceberg references from hive files
nmahadevuni Oct 8, 2025
df10245
Lakehouse iceberg changes for Prestissimo integration
nmahadevuni Oct 8, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
The table of contents is too big for display.
Diff view
Diff view
  •  
  •  
  •  
75 changes: 75 additions & 0 deletions .github/workflows/gluten_daily.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,75 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Workflow that builds the Gluten native (C++) libraries with this
# repository's checkout passed to the build as --velox_home.
name: Gluten Daily Build

# Triggered by pushes to the 'main' branch.
on:
push:
branches:
- 'main'

# Runs are grouped by repository, ref (head_ref or commit SHA) and workflow;
# cancel-in-progress lets a newer run in the same group replace an older one.
concurrency:
group: ${{ github.repository }}-${{ github.head_ref || github.sha }}-${{ github.workflow }}
cancel-in-progress: true

jobs:

gluten-cpp-build:
name: gluten cpp build
# prevent errors when forks ff their main branch
if: ${{ github.repository == 'IBM/velox' }}
runs-on: ubuntu-22.04
env:
# ccache directory inside the workspace; the build container sees the same
# directory as /work/.ccache because the workspace is mounted at /work.
CCACHE_DIR: "${{ github.workspace }}/.ccache"
steps:
- uses: actions/checkout@v4
# Restore a previously saved ccache. The exact key includes the commit SHA;
# restore-keys supplies the key prefix as a fallback match.
- name: Get Ccache
uses: actions/cache/restore@v4
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos7-release-default-${{github.sha}}
restore-keys: |
ccache-centos7-release-default
# Shallow-clone Gluten into ./gluten and replace every 'oap-project' with
# 'IBM' in get_velox.sh (presumably so it resolves Velox from the IBM
# organization - confirm). NOTE(review): BRANCH is assigned below but not
# used anywhere in the visible steps.
- name: Setup Gluten
run: |
git clone --depth 1 https://github.com/apache/incubator-gluten gluten && cd gluten
BRANCH=$(echo ${GITHUB_REF#refs/heads/})
sed -i 's/oap-project/IBM/g' ep/build-velox/src/get_velox.sh
#sed -i 's/VELOX_BRANCH=2025.*/VELOX_BRANCH=main/g' ep/build-velox/src/get_velox.sh
# Run the Gluten build script inside the apache/gluten:vcpkg-centos-7 image
# with the workspace mounted at /work. ccache statistics are zeroed before
# the build and printed after it; the Arrow artifacts from the container's
# /root/.m2 are then copied into /work/.m2.
- name: Build Gluten native libraries
run: |
docker pull apache/gluten:vcpkg-centos-7
docker run -v $GITHUB_WORKSPACE:/work -w /work apache/gluten:vcpkg-centos-7 bash -c "
git config --global --add safe.directory /work
set -e
df -a
cd /work/gluten
export CCACHE_DIR=/work/.ccache
mkdir -p /work/.ccache
source /opt/rh/devtoolset-11/enable
source /opt/rh/rh-git227/enable
export NUM_THREADS=4
ccache -sz
./dev/builddeps-veloxbe.sh --enable_vcpkg=ON --build_arrow=OFF --build_tests=OFF --build_benchmarks=OFF \
--build_examples=OFF --enable_s3=ON --enable_gcs=ON --enable_hdfs=ON --enable_abfs=ON --velox_home=/work
ccache -s
mkdir -p /work/.m2/repository/org/apache/arrow/
cp -r /root/.m2/repository/org/apache/arrow/* /work/.m2/repository/org/apache/arrow/
"
# Save the ccache directory under a key that includes this commit's SHA.
- name: Save ccache
uses: actions/cache/save@v4
id: ccache
with:
path: '${{ env.CCACHE_DIR }}'
key: ccache-centos7-release-default-${{github.sha}}
15 changes: 0 additions & 15 deletions .github/workflows/linux-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,21 +15,6 @@
name: Linux Build using GCC

on:
push:
branches:
- main
paths:
- velox/**
- '!velox/docs/**'
- CMakeLists.txt
- CMake/**
- scripts/setup-ubuntu.sh
- scripts/setup-common.sh
- scripts/setup-versions.sh
- scripts/setup-helper-functions.sh
- .github/workflows/linux-build.yml
- .github/workflows/linux-build-base.yml

pull_request:
paths:
- velox/**
Expand Down
5 changes: 4 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,9 @@ set(
)
option(VELOX_ENABLE_EXEC "Build exec." ON)
option(VELOX_ENABLE_AGGREGATES "Build aggregates." ON)
option(VELOX_ENABLE_HIVE_CONNECTOR "Build Hive connector." ON)
option(VELOX_ENABLE_HIVE_CONNECTOR "Build the Hive connector." ON)
#option(VELOX_ENABLE_HIVE_NEW_CONNECTOR "Build the new Hive connector." ON)
#option(VELOX_ENABLE_ICEBERG_CONNECTOR "Build the ICEBERG connector that does NOT depend on the new Hive connector." ON)
option(VELOX_ENABLE_TPCH_CONNECTOR "Build TPC-H connector." ON)
option(VELOX_ENABLE_TPCDS_CONNECTOR "Build TPC-DS connector." ON)
option(VELOX_ENABLE_PRESTO_FUNCTIONS "Build Presto SQL functions." ON)
Expand Down Expand Up @@ -704,6 +706,7 @@ include_directories(.)

# Adding this down here prevents warnings in dependencies from stopping the
# build
set(TREAT_WARNINGS_AS_ERRORS OFF)
if("${TREAT_WARNINGS_AS_ERRORS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Werror")
endif()
Expand Down
2 changes: 1 addition & 1 deletion velox/common/memory/tests/SharedArbitratorTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -755,7 +755,7 @@ DEBUG_ONLY_TEST_P(
folly::EventCount taskPauseWait;
auto taskPauseWaitKey = taskPauseWait.prepareWait();

const auto fakeAllocationSize = kMemoryCapacity - (32L << 20);
const auto fakeAllocationSize = kMemoryCapacity - (2L << 20);

std::atomic<bool> injectAllocationOnce{true};
fakeOperatorFactory_->setAllocationCallback([&](Operator* op) {
Expand Down
2 changes: 2 additions & 0 deletions velox/connectors/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@ velox_link_libraries(velox_connector velox_common_config velox_vector)

add_subdirectory(fuzzer)

add_subdirectory(lakehouse)

if(${VELOX_ENABLE_HIVE_CONNECTOR})
add_subdirectory(hive)
endif()
Expand Down
2 changes: 1 addition & 1 deletion velox/connectors/Connector.h
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,7 @@ class DataSink {
uint64_t recodeTimeNs{0};
uint64_t compressionTimeNs{0};

common::SpillStats spillStats;
velox::common::SpillStats spillStats;

bool empty() const;

Expand Down
3 changes: 0 additions & 3 deletions velox/connectors/hive/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -15,8 +15,6 @@
velox_add_library(velox_hive_config OBJECT HiveConfig.cpp)
velox_link_libraries(velox_hive_config velox_core velox_exception)

add_subdirectory(iceberg)

velox_add_library(
velox_hive_connector
OBJECT
Expand All @@ -35,7 +33,6 @@ velox_add_library(

velox_link_libraries(
velox_hive_connector
PUBLIC velox_hive_iceberg_splitreader
PRIVATE velox_common_io velox_connector velox_dwio_catalog_fbhive velox_hive_partition_function
)

Expand Down
5 changes: 5 additions & 0 deletions velox/connectors/hive/HiveConfig.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -273,4 +273,9 @@ std::string HiveConfig::schema(const config::ConfigBase* session) const {
kSchema, config_->get<std::string>(kSchema, ""));
}

// Looks up the fanout writer flag. The connector-level kFanoutEnabled value
// (with `true` as its own fallback) is read first and then handed to the
// session lookup of kFanoutEnabledSession as that lookup's default argument.
bool HiveConfig::fanoutEnabled(const config::ConfigBase* session) const {
const bool connectorLevelValue = config_->get<bool>(kFanoutEnabled, true);
return session->get<bool>(kFanoutEnabledSession, connectorLevelValue);
}

} // namespace facebook::velox::connector::hive
9 changes: 9 additions & 0 deletions velox/connectors/hive/HiveConfig.h
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,12 @@ class HiveConfig {
static constexpr const char* kSource = "source";
static constexpr const char* kSchema = "schema";

/// Controls the writer mode: whether the fanout writer is enabled. The
/// default value is true; setting it to false selects clustered mode.
/// Currently applies only to the Iceberg writer.
static constexpr const char* kFanoutEnabled = "fanout-enabled";
static constexpr const char* kFanoutEnabledSession = "fanout_enabled";

InsertExistingPartitionsBehavior insertExistingPartitionsBehavior(
const config::ConfigBase* session) const;

Expand Down Expand Up @@ -307,6 +313,9 @@ class HiveConfig {
/// Schema of the query. Used for storage logging.
std::string schema(const config::ConfigBase* session) const;

/// Returns true if the fanout writer mode is enabled.
bool fanoutEnabled(const config::ConfigBase* session) const;

HiveConfig(std::shared_ptr<const config::ConfigBase> config) {
VELOX_CHECK_NOT_NULL(
config, "Config is null for HiveConfig initialization");
Expand Down
11 changes: 0 additions & 11 deletions velox/connectors/hive/HiveConnector.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,6 @@
#include "velox/connectors/hive/HiveDataSink.h"
#include "velox/connectors/hive/HiveDataSource.h"
#include "velox/connectors/hive/HivePartitionFunction.h"
#include "velox/connectors/hive/iceberg/IcebergDataSink.h"

#include <boost/lexical_cast.hpp>
#include <memory>
Expand Down Expand Up @@ -74,16 +73,6 @@ std::unique_ptr<DataSink> HiveConnector::createDataSink(
ConnectorInsertTableHandlePtr connectorInsertTableHandle,
ConnectorQueryCtx* connectorQueryCtx,
CommitStrategy commitStrategy) {
if (auto icebergInsertHandle =
std::dynamic_pointer_cast<const iceberg::IcebergInsertTableHandle>(
connectorInsertTableHandle)) {
return std::make_unique<iceberg::IcebergDataSink>(
inputType,
icebergInsertHandle,
connectorQueryCtx,
commitStrategy,
hiveConfig_);
}
auto hiveInsertHandle =
std::dynamic_pointer_cast<const HiveInsertTableHandle>(
connectorInsertTableHandle);
Expand Down
9 changes: 9 additions & 0 deletions velox/connectors/hive/HiveConnectorUtil.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,10 @@
#include "velox/expression/ExprConstants.h"
#include "velox/expression/ExprToSubfieldFilter.h"

#include <boost/lexical_cast.hpp>
#include <boost/uuid/uuid_generators.hpp>
#include <boost/uuid/uuid_io.hpp>

namespace facebook::velox::connector::hive {
namespace {

Expand Down Expand Up @@ -944,4 +948,9 @@ core::TypedExprPtr extractFiltersFromRemainingFilter(
}
return expr;
}

std::string makeUuid() {
return boost::lexical_cast<std::string>(boost::uuids::random_generator()());
}

} // namespace facebook::velox::connector::hive
2 changes: 2 additions & 0 deletions velox/connectors/hive/HiveConnectorUtil.h
Original file line number Diff line number Diff line change
Expand Up @@ -116,4 +116,6 @@ core::TypedExprPtr extractFiltersFromRemainingFilter(
common::SubfieldFilters& filters,
double& sampleRate);

std::string makeUuid();

} // namespace facebook::velox::connector::hive
Loading