Skip to content

Commit

Permalink
Add support for PyVelox - python velox bindings. (facebookincubator#3077
Browse files Browse the repository at this point in the history
)

Summary:
This PR adds :

1. Creates the PyVelox python package/ extension
2. Only has basic type definitions currently and related tests
3. Supports creation of the package via setup.py
4. Note: Some of pybind's template magic requires the type definitions to be in the header; otherwise we see weird errors in Meta's internal systems.

I will follow up subsequently with PRs that add:

1. CI support to Build python package on every PR
2. Creation of vectors and registration of udfs etc. (in collaboration with Voltron).
3. Expression eval.

If you have python installed, you can pull this PR and try out the build as follows:

```
$ DEBUG=1 python setup.py develop

kpai@kpai-mbp /Users/kpai/src/Velox [pyvelox2]% python
Python 3.9.7 (default, Sep 16 2021, 08:50:36)
[Clang 10.0.0 ] :: Anaconda, Inc. on darwin
Type "help", "copyright", "credits" or "license" for more information.
>>> import pyvelox.pyvelox as pv
>>> pv.BooleanType()
<pyvelox.pyvelox.BooleanType object at 0x7fc190121830>
>>>
```

Pull Request resolved: facebookincubator#3077

Reviewed By: pedroerp

Differential Revision: D41475596

Pulled By: kgpai

fbshipit-source-id: 09ea8cb33a95a9cfb42eda8cd900dab52f6b96a1
  • Loading branch information
kgpai authored and facebook-github-bot committed Nov 22, 2022
1 parent d188613 commit 5d4db25
Show file tree
Hide file tree
Showing 11 changed files with 603 additions and 2 deletions.
22 changes: 20 additions & 2 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,24 @@ if(${VELOX_ENABLE_BENCHMARKS} OR ${VELOX_ENABLE_BENCHMARKS_BASIC})
set(VELOX_BUILD_TEST_UTILS ON)
endif()

# Building the Python package forces a reduced configuration: the overrides
# below switch off testing, benchmarks, examples, connectors and the other
# optional components, and switch on expression evaluation and the Presto
# functions.
#
# The option is tested by name rather than through ${...}: the expanded form
# collapses to an empty `if()` when the option is unset and is dereferenced a
# second time when its value happens to name another variable.
if(VELOX_BUILD_PYTHON_PACKAGE)
  set(VELOX_BUILD_TESTING OFF)
  set(VELOX_ENABLE_PRESTO_FUNCTIONS ON)
  set(VELOX_ENABLE_DUCKDB OFF)
  set(VELOX_ENABLE_EXPRESSION ON)
  set(VELOX_ENABLE_PARSE OFF)
  set(VELOX_ENABLE_EXEC OFF)
  set(VELOX_ENABLE_AGGREGATES OFF)
  set(VELOX_ENABLE_HIVE_CONNECTOR OFF)
  set(VELOX_ENABLE_TPCH_CONNECTOR OFF)
  set(VELOX_ENABLE_SPARK_FUNCTIONS OFF)
  set(VELOX_ENABLE_EXAMPLES OFF)
  set(VELOX_ENABLE_S3 OFF)
  set(VELOX_ENABLE_SUBSTRAIT OFF)
  set(VELOX_CODEGEN_SUPPORT OFF)
  set(VELOX_ENABLE_BENCHMARKS_BASIC OFF)
endif()

if(VELOX_ENABLE_S3)
# Set AWS_ROOT_DIR if you have a custom install location of AWS SDK CPP.
if(AWSSDK_ROOT_DIR)
Expand Down Expand Up @@ -291,10 +309,10 @@ if(CMAKE_SYSTEM_NAME MATCHES "Darwin")
link_directories("${ICU_INCLUDE_DIRS}/../lib")
endif()

if(VELOX_BUILD_PYTHON_PACKAGE)
message(STATUS "Adding pybind11")
if(${VELOX_BUILD_PYTHON_PACKAGE})
set(pybind11_SOURCE AUTO)
resolve_dependency(pybind11 REQUIRED_VERSION 2.10.0)
add_subdirectory(pyvelox)
endif()

# Locate or build folly.
Expand Down
11 changes: 11 additions & 0 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -62,6 +62,8 @@ CPU_TARGET ?= "avx"
FUZZER_SEED ?= 123456
FUZZER_DURATION_SEC ?= 60

# Python interpreter used by the python-* targets. Defaults to whatever
# `which python` reports; because of `?=` a value supplied through the
# environment or the make command line takes precedence.
PYTHON_EXECUTABLE ?= $(shell which python)

all: release #: Build the release version

clean: #: Delete all build artifacts
Expand Down Expand Up @@ -145,3 +147,12 @@ help: #: Show the help messages
@cat $(firstword $(MAKEFILE_LIST)) | \
awk '/^[-a-z]+:/' | \
awk -F: '{ printf("%-20s %s\n", $$1, $$NF) }'

# The python-* targets never produce a file named after themselves, so they
# are declared phony to keep a stray file of the same name from disabling them.
# The trailing `#:` text is what the `help` target prints for each of them.
.PHONY: python-clean python-build python-test

python-clean:  #: Run setup.py clean for the Python package
	DEBUG=1 ${PYTHON_EXECUTABLE} setup.py clean

python-build:  #: Build the Python package with setup.py develop
	DEBUG=1 ${PYTHON_EXECUTABLE} setup.py develop

python-test: python-build  #: Build the Python package and run its unit tests
	DEBUG=1 ${PYTHON_EXECUTABLE} -m unittest -v
32 changes: 32 additions & 0 deletions pyvelox/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Builds the `pyvelox` target in one of two flavours:
#
# * VELOX_BUILD_PYTHON_PACKAGE set: a pybind11 extension module. The
#   CREATE_PYVELOX_MODULE definition enables the PYBIND11_MODULE entry point
#   in pyvelox.cpp, and the module is installed into the install prefix root.
# * otherwise: a regular library exposing the binding helpers, with no module
#   entry point.
if(VELOX_BUILD_PYTHON_PACKAGE)
  message(STATUS "Creating pyvelox module")

  # Define our Python module:
  pybind11_add_module(pyvelox MODULE pyvelox.cpp pyvelox.h)

  # Keep the include path and the definition scoped to this target instead of
  # every target in the directory. PROJECT_SOURCE_DIR (not CMAKE_SOURCE_DIR)
  # keeps <velox/...> includes resolvable when Velox is built as a subproject.
  target_include_directories(pyvelox SYSTEM PRIVATE ${PROJECT_SOURCE_DIR})
  target_compile_definitions(pyvelox PRIVATE CREATE_PYVELOX_MODULE)

  # Link with Velox:
  target_link_libraries(pyvelox PRIVATE velox_type)

  install(TARGETS pyvelox LIBRARY DESTINATION .)
else()
  # Torcharrow will not use pyvelox as an extension module for compatibility
  # reasons.
  message(STATUS "Creating pyvelox library")
  add_library(pyvelox pyvelox.cpp pyvelox.h)

  # PUBLIC: pyvelox.h includes both the Velox type headers and the pybind11
  # headers, so consumers of the library need these usage requirements too.
  target_link_libraries(pyvelox PUBLIC velox_type pybind11::module)
endif()
45 changes: 45 additions & 0 deletions pyvelox/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# PyVelox: Python bindings and extensions for Velox

**This library is currently in Alpha stage and does not have a stable release. The API and implementation may change based on
user feedback or performance. Future changes may not be backward compatible.
If you have suggestions on the API or use cases you'd like to be covered, please open a
GitHub issue. We'd love to hear thoughts and feedback.**


## Prerequisites

You will need Python 3.7 or later. Also, we highly recommend installing a [Miniconda](https://docs.conda.io/en/latest/miniconda.html#latest-miniconda-installer-links) environment.

First, set up an environment. If you are using conda, create a conda environment:
```
conda create --name pyveloxenv python=3.7
conda activate pyveloxenv
```


### From Source

Currently PyVelox can only be built from source. You will need Python 3.7 or later and a C++17 compiler.


#### Install Dependencies

On macOS

[HomeBrew](https://brew.sh/) is required to install development tools on macOS.
Run the script referenced [here](https://github.com/facebookincubator/velox#setting-up-on-macos) to install all the macOS-specific dependencies.

On Linux

Run the script referenced [here](https://github.com/facebookincubator/velox#setting-up-on-linux-ubuntu-2004-or-later) to install the dependencies on Linux.


#### Install PyVelox
For local development, you can build with debug mode:
```
DEBUG=1 python setup.py develop
```

And run unit tests with
```
python -m unittest -v
```
13 changes: 13 additions & 0 deletions pyvelox/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
40 changes: 40 additions & 0 deletions pyvelox/pyvelox.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "pyvelox.h" // @manual

namespace facebook::velox::py {
using namespace velox;
namespace py = pybind11;

/// Renders `type` as a JSON string: the type is first converted to its
/// serialized object form, which is then written out by folly using the
/// options returned by velox::getSerializationOptions().
std::string serializeType(const std::shared_ptr<const velox::Type>& type) {
  auto serialized = type->serialize();
  const auto& options = velox::getSerializationOptions();
  return folly::json::serialize(serialized, options);
}

#if defined(CREATE_PYVELOX_MODULE)
// Entry point of the `pyvelox` extension module. It is only compiled when the
// build defines CREATE_PYVELOX_MODULE.
PYBIND11_MODULE(pyvelox, m) {
  // Register the Velox bindings on the freshly created module.
  addVeloxBindings(m);

  // Module metadata.
  m.attr("__version__") = "dev";
  m.doc() = R"pbdoc(
PyVelox native code module
-----------------------
)pbdoc";
}
#endif
} // namespace facebook::velox::py
171 changes: 171 additions & 0 deletions pyvelox/pyvelox.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
/*
* Copyright (c) Facebook, Inc. and its affiliates.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#pragma once

#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#include <pybind11/stl_bind.h>
#include <velox/type/Type.h>
#include "folly/json.h"

namespace facebook::velox::py {

/// Serializes `type` and returns the result as a JSON string. Defined in
/// pyvelox.cpp and exposed to Python as the `serialize` method of VeloxType.
std::string serializeType(const std::shared_ptr<const velox::Type>& type);

/// Adds Velox Python Bindings to the module m.
///
/// This function adds the following bindings:
/// * velox::TypeKind enum
/// * velox::Type and its derived types
/// * Basic functions on Type and its derived types.
///
/// @param m Module to add bindings to.
/// @param asLocalModule If true then these bindings are only visible inside
/// the module. Refer to
/// https://pybind11.readthedocs.io/en/stable/advanced/classes.html#module-local-class-bindings
/// for further details.
inline void addVeloxBindings(pybind11::module& m, bool asLocalModule = true) {
// Inlining these bindings since adding them to the cpp file results in an
// ASAN error.
using namespace velox;
namespace py = pybind11;

// Add TypeKind enum. export_values() additionally exposes every enumerator
// in the enclosing scope, i.e. as an attribute of the module itself.
py::enum_<velox::TypeKind>(m, "TypeKind", py::module_local(asLocalModule))
.value("BOOLEAN", velox::TypeKind::BOOLEAN)
.value("TINYINT", velox::TypeKind::TINYINT)
.value("SMALLINT", velox::TypeKind::SMALLINT)
.value("INTEGER", velox::TypeKind::INTEGER)
.value("BIGINT", velox::TypeKind::BIGINT)
.value("REAL", velox::TypeKind::REAL)
.value("DOUBLE", velox::TypeKind::DOUBLE)
.value("VARCHAR", velox::TypeKind::VARCHAR)
.value("VARBINARY", velox::TypeKind::VARBINARY)
.value("TIMESTAMP", velox::TypeKind::TIMESTAMP)
.value("OPAQUE", velox::TypeKind::OPAQUE)
.value("ARRAY", velox::TypeKind::ARRAY)
.value("MAP", velox::TypeKind::MAP)
.value("ROW", velox::TypeKind::ROW)
.export_values();

// Create VeloxType bound to velox::Type, held through std::shared_ptr. This
// base binding is created first because every class below names Type as its
// parent.
py::class_<Type, std::shared_ptr<Type>> type(
m, "VeloxType", py::module_local(asLocalModule));

// Adding all the derived types of Type here. Only the class objects are
// created at this point; constructors and methods are attached further down.
py::class_<BooleanType, Type, std::shared_ptr<BooleanType>> booleanType(
m, "BooleanType", py::module_local(asLocalModule));
py::class_<IntegerType, Type, std::shared_ptr<IntegerType>> integerType(
m, "IntegerType", py::module_local(asLocalModule));
py::class_<BigintType, Type, std::shared_ptr<BigintType>> bigintType(
m, "BigintType", py::module_local(asLocalModule));
py::class_<SmallintType, Type, std::shared_ptr<SmallintType>> smallintType(
m, "SmallintType", py::module_local(asLocalModule));
py::class_<TinyintType, Type, std::shared_ptr<TinyintType>> tinyintType(
m, "TinyintType", py::module_local(asLocalModule));
py::class_<RealType, Type, std::shared_ptr<RealType>> realType(
m, "RealType", py::module_local(asLocalModule));
py::class_<DoubleType, Type, std::shared_ptr<DoubleType>> doubleType(
m, "DoubleType", py::module_local(asLocalModule));
py::class_<TimestampType, Type, std::shared_ptr<TimestampType>> timestampType(
m, "TimestampType", py::module_local(asLocalModule));
py::class_<VarcharType, Type, std::shared_ptr<VarcharType>> varcharType(
m, "VarcharType", py::module_local(asLocalModule));
py::class_<VarbinaryType, Type, std::shared_ptr<VarbinaryType>> varbinaryType(
m, "VarbinaryType", py::module_local(asLocalModule));
py::class_<ArrayType, Type, std::shared_ptr<ArrayType>> arrayType(
m, "ArrayType", py::module_local(asLocalModule));
py::class_<MapType, Type, std::shared_ptr<MapType>> mapType(
m, "MapType", py::module_local(asLocalModule));
py::class_<RowType, Type, std::shared_ptr<RowType>> rowType(
m, "RowType", py::module_local(asLocalModule));
py::class_<FixedSizeArrayType, Type, std::shared_ptr<FixedSizeArrayType>>
fixedArrayType(m, "FixedSizeArrayType", py::module_local(asLocalModule));

// Basic operations on Type. They are defined on the base binding, so all the
// derived classes above inherit them.
type.def("__str__", &Type::toString);
// GCC doesn't support the below kind of templatization, so the comparison
// operators are only bound when compiling with clang.
// NOTE(review): on other compilers no __eq__/__ne__ is bound here, so Python
// presumably falls back to its default comparison - confirm.
#if defined(__clang__)
// Adds equality and inequality comparison operators.
type.def(py::self == py::self);
type.def(py::self != py::self);
#endif
type.def(
"cpp_size_in_bytes",
&Type::cppSizeInBytes,
"Return the C++ size in bytes");
type.def(
"is_fixed_width",
&Type::isFixedWidth,
"Check if the type is fixed width");
type.def(
"is_primitive_type",
&Type::isPrimitiveType,
"Check if the type is a primitive type");
type.def("kind", &Type::kind, "Returns the kind of the type");
type.def("serialize", &serializeType, "Serializes the type as JSON");

// The scalar types take no constructor arguments.
booleanType.def(py::init());
tinyintType.def(py::init());
smallintType.def(py::init());
integerType.def(py::init());
bigintType.def(py::init());
realType.def(py::init());
doubleType.def(py::init());
varcharType.def(py::init());
varbinaryType.def(py::init());
timestampType.def(py::init());
// ArrayType is constructed from a single type (presumably the element type).
arrayType.def(py::init<std::shared_ptr<Type>>());
arrayType.def(
"element_type", &ArrayType::elementType, "Return the element type");
// FixedSizeArrayType is constructed from an int and a type (presumably the
// array length and the element type - confirm against the C++ constructor).
fixedArrayType.def(py::init<int, velox::TypePtr>())
.def("element_type", &velox::FixedSizeArrayType::elementType)
.def("fixed_width", &velox::FixedSizeArrayType::fixedElementsWidth);
// MapType is constructed from two types (presumably key type, then value
// type).
mapType.def(py::init<std::shared_ptr<Type>, std::shared_ptr<Type>>());
mapType.def("key_type", &MapType::keyType, "Return the key type");
mapType.def("value_type", &MapType::valueType, "Return the value type");

// RowType is constructed from a list of strings and a list of types
// (presumably the column names and the matching column types).
rowType.def(py::init<
std::vector<std::string>,
std::vector<std::shared_ptr<const Type>>>());
rowType.def("size", &RowType::size, "Return the number of columns");
rowType.def(
"child_at",
&RowType::childAt,
"Return the type of the column at a given index",
py::arg("idx"));
// NOTE(review): find_child goes through a lambda taking std::string instead
// of binding the member function directly; presumably findChild's own
// parameter type is not directly convertible from a Python str - confirm.
rowType.def(
"find_child",
[](const std::shared_ptr<RowType>& type, const std::string& name) {
return type->findChild(name);
},
"Return the type of the column with the given name",
py::arg("name"));
rowType.def(
"get_child_idx",
&RowType::getChildIdx,
"Return the index of the column with the given name",
py::arg("name"));
rowType.def(
"name_of",
&RowType::nameOf,
"Return the name of the column at the given index",
py::arg("idx"));
rowType.def("names", &RowType::names, "Return the names of the columns");
}

} // namespace facebook::velox::py
13 changes: 13 additions & 0 deletions pyvelox/test/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright (c) Facebook, Inc. and its affiliates.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
Loading

0 comments on commit 5d4db25

Please sign in to comment.