Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

No-CUDA build #373

Merged
merged 11 commits into from
May 27, 2024
13 changes: 10 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -44,13 +44,13 @@ rapids_cmake_support_conda_env(conda_env MODIFY_PREFIX_PATH)
rapids_cpm_init()

rapids_find_package(
CUDAToolkit REQUIRED
Threads REQUIRED
BUILD_EXPORT_SET kvikio-exports
INSTALL_EXPORT_SET kvikio-exports
)

rapids_find_package(
Threads REQUIRED
CUDAToolkit
BUILD_EXPORT_SET kvikio-exports
INSTALL_EXPORT_SET kvikio-exports
)
Expand Down Expand Up @@ -84,13 +84,20 @@ endif()
add_library(kvikio INTERFACE)
add_library(kvikio::kvikio ALIAS kvikio)

# CUDA is optional: when the toolkit was found, link against it and define
# KVIKIO_CUDA_FOUND for consumers; otherwise warn about the CUDA-less build.
if(NOT CUDAToolkit_FOUND)
  message(WARNING "Building KvikIO without CUDA")
else()
  target_link_libraries(kvikio INTERFACE CUDA::toolkit)
  target_compile_definitions(kvikio INTERFACE KVIKIO_CUDA_FOUND)
endif()

target_include_directories(
kvikio INTERFACE "$<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>"
"$<INSTALL_INTERFACE:include>"
)

target_link_libraries(kvikio INTERFACE Threads::Threads)
target_link_libraries(kvikio INTERFACE CUDA::toolkit)
target_link_libraries(kvikio INTERFACE ${CMAKE_DL_LIBS})
target_compile_features(kvikio INTERFACE cxx_std_17)

Expand Down
56 changes: 39 additions & 17 deletions cpp/examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,54 @@
# =============================================================================

set(TEST_INSTALL_PATH bin/tests/libkvikio)
set(TEST_NAME BASIC_IO_TEST)

add_executable(BASIC_IO_TEST basic_io.cpp)
set_target_properties(BASIC_IO_TEST PROPERTIES INSTALL_RPATH "\$ORIGIN/../../lib")
target_include_directories(BASIC_IO_TEST PRIVATE ../include ${cuFile_INCLUDE_DIRS})
target_link_libraries(BASIC_IO_TEST PRIVATE kvikio CUDA::cudart)

# Enable supported cuFile features in KvikIO examples
if(cuFile_FOUND)
target_link_libraries(BASIC_IO_TEST PRIVATE cufile::cuFile_interface)
target_compile_definitions(BASIC_IO_TEST PRIVATE KVIKIO_CUFILE_FOUND)
if(cuFile_BATCH_API_FOUND)
target_compile_definitions(BASIC_IO_TEST PRIVATE KVIKIO_CUFILE_BATCH_API_FOUND)

if(CUDAToolkit_FOUND)
add_executable(BASIC_IO_TEST basic_io.cpp)
set_target_properties(BASIC_IO_TEST PROPERTIES INSTALL_RPATH "\$ORIGIN/../../lib")
target_include_directories(BASIC_IO_TEST PRIVATE ../include ${cuFile_INCLUDE_DIRS})
target_link_libraries(BASIC_IO_TEST PRIVATE kvikio CUDA::cudart)

# Enable supported cuFile features in KvikIO examples
if(cuFile_FOUND)
target_link_libraries(BASIC_IO_TEST PRIVATE cufile::cuFile_interface)
target_compile_definitions(BASIC_IO_TEST PRIVATE KVIKIO_CUFILE_FOUND)
if(cuFile_BATCH_API_FOUND)
target_compile_definitions(BASIC_IO_TEST PRIVATE KVIKIO_CUFILE_BATCH_API_FOUND)
endif()
if(cuFile_STREAM_API_FOUND)
target_compile_definitions(BASIC_IO_TEST PRIVATE KVIKIO_CUFILE_STREAM_API_FOUND)
endif()
endif()
if(cuFile_STREAM_API_FOUND)
target_compile_definitions(BASIC_IO_TEST PRIVATE KVIKIO_CUFILE_STREAM_API_FOUND)

if(CMAKE_COMPILER_IS_GNUCXX)
set(KVIKIO_CXX_FLAGS "-Wall;-Werror;-Wno-unknown-pragmas")
target_compile_options(BASIC_IO_TEST PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${KVIKIO_CXX_FLAGS}>")
endif()

install(
TARGETS BASIC_IO_TEST
COMPONENT testing
DESTINATION ${TEST_INSTALL_PATH}
EXCLUDE_FROM_ALL
)
else()
message(STATUS "Cannot build the basic_io example when CUDA is not found")
endif()

add_executable(BASIC_NO_CUDA_TEST basic_no_cuda.cpp)
set_target_properties(BASIC_NO_CUDA_TEST PROPERTIES INSTALL_RPATH "\$ORIGIN/../../lib")
target_include_directories(BASIC_NO_CUDA_TEST PRIVATE ../include)
target_link_libraries(BASIC_NO_CUDA_TEST PRIVATE kvikio)

if(CMAKE_COMPILER_IS_GNUCXX)
set(KVIKIO_CXX_FLAGS "-Wall;-Werror;-Wno-unknown-pragmas")
target_compile_options(BASIC_IO_TEST PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${KVIKIO_CXX_FLAGS}>")
target_compile_options(
BASIC_NO_CUDA_TEST PRIVATE "$<$<COMPILE_LANGUAGE:CXX>:${KVIKIO_CXX_FLAGS}>"
)
endif()

install(
TARGETS ${TEST_NAME}
TARGETS BASIC_NO_CUDA_TEST
COMPONENT testing
DESTINATION ${TEST_INSTALL_PATH}
EXCLUDE_FROM_ALL
Expand Down
117 changes: 117 additions & 0 deletions cpp/examples/basic_no_cuda.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,117 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <chrono>
#include <cstddef>
#include <cstdlib>
#include <future>
#include <iostream>
#include <numeric>
#include <vector>

#include <kvikio/batch.hpp>
#include <kvikio/buffer.hpp>
#include <kvikio/defaults.hpp>
#include <kvikio/driver.hpp>
#include <kvikio/error.hpp>
#include <kvikio/file_handle.hpp>

using namespace std;

/**
 * @brief Scope timer that reports its own lifetime.
 *
 * The clock is sampled when the object is constructed and again when it is
 * destroyed; the difference is written to `std::cout` as "(<N> us)" followed
 * by a newline.
 */
class Timer {
 public:
  Timer() : start(std::chrono::steady_clock::now()) {}

  ~Timer()
  {
    // steady_clock is monotonic, so the measured interval cannot be distorted
    // by an adjustment of the system time while the timer is alive.
    auto const elapsed_us = std::chrono::duration_cast<std::chrono::microseconds>(
                              std::chrono::steady_clock::now() - start)
                              .count();

    std::cout << "(" << elapsed_us << " us)" << std::endl;
  }

 private:
  std::chrono::time_point<std::chrono::steady_clock> start;  // sampled at construction
};

/**
 * @brief Terminate the example when an expectation does not hold.
 *
 * Returns normally when `condition` is true. Otherwise prints "Error" to
 * standard output and ends the process with `exit(-1)`.
 *
 * @param condition Result of the expectation being verified.
 */
void check(bool condition)
{
  if (condition) { return; }

  std::cout << "Error" << std::endl;
  exit(-1);
}

// Problem size shared by the write and read phases of this example.
constexpr int NELEM = 1024; // Number of `int` elements compared after the files are read back
constexpr int SIZE = NELEM * sizeof(int); // Byte size of NELEM ints; the byte count passed to pwrite()/pread()
constexpr int LARGE_SIZE = 8 * SIZE; // Larger size (in bytes) to test partial submit; not referenced by main() in this file

/**
 * @brief Example: write and read two files through KvikIO using host memory only.
 *
 * Prints the KvikIO defaults (plus the values reported by
 * `kvikio::DriverProperties` when compatibility mode is disabled), writes the
 * same host buffer to two files, reads both files back into separate buffers,
 * and verifies the content element by element. Any failed expectation ends
 * the process through `check()`.
 */
int main()
{
  std::cout << "KvikIO defaults: " << std::endl;
  if (kvikio::defaults::compat_mode()) {
    std::cout << " Compatibility mode: enabled" << std::endl;
  } else {
    kvikio::DriverInitializer manual_init_driver;
    std::cout << " Compatibility mode: disabled" << std::endl;
    kvikio::DriverProperties props;
    std::cout << "DriverProperties: " << std::endl;
    std::cout << " nvfs version: " << props.get_nvfs_major_version() << "."
              << props.get_nvfs_minor_version() << std::endl;
    std::cout << " Allow compatibility mode: " << std::boolalpha
              << props.get_nvfs_allow_compat_mode() << std::endl;
    // The label matches the property being printed (poll mode, not "pool").
    std::cout << " Poll mode - enabled: " << std::boolalpha << props.get_nvfs_poll_mode()
              << ", threshold: " << props.get_nvfs_poll_thresh_size() << " kb" << std::endl;
    std::cout << " Max pinned memory: " << props.get_max_pinned_memory_size() << " kb"
              << std::endl;
    std::cout << " Max batch IO size: " << props.get_max_batch_io_size() << std::endl;
  }

  // NELEM ints occupy exactly SIZE bytes, which is the amount transferred by
  // every pwrite()/pread() below. (SIZE is a byte count, not an element count.)
  std::vector<int> a(NELEM);
  std::iota(a.begin(), a.end(), 0);
  std::vector<int> b(NELEM);
  std::vector<int> c(NELEM);
  check(kvikio::is_host_memory(a.data()) == true);

  {
    // Write phase: both writes are submitted before either future is awaited.
    std::cout << std::endl;
    Timer timer;
    kvikio::FileHandle file1("/tmp/test-file1", "w");
    kvikio::FileHandle file2("/tmp/test-file2", "w");
    std::future<std::size_t> fut1 = file1.pwrite(a.data(), SIZE);
    std::future<std::size_t> fut2 = file2.pwrite(a.data(), SIZE);
    std::size_t written           = fut1.get() + fut2.get();
    check(written == SIZE * 2);
    check(SIZE == file1.nbytes());
    check(SIZE == file2.nbytes());
    std::cout << "Write: " << written << std::endl;
  }
  {
    // Read phase: read each file into its own buffer and compare with the source.
    std::cout << std::endl;
    Timer timer;
    kvikio::FileHandle file1("/tmp/test-file1", "r");
    kvikio::FileHandle file2("/tmp/test-file2", "r");
    std::future<std::size_t> fut1 = file1.pread(b.data(), SIZE);
    std::future<std::size_t> fut2 = file2.pread(c.data(), SIZE);
    std::size_t read              = fut1.get() + fut2.get();
    check(read == SIZE * 2);
    check(SIZE == file1.nbytes());
    check(SIZE == file2.nbytes());
    for (int i = 0; i < NELEM; ++i) {
      check(a[i] == b[i]);
      check(a[i] == c[i]);
    }
    std::cout << "Parallel POSIX read (" << kvikio::defaults::thread_pool_nthreads()
              << " threads): " << read << std::endl;
  }
}
4 changes: 2 additions & 2 deletions cpp/examples/downstream/cmake/get_kvikio.cmake
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# =============================================================================
# Copyright (c) 2022-2023, NVIDIA CORPORATION.
# Copyright (c) 2022-2024, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
# in compliance with the License. You may obtain a copy of the License at
Expand Down Expand Up @@ -30,4 +30,4 @@ function(find_and_configure_kvikio MIN_VERSION)

endfunction()

find_and_configure_kvikio("22.10")
find_and_configure_kvikio("24.06")
madsbk marked this conversation as resolved.
Show resolved Hide resolved
30 changes: 27 additions & 3 deletions cpp/include/kvikio/shim/cuda.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2022-2023, NVIDIA CORPORATION.
* Copyright (c) 2022-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -15,8 +15,7 @@
*/
#pragma once

#include <cuda.h>

#include <kvikio/shim/cuda_h_wrapper.hpp>
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

suggestion/nit: Shall we call this kvikio/shim/cuda_h.hpp ? Or maybe shim/cuda.hpp ?

Copy link
Member Author

@madsbk madsbk May 9, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We already have kvikio/shim/cuda.hpp and kvikio/shim/cufile.hpp.
Should we rename the wrappers to:

kvikio/shim/cuda_h.hpp
kvikio/shim/cufile_h.hpp

Or maybe

kvikio/shim/cuda_stub.hpp
kvikio/shim/cufile_stub.hpp

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think I prefer XXX_stub.hpp over XXX_h.hpp, though neither of them is fully satisfactory. I guess my only quibble with "wrapper" is that it suggests something that the header file is not doing: in the case where the real header is available, it just #includes that and nothing more.

#include <kvikio/shim/utils.hpp>

namespace kvikio {
Expand Down Expand Up @@ -49,6 +48,7 @@ class cudaAPI {
decltype(cuStreamSynchronize)* StreamSynchronize{nullptr};

private:
#ifdef KVIKIO_CUDA_FOUND
cudaAPI()
{
void* lib = load_library("libcuda.so.1");
Expand All @@ -73,6 +73,9 @@ class cudaAPI {
get_symbol(DevicePrimaryCtxRelease, lib, KVIKIO_STRINGIFY(cuDevicePrimaryCtxRelease));
get_symbol(StreamSynchronize, lib, KVIKIO_STRINGIFY(cuStreamSynchronize));
}
#else
// Built without KVIKIO_CUDA_FOUND: the CUDA driver API is unavailable, so
// constructing the API table always fails.
cudaAPI() { throw std::runtime_error("KvikIO not compiled with CUDA support"); }
madsbk marked this conversation as resolved.
Show resolved Hide resolved
#endif

public:
cudaAPI(cudaAPI const&) = delete;
Expand All @@ -85,4 +88,25 @@ class cudaAPI {
}
};

/**
 * @brief Check if the CUDA driver API is available
 *
 * When compiled with `KVIKIO_CUDA_FOUND`, this calls `cudaAPI::instance()` and
 * reports `false` if that call throws `std::runtime_error`. When compiled
 * without `KVIKIO_CUDA_FOUND`, the answer is a compile-time `false`.
 *
 * NOTE(review): presumably `cudaAPI::instance()` constructs the `cudaAPI`
 * singleton, whose constructor loads `libcuda.so.1` -- confirm.
 *
 * @return The boolean answer
 */
#ifdef KVIKIO_CUDA_FOUND
inline bool is_cuda_available()
{
try {
cudaAPI::instance();
} catch (const std::runtime_error&) {
return false;
}
return true;
}
#else
constexpr bool is_cuda_available() { return false; }
#endif

} // namespace kvikio
64 changes: 64 additions & 0 deletions cpp/include/kvikio/shim/cuda_h_wrapper.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
/*
* Copyright (c) 2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

/**
 * In order to support compilation when `cuda.h` isn't available, we
 * wrap all use of cuda in a `#ifdef KVIKIO_CUDA_FOUND` guard.
 *
 * The motivation here is to make KvikIO work in all circumstances so
 * that libraries don't have to implement their own fallback solutions.
 */
#ifdef KVIKIO_CUDA_FOUND
#include <cuda.h>
#else

// If CUDA isn't available, we define some of its data types, constants, and
// function declarations here so that code referring to them still compiles.
// Notice, this doesn't need to be ABI compatible with the CUDA definitions.

using CUresult = int;
using CUdeviceptr = unsigned long long;
using CUdevice = int;
using CUcontext = void*;
using CUstream = void*;

// NOTE(review): every constant below is defined as 0, so they all compare
// equal to one another (e.g. CUDA_SUCCESS == CUDA_ERROR_INVALID_VALUE) --
// confirm that no code path built without CUDA needs to tell them apart.
#define CUDA_ERROR_STUB_LIBRARY 0
#define CUDA_SUCCESS 0
#define CUDA_ERROR_INVALID_VALUE 0
#define CU_POINTER_ATTRIBUTE_CONTEXT 0
#define CU_POINTER_ATTRIBUTE_DEVICE_ORDINAL 0
#define CU_POINTER_ATTRIBUTE_DEVICE_POINTER 0
#define CU_MEMHOSTREGISTER_PORTABLE 0

// Declarations only; this header provides no definitions for them.
// NOTE(review): presumably they exist so that `decltype(cuXxx)` expressions in
// the CUDA shim keep compiling -- confirm against kvikio/shim/cuda.hpp.
CUresult cuInit(...);
CUresult cuMemHostAlloc(...);
CUresult cuMemFreeHost(...);
CUresult cuMemcpyHtoD(...);
CUresult cuMemcpyDtoH(...);
CUresult cuPointerGetAttribute(...);
CUresult cuPointerGetAttributes(...);
CUresult cuCtxPushCurrent(...);
CUresult cuCtxPopCurrent(...);
CUresult cuCtxGetCurrent(...);
CUresult cuMemGetAddressRange(...);
CUresult cuGetErrorName(...);
CUresult cuGetErrorString(...);
CUresult cuDeviceGet(...);
CUresult cuDevicePrimaryCtxRetain(...);
CUresult cuDevicePrimaryCtxRelease(...);
CUresult cuStreamSynchronize(...);

#endif
3 changes: 2 additions & 1 deletion cpp/include/kvikio/shim/cufile_h_wrapper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,9 +15,10 @@
*/
#pragma once

#include <cuda.h>
#include <sys/types.h>

#include <kvikio/shim/cuda_h_wrapper.hpp>

/**
* In order to support compilation when `cufile.h` isn't available, we
* wrap all use of cufile in a `#ifdef KVIKIO_CUFILE_FOUND` guard.
Expand Down
Loading
Loading