Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use pluto where possible instead of hic calls #270

Merged
merged 2 commits into from
Mar 28, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,7 @@ add_subdirectory( hic )
find_package(hic REQUIRED)

add_subdirectory( pluto )
find_package(pluto REQUIRED)

include( features/BOUNDSCHECKING )
include( features/FORTRAN )
Expand Down
1 change: 1 addition & 0 deletions cmake/atlas-import.cmake.in
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ endif()

find_dependency( atlas_io HINTS ${CMAKE_CURRENT_LIST_DIR}/../atlas_io @atlas_io_DIR@ @atlas_io_BINARY_DIR@ )
find_dependency( hic HINTS ${CMAKE_CURRENT_LIST_DIR}/../hic @hic_DIR@ @hic_BINARY_DIR@ )
find_dependency( pluto HINTS ${CMAKE_CURRENT_LIST_DIR}/../pluto @pluto_DIR@ @pluto_BINARY_DIR@ )

## Eigen3
set( Eigen3_HINT @Eigen3_DIR@ )
Expand Down
24 changes: 24 additions & 0 deletions pluto/src/pluto/copy.h
Original file line number Diff line number Diff line change
Expand Up @@ -181,4 +181,28 @@ void copy_device_to_host(T& host, const std::unique_ptr<T, D>& device, stream_vi
copy_device_to_host(&host, device.get(), s);
}

template <class T>
void copy_host_to_device_2D(
T* device, std::size_t device_pitch,
const T* host, std::size_t host_pitch,
std::size_t width, std::size_t height) {

memcpy_host_to_device_2D(
device, device_pitch * sizeof(T),
host, host_pitch * sizeof(T),
width * sizeof(T), height);
}

template <class T>
void copy_device_to_host_2D(
T* host, std::size_t host_pitch,
const T* device, std::size_t device_pitch,
std::size_t width, std::size_t height) {

memcpy_device_to_host_2D(
host, host_pitch * sizeof(T),
device, device_pitch * sizeof(T),
width * sizeof(T), height);
}

} // namespace pluto
1 change: 1 addition & 0 deletions src/atlas/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1017,6 +1017,7 @@ ecbuild_add_library( TARGET atlas
atlas_io
hic
hicsparse
pluto
$<${atlas_HAVE_EIGEN}:Eigen3::Eigen>
$<${atlas_HAVE_OMP_CXX}:OpenMP::OpenMP_CXX>
$<${atlas_HAVE_GRIDTOOLS_STORAGE}:GridTools::gridtools>
Expand Down
171 changes: 56 additions & 115 deletions src/atlas/array/native/NativeDataStore.h
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,8 @@
#include <limits> // std::numeric_limits<T>::signaling_NaN
#include <sstream>

#include "pluto/pluto.h"

#include "atlas/array/ArrayDataStore.h"
#include "atlas/library/Library.h"
#include "atlas/library/config.h"
Expand All @@ -24,9 +26,6 @@
#include "atlas/runtime/Log.h"
#include "eckit/log/Bytes.h"

#include "hic/hic.h"


#define ATLAS_ACC_DEBUG 0

//------------------------------------------------------------------------------
Expand Down Expand Up @@ -94,26 +93,15 @@ template <typename Value>
void initialise(Value[], size_t) {}
#endif

static int devices() {
static int devices_ = [](){
int n = 0;
auto err = hicGetDeviceCount(&n);
if (err != hicSuccess) {
n = 0;
static_cast<void>(hicGetLastError());
}
return n;
}();
return devices_;
}

template <typename Value>
class DataStore : public ArrayDataStore {
public:
DataStore(size_t size): size_(size) {
DataStore(size_t size): size_(size),
host_allocator_{pluto::host_resource()},
device_allocator_{pluto::device_resource()} {
allocateHost();
initialise(host_data_, size_);
if (ATLAS_HAVE_GPU && devices()) {
if (ATLAS_HAVE_GPU && pluto::devices()) {
device_updated_ = false;
}
else {
Expand All @@ -127,25 +115,19 @@ class DataStore : public ArrayDataStore {
}

void updateDevice() const override {
if (ATLAS_HAVE_GPU && devices()) {
if (ATLAS_HAVE_GPU && pluto::devices()) {
if (not device_allocated_) {
allocateDevice();
}
hicError_t err = hicMemcpy(device_data_, host_data_, size_*sizeof(Value), hicMemcpyHostToDevice);
if (err != hicSuccess) {
throw_AssertionFailed("Failed to updateDevice: "+std::string(hicGetErrorString(err)), Here());
}
pluto::copy_host_to_device(device_data_, host_data_, size_);
device_updated_ = true;
}
}

void updateHost() const override {
if constexpr (ATLAS_HAVE_GPU) {
if (device_allocated_) {
hicError_t err = hicMemcpy(host_data_, device_data_, size_*sizeof(Value), hicMemcpyDeviceToHost);
if (err != hicSuccess) {
throw_AssertionFailed("Failed to updateHost: "+std::string(hicGetErrorString(err)), Here());
}
pluto::copy_device_to_host(host_data_, device_data_, size_);
host_updated_ = true;
}
}
Expand Down Expand Up @@ -174,32 +156,24 @@ class DataStore : public ArrayDataStore {
bool deviceAllocated() const override { return device_allocated_; }

void allocateDevice() const override {
if (ATLAS_HAVE_GPU && devices()) {
if (ATLAS_HAVE_GPU && pluto::devices()) {
if (device_allocated_) {
return;
}
if (size_) {
hicError_t err = hicMalloc((void**)&device_data_, sizeof(Value)*size_);
if (err != hicSuccess) {
throw_AssertionFailed("Failed to allocate GPU memory: " + std::string(hicGetErrorString(err)), Here());
}
device_data_ = device_allocator_.allocate(size_);
device_allocated_ = true;
accMap();
}
}
}

void deallocateDevice() const override {
if constexpr (ATLAS_HAVE_GPU) {
if (device_allocated_) {
accUnmap();
hicError_t err = hicFree(device_data_);
if (err != hicSuccess) {
throw_AssertionFailed("Failed to deallocate GPU memory: " + std::string(hicGetErrorString(err)), Here());
}
device_data_ = nullptr;
device_allocated_ = false;
}
if (device_allocated_) {
accUnmap();
device_allocator_.deallocate(device_data_,size_);
device_data_ = nullptr;
device_allocated_ = false;
}
}

Expand Down Expand Up @@ -259,36 +233,22 @@ class DataStore : public ArrayDataStore {
throw_Exception(ss.str(), loc);
}

void alloc_aligned(Value*& ptr, size_t n) {
if (n > 0) {
const size_t alignment = 64 * sizeof(Value);
size_t bytes = sizeof(Value) * n;
MemoryHighWatermark::instance() += bytes;

int err = posix_memalign((void**)&ptr, alignment, bytes);
if (err) {
throw_AllocationFailed(bytes, Here());
}
}
else {
ptr = nullptr;
}
}

void free_aligned(Value*& ptr) {
if (ptr) {
free(ptr);
ptr = nullptr;
MemoryHighWatermark::instance() -= footprint();
}
}

void allocateHost() {
alloc_aligned(host_data_, size_);
if (size_ > 0) {
MemoryHighWatermark::instance() += footprint();
host_data_ = host_allocator_.allocate(size_);
}
else {
host_data_ = nullptr;
}
}

void deallocateHost() {
free_aligned(host_data_);
if (host_data_) {
host_allocator_.deallocate(host_data_, size_);
host_data_ = nullptr;
MemoryHighWatermark::instance() -= footprint();
}
}

size_t footprint() const { return sizeof(Value) * size_; }
Expand All @@ -302,6 +262,8 @@ class DataStore : public ArrayDataStore {
mutable bool device_allocated_{false};
mutable bool acc_mapped_{false};

pluto::allocator<Value> host_allocator_;
mutable pluto::allocator<Value> device_allocator_;
};

//------------------------------------------------------------------------------
Expand All @@ -311,22 +273,23 @@ class WrappedDataStore : public ArrayDataStore {
public:

void init_device() {
if (ATLAS_HAVE_GPU && devices()) {
if (ATLAS_HAVE_GPU && pluto::devices()) {
device_updated_ = false;
}
else {
device_data_ = host_data_;
}
}

WrappedDataStore(Value* host_data, size_t size): host_data_(host_data), size_(size) {
WrappedDataStore(Value* host_data, size_t size): host_data_(host_data), size_(size),
device_allocator_{pluto::device_resource()} {
init_device();
}

WrappedDataStore(Value* host_data, const ArraySpec& spec):
host_data_(host_data),
size_(spec.size())
{
size_(spec.size()),
device_allocator_{pluto::device_resource()} {
init_device();
contiguous_ = spec.contiguous();
if (! contiguous_) {
Expand Down Expand Up @@ -363,25 +326,18 @@ class WrappedDataStore : public ArrayDataStore {
}

void updateDevice() const override {
if (ATLAS_HAVE_GPU && devices()) {
if (ATLAS_HAVE_GPU && pluto::devices()) {
if (not device_allocated_) {
allocateDevice();
}
if (contiguous_) {
hicError_t err = hicMemcpy(device_data_, host_data_, size_*sizeof(Value), hicMemcpyHostToDevice);
if (err != hicSuccess) {
throw_AssertionFailed("Failed to updateDevice: "+std::string(hicGetErrorString(err)), Here());
}
pluto::copy_host_to_device(device_data_, host_data_, size_);
}
else {
hicError_t err = hicMemcpy2D(
device_data_, memcpy_h2d_pitch_ * sizeof(Value),
host_data_, memcpy_d2h_pitch_ * sizeof(Value),
memcpy_width_ * sizeof(Value), memcpy_height_,
hicMemcpyHostToDevice);
if (err != hicSuccess) {
throw_AssertionFailed("Failed to updateDevice: "+std::string(hicGetErrorString(err)), Here());
}
pluto::copy_host_to_device_2D(
device_data_, memcpy_h2d_pitch_,
host_data_, memcpy_d2h_pitch_,
memcpy_width_, memcpy_height_);
}
device_updated_ = true;
}
Expand All @@ -391,20 +347,13 @@ class WrappedDataStore : public ArrayDataStore {
if constexpr (ATLAS_HAVE_GPU) {
if (device_allocated_) {
if (contiguous_) {
hicError_t err = hicMemcpy(host_data_, device_data_, size_*sizeof(Value), hicMemcpyDeviceToHost);
if (err != hicSuccess) {
throw_AssertionFailed("Failed to updateHost: "+std::string(hicGetErrorString(err)), Here());
}
pluto::copy_device_to_host(host_data_, device_data_, size_);
}
else {
hicError_t err = hicMemcpy2D(
host_data_, memcpy_d2h_pitch_ * sizeof(Value),
device_data_, memcpy_h2d_pitch_ * sizeof(Value),
memcpy_width_ * sizeof(Value), memcpy_height_,
hicMemcpyDeviceToHost);
if (err != hicSuccess) {
throw_AssertionFailed("Failed to updateHost: "+std::string(hicGetErrorString(err)), Here());
}
pluto::copy_device_to_host_2D(
host_data_, memcpy_d2h_pitch_ ,
device_data_, memcpy_h2d_pitch_,
memcpy_width_, memcpy_height_);
}
host_updated_ = true;
}
Expand Down Expand Up @@ -435,15 +384,12 @@ class WrappedDataStore : public ArrayDataStore {
bool deviceAllocated() const override { return device_allocated_; }

void allocateDevice() const override {
if (ATLAS_HAVE_GPU && devices()) {
if (ATLAS_HAVE_GPU && pluto::devices()) {
if (device_allocated_) {
return;
}
if (size_) {
hicError_t err = hicMalloc((void**)&device_data_, sizeof(Value)*size_);
if (err != hicSuccess) {
throw_AssertionFailed("Failed to allocate GPU memory: " + std::string(hicGetErrorString(err)), Here());
}
device_data_ = device_allocator_.allocate(size_);
device_allocated_ = true;
if (contiguous_) {
accMap();
Expand All @@ -453,18 +399,13 @@ class WrappedDataStore : public ArrayDataStore {
}

void deallocateDevice() const override {
if constexpr (ATLAS_HAVE_GPU) {
if (device_allocated_) {
if (contiguous_) {
accUnmap();
}
hicError_t err = hicFree(device_data_);
if (err != hicSuccess) {
throw_AssertionFailed("Failed to deallocate GPU memory: " + std::string(hicGetErrorString(err)), Here());
}
device_data_ = nullptr;
device_allocated_ = false;
if (device_allocated_) {
if (contiguous_) {
accUnmap();
}
device_allocator_.deallocate(device_data_, size_);
device_data_ = nullptr;
device_allocated_ = false;
}
}

Expand Down Expand Up @@ -505,7 +446,6 @@ class WrappedDataStore : public ArrayDataStore {
}

void accUnmap() const override {
#if ATLAS_HAVE_ACC
if (acc_mapped_) {
ATLAS_ASSERT(atlas::acc::is_present(host_data_, size_ * sizeof(Value)));
if constexpr(ATLAS_ACC_DEBUG) {
Expand All @@ -514,7 +454,6 @@ class WrappedDataStore : public ArrayDataStore {
atlas::acc::unmap(host_data_);
acc_mapped_ = false;
}
#endif
}

private:
Expand All @@ -532,6 +471,8 @@ class WrappedDataStore : public ArrayDataStore {
mutable bool device_updated_{true};
mutable bool device_allocated_{false};
mutable bool acc_mapped_{false};

mutable pluto::allocator<Value> device_allocator_;
};

} // namespace native
Expand Down
Loading
Loading