Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use camp resources to ensure asynchronous copies occur when possible. #204

Open
wants to merge 1 commit into
base: develop
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
58 changes: 44 additions & 14 deletions src/chai/ArrayManager.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -224,6 +224,34 @@ void ArrayManager::resetTouch(PointerRecord* pointer_record)
}
}


/* Not all GPU platform runtimes (notably HIP), will give you asynchronous copies to the device by default, so we leverage
* umpire's API for asynchronous copies using camp resources in this method, based off of the CHAI destination space
* */
static void copy(void * dst_pointer, void * src_pointer, umpire::ResourceManager & manager, ExecutionSpace dst_space, ExecutionSpace src_space) {

#ifdef CHAI_ENABLE_CUDA
camp::resources::Resource device_resource(camp::resources::Cuda::get_default());
#elif defined(CHAI_ENABLE_HIP)
camp::resources::Resource device_resource(camp::resources::Hip::get_default());
#else
camp::resources::Resource device_resource(camp::resources::Host::get_default());
#endif

camp::resources::Resource host_resource(camp::resources::Host::get_default());
if (dst_space == GPU || src_space == GPU) {
// Do the copy using the device resource
manager.copy(dst_pointer, src_pointer, device_resource);
} else {
// Do the copy using the host resource
manager.copy(dst_pointer, src_pointer, host_resource);
}
// Ensure device to host copies are synchronous
if (dst_space == CPU && src_space == GPU) {
device_resource.wait();
}
}

void ArrayManager::move(PointerRecord* record, ExecutionSpace space)
{
if (space == NONE) {
Expand Down Expand Up @@ -251,7 +279,9 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space)
}
#endif

void* src_pointer = record->m_pointers[record->m_last_space];
ExecutionSpace prev_space = record->m_last_space;

void* src_pointer = record->m_pointers[prev_space];
void* dst_pointer = record->m_pointers[space];

if (!dst_pointer) {
Expand All @@ -265,7 +295,7 @@ void ArrayManager::move(PointerRecord* record, ExecutionSpace space)
} else if (dst_pointer != src_pointer) {
// Exclude the copy if src and dst are the same (can happen for PINNED memory)
{
m_resource_manager.copy(dst_pointer, src_pointer);
chai::copy(dst_pointer, src_pointer, m_resource_manager, space, prev_space);
}

callback(record, ACTION_MOVE, space);
Expand Down Expand Up @@ -447,32 +477,32 @@ PointerRecord* ArrayManager::makeManaged(void* pointer,

// Builds and returns a newly heap-allocated PointerRecord that duplicates
// `record`: same size, same last space, same per-space allocators, and a copy
// of the data held in the record's last space. The new record's user callback
// is a no-op lambda.
//
// NOTE(review): this function is shown as a diff with removed and added lines
// interleaved. Each statement that uses the local `copy` is the pre-change
// line; the statement right after it using `new_record` is its replacement.
// The rename presumably avoids a clash with the free function `copy` that the
// same change introduces -- confirm against the full file.
PointerRecord* ArrayManager::deepCopyRecord(PointerRecord const* record)
{
PointerRecord* copy = new PointerRecord{};
PointerRecord* new_record = new PointerRecord{};
const size_t size = record->m_size;
copy->m_size = size;
copy->m_user_callback = [] (const PointerRecord*, Action, ExecutionSpace) {};
new_record->m_size = size;
new_record->m_user_callback = [] (const PointerRecord*, Action, ExecutionSpace) {};

// The duplicate starts out in the same space the source was last used in and
// inherits the source's allocator for every execution space.
const ExecutionSpace last_space = record->m_last_space;
copy->m_last_space = last_space;
new_record->m_last_space = last_space;
for (int space = CPU; space < NUM_EXECUTION_SPACES; ++space) {
copy->m_allocators[space] = record->m_allocators[space];
new_record->m_allocators[space] = record->m_allocators[space];
}

// Only the last space is allocated here; allocate() is expected to populate
// m_pointers[last_space], which is read below as the copy destination.
allocate(copy, last_space);
allocate(new_record, last_space);

// Every space is flagged as owned and untouched ...
for (int space = CPU; space < NUM_EXECUTION_SPACES; ++space) {
copy->m_owned[space] = true;
copy->m_touched[space] = false;
new_record->m_owned[space] = true;
new_record->m_touched[space] = false;
}

// ... and then the last space alone is flagged as touched.
copy->m_touched[last_space] = true;
new_record->m_touched[last_space] = true;

void* dst_pointer = copy->m_pointers[last_space];
void* dst_pointer = new_record->m_pointers[last_space];
void* src_pointer = record->m_pointers[last_space];

// Source and destination are both in last_space, so the post-change call
// passes last_space for both the destination and source space arguments.
m_resource_manager.copy(dst_pointer, src_pointer);
chai::copy(dst_pointer, src_pointer, m_resource_manager, last_space, last_space);

return copy;
return new_record;
}

std::unordered_map<void*, const PointerRecord*>
Expand Down