Skip to content

Memleak stuff #62

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Draft
wants to merge 11 commits into
base: sycl
Choose a base branch
from
1 change: 1 addition & 0 deletions sycl/include/sycl/buffer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ class buffer : public detail::buffer_plain,
buffer &operator=(buffer &&rhs) = default;

~buffer() {
CPOUT << "~buffer()" << std::endl;
try {
buffer_plain::handleRelease();
} catch (std::exception &e) {
Expand Down
4 changes: 4 additions & 0 deletions sycl/include/sycl/detail/common.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@
#include <type_traits> // for enable_if_t
#include <utility> // for index_sequence, make_i...

// CP
//#define CPOUT std::clog
#define CPOUT std::clog.rdbuf(NULL); std::clog

// Default signature enables the passing of user code location information to
// public methods as a default argument.
namespace sycl {
Expand Down
3 changes: 3 additions & 0 deletions sycl/include/sycl/kernel_bundle.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -541,6 +541,9 @@ kernel_bundle<State> get_kernel_bundle(const context &Ctx) {
return get_kernel_bundle<State>(Ctx, Ctx.get_devices());
}

// CP
__SYCL_EXPORT void test_release(sycl::context &Ctx, ur_native_handle_t NativeHandle);

namespace detail {

// Internal non-template versions of get_kernel_bundle API which is used by
Expand Down
2 changes: 2 additions & 0 deletions sycl/include/sycl/property_list.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@
#include <type_traits> // for conditional_t, enable...
#include <vector> // for vector



namespace sycl {
inline namespace _V1 {
namespace ext::oneapi {
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/detail/buffer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -140,9 +140,11 @@ class buffer_impl final : public SYCLMemObjT {
MemObjType getType() const override { return MemObjType::Buffer; }

~buffer_impl() {
CPOUT << "~buffer_impl" << std::endl;
try {
BaseT::updateHostMemory();
} catch (...) {
std::cout << "exception during updateHostMemory() called from ~buffer_impl" << std::endl;
}
destructorNotification(this);
}
Expand Down
8 changes: 8 additions & 0 deletions sycl/source/detail/context_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ context_impl::context_impl(const device &Device, async_handler AsyncHandler,
MContext(nullptr),
MPlatform(detail::getSyclObjImpl(Device.get_platform())),
MPropList(PropList), MSupportBufferLocationByDevices(NotChecked) {
// CP
CPOUT << "context_impl(dev, async, plist) constructor" << std::endl;
verifyProps(PropList);
MKernelProgramCache.setContextPtr(this);
}
Expand All @@ -43,6 +45,8 @@ context_impl::context_impl(const std::vector<sycl::device> Devices,
: MOwnedByRuntime(true), MAsyncHandler(AsyncHandler), MDevices(Devices),
MContext(nullptr), MPlatform(), MPropList(PropList),
MSupportBufferLocationByDevices(NotChecked) {
// CP
CPOUT << "context_impl(devices, async, plist) constructor" << std::endl;
verifyProps(PropList);
MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform());
std::vector<ur_device_handle_t> DeviceIds;
Expand Down Expand Up @@ -76,6 +80,8 @@ context_impl::context_impl(ur_context_handle_t UrContext,
: MOwnedByRuntime(OwnedByRuntime), MAsyncHandler(AsyncHandler),
MDevices(DeviceList), MContext(UrContext), MPlatform(),
MSupportBufferLocationByDevices(NotChecked) {
// CP
CPOUT << "context_impl(UrContext, async, Adapter, DeviceList, OwnedByRuntime) constructor" << std::endl;
if (!MDevices.empty()) {
MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform());
} else {
Expand Down Expand Up @@ -126,6 +132,8 @@ cl_context context_impl::get() const {
}

context_impl::~context_impl() {
// CP
CPOUT << "~context_impl() called" << std::endl;
try {
// Free all events associated with the initialization of device globals.
for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers)
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/detail/context_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
#include <optional>
#include <set>



namespace sycl {
inline namespace _V1 {
// Forward declaration
Expand Down
1 change: 1 addition & 0 deletions sycl/source/detail/error_handling/error_handling.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -192,6 +192,7 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl,

for (size_t I = 0; I < 3; ++I) {
if (MaxThreadsPerBlock[I] < NDRDesc.LocalSize[I]) {
CPOUT << "---- THROWING ---- " << std::endl;
throw sycl::exception(make_error_code(errc::nd_range),
"The number of work-items in each dimension of a "
"work-group cannot exceed {" +
Expand Down
8 changes: 7 additions & 1 deletion sycl/source/detail/event_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,8 @@ void event_impl::initContextIfNeeded() {
}

event_impl::~event_impl() {
// CP
CPOUT << "~event_impl() called" << std::endl;
try {
auto Handle = this->getHandle();
if (Handle)
Expand Down Expand Up @@ -145,7 +147,8 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) {
event_impl::event_impl(ur_event_handle_t Event, const context &SyclContext)
: MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)),
MIsFlushed(true), MState(HES_Complete) {

// CP
CPOUT << "event_impl(ur_event_handle_t, context )" << std::endl;
ur_context_handle_t TempContext;
getAdapter()->call<UrApiKind::urEventGetInfo>(
this->getHandle(), UR_EVENT_INFO_CONTEXT, sizeof(ur_context_handle_t),
Expand All @@ -163,6 +166,9 @@ event_impl::event_impl(const QueueImplPtr &Queue)
: MQueue{Queue}, MIsProfilingEnabled{!Queue || Queue->MIsProfilingEnabled},
MFallbackProfiling{MIsProfilingEnabled && Queue &&
Queue->isProfilingFallback()} {
// CP
CPOUT << "event_impl(QueueImplPtr)" << std::endl;

if (Queue)
this->setContextImpl(Queue->getContextImplPtr());
else {
Expand Down
2 changes: 2 additions & 0 deletions sycl/source/detail/event_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ class event_impl {
// ONEAPI_DEVICE_SELECTOR. Deferring may lead to conficts with noexcept
// event methods. This ::get() call uses static vars to read and parse the
// ODS env var exactly once.
// CP
CPOUT << "event_impl<HES_Complte>() constructor" << std::endl;
SYCLConfig<ONEAPI_DEVICE_SELECTOR>::get();
}

Expand Down
46 changes: 24 additions & 22 deletions sycl/source/detail/global_handler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -60,10 +60,6 @@ class ObjectUsageCounter {

LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector);
MCounter--;
GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr();
if (RTGlobalObjHandler) {
RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter);
}
} catch (std::exception &e) {
__SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ObjectUsageCounter", e);
}
Expand Down Expand Up @@ -234,6 +230,8 @@ void GlobalHandler::releaseDefaultContexts() {
// Note that on Windows the destruction of the default context
// races with the detaching of the DLL object that calls urLoaderTearDown.

CPOUT << "releaseDefaultContext()" << std::endl;

MPlatformToDefaultContextCache.Inst.reset(nullptr);
}

Expand All @@ -242,7 +240,10 @@ struct EarlyShutdownHandler {
try {
#ifdef _WIN32
// on Windows we keep to the existing shutdown procedure
GlobalHandler::instance().releaseDefaultContexts();
//GlobalHandler::instance().endDeferredRelease();
//GlobalHandler::instance().releaseDefaultContexts();
//shutdown_early();
//shutdown_late();
#else
shutdown_early();
#endif
Expand Down Expand Up @@ -284,21 +285,22 @@ void GlobalHandler::unloadAdapters() {
}

void GlobalHandler::prepareSchedulerToRelease(bool Blocking) {
// CP - fix part 1
#ifndef _WIN32
if (Blocking)
drainThreadPool();
#endif
if (MScheduler.Inst)
MScheduler.Inst->releaseResources(Blocking ? BlockingT::BLOCKING
: BlockingT::NON_BLOCKING);
#endif
}

void GlobalHandler::drainThreadPool() {
if (MHostTaskThreadPool.Inst)
MHostTaskThreadPool.Inst->drain();
}

#ifdef _WIN32
//#ifdef _WIN32
// because of something not-yet-understood on Windows
// threads may be shutdown once the end of main() is reached
// making an orderly shutdown difficult. Fortunately, Windows
Expand All @@ -309,7 +311,7 @@ void shutdown_win() {
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
Handler->unloadAdapters();
}
#else
//#else
void shutdown_early() {
const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector};
GlobalHandler *&Handler = GlobalHandler::getInstancePtr();
Expand All @@ -321,8 +323,10 @@ void shutdown_early() {

// Ensure neither host task is working so that no default context is accessed
// upon its release
CPOUT << "shutdown_early() about to prepareSchedulerToRelease" << std::endl;
Handler->prepareSchedulerToRelease(true);

CPOUT << "shutdown_early() about to finishAndWait()" << std::endl;
if (Handler->MHostTaskThreadPool.Inst)
Handler->MHostTaskThreadPool.Inst->finishAndWait();

Expand Down Expand Up @@ -353,9 +357,18 @@ void shutdown_late() {
delete Handler;
Handler = nullptr;
}
#endif
//#endif

#ifdef _WIN32
// a simple wrapper to catch and stream any exception then continue
template <typename F>
void safe_call(F func) {
try {
func();
} catch (const std::exception& e) {
std::cerr << "exception in DllMain DLL_PROCESS_DETACH " << e.what() << std::endl;
}
}
extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL,
DWORD fdwReason,
LPVOID lpReserved) {
Expand All @@ -374,19 +387,8 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL,
if (PrintUrTrace)
std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl;

#ifdef XPTI_ENABLE_INSTRUMENTATION
if (xptiTraceEnabled())
return TRUE; // When doing xpti tracing, we can't safely call shutdown.
// TODO: figure out what XPTI is doing that prevents
// release.
#endif

try {
shutdown_win();
} catch (std::exception &e) {
__SYCL_REPORT_EXCEPTION_TO_STREAM("exception in shutdown_win", e);
return FALSE;
}
safe_call([](){ shutdown_early(); });
safe_call([](){ shutdown_late(); });
break;
case DLL_PROCESS_ATTACH:
if (PrintUrTrace)
Expand Down
17 changes: 17 additions & 0 deletions sycl/source/detail/kernel_bundle_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -533,6 +533,23 @@ class kernel_bundle_impl {
ContextImpl->getHandleRef(), spirv.data(), spirv.size(), nullptr,
&UrProgram);
// program created by urProgramCreateWithIL is implicitly retained.

// -------------------------------------
// CP - adding to try an force an imbalance
Adapter->call<detail::UrApiKind::urProgramRetain>(UrProgram);

// rebalance:
// this works.
// Adapter->call<detail::UrApiKind::urProgramRelease>(UrProgram);

// this ALSO works. So much for my theory.
detail::UrFuncInfo<detail::UrApiKind::urProgramRelease> programReleaseInfo;
auto programRelease = programReleaseInfo.getFuncPtrFromModule(detail::ur::getURLoaderLibrary());
programRelease(UrProgram);

// -------------------------------------


if (UrProgram == nullptr)
throw sycl::exception(
sycl::make_error_code(errc::invalid),
Expand Down
6 changes: 6 additions & 0 deletions sycl/source/detail/kernel_impl.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,8 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr Context,
Context)),
MCreatedFromSource(true), MKernelBundleImpl(std::move(KernelBundleImpl)),
MIsInterop(true), MKernelArgMaskPtr{ArgMask} {
// CP
CPOUT << "kernel_impl(kernel, context, bundle, argmas) constructor" << std::endl;
ur_context_handle_t UrContext = nullptr;
// Using the adapter from the passed ContextImpl
getAdapter()->call<UrApiKind::urKernelGetInfo>(
Expand Down Expand Up @@ -53,10 +55,14 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr ContextImpl,
MCreatedFromSource(false), MDeviceImageImpl(std::move(DeviceImageImpl)),
MKernelBundleImpl(std::move(KernelBundleImpl)),
MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex} {
// CP
CPOUT << "kernel_impl(kernel, context, deviceimage, bundle, argmask, program, mutex) constructor" << std::endl;
MIsInterop = MKernelBundleImpl->isInterop();
}

kernel_impl::~kernel_impl() {
// CP
CPOUT << "~kernel_impl() called" << std::endl;
try {
// TODO catch an exception and put it to list of asynchronous exceptions
getAdapter()->call<UrApiKind::urKernelRelease>(MKernel);
Expand Down
13 changes: 13 additions & 0 deletions sycl/source/detail/kernel_program_cache.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -112,14 +112,20 @@ class KernelProgramCache {
struct ProgramBuildResult : public BuildResult<ur_program_handle_t> {
AdapterPtr Adapter;
ProgramBuildResult(const AdapterPtr &Adapter) : Adapter(Adapter) {
// CP
CPOUT << "ProgramBuildResult(adapter)" << std::endl;
Val = nullptr;
}
ProgramBuildResult(const AdapterPtr &Adapter, BuildState InitialState)
: Adapter(Adapter) {
// CP
CPOUT << "ProgramBuildResult(adapter, state)" << std::endl;
Val = nullptr;
this->State.store(InitialState);
}
~ProgramBuildResult() {
// CP
CPOUT << "~ProgramBuildResult()" << std::endl;
try {
if (Val) {
ur_result_t Err =
Expand All @@ -131,6 +137,9 @@ class KernelProgramCache {
e);
}
}
ProgramBuildResult() = delete;
ProgramBuildResult(const ProgramBuildResult&) = delete;
ProgramBuildResult& operator=(const ProgramBuildResult&) = delete;
};
using ProgramBuildResultPtr = std::shared_ptr<ProgramBuildResult>;

Expand Down Expand Up @@ -198,9 +207,13 @@ class KernelProgramCache {
struct KernelBuildResult : public BuildResult<KernelArgMaskPairT> {
AdapterPtr Adapter;
KernelBuildResult(const AdapterPtr &Adapter) : Adapter(Adapter) {
// CP
CPOUT << "KernelBuildResult(adapter)" << std::endl;
Val.first = nullptr;
}
~KernelBuildResult() {
// CP
CPOUT << "~KernelBuildResult()" << std::endl;
try {
if (Val.first) {
ur_result_t Err =
Expand Down
6 changes: 6 additions & 0 deletions sycl/source/detail/queue_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,8 @@ class queue_impl {
MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
MQueueID{
MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} {
// CP
CPOUT << "queue_impl() constructor" << std::endl;
verifyProps(PropList);
if (has_property<property::queue::enable_profiling>()) {
if (has_property<ext::oneapi::property::queue::discard_events>())
Expand Down Expand Up @@ -232,6 +234,7 @@ class queue_impl {
MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
MQueueID{
MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} {
CPOUT << "queue_impl() interop constructor" << std::endl;
queue_impl_interop(UrQueue);
}

Expand All @@ -251,11 +254,14 @@ class queue_impl {
MIsProfilingEnabled(has_property<property::queue::enable_profiling>()),
MQueueID{
MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} {
CPOUT << "queue_impl() verify/interop constructor " << std::endl;
verifyProps(PropList);
queue_impl_interop(UrQueue);
}

~queue_impl() {
// CP
CPOUT << "~queue_impl() called" << std::endl;
try {
#if XPTI_ENABLE_INSTRUMENTATION
// The trace event created in the constructor should be active through the
Expand Down
Loading
Loading