From 167244f4760982d8c9c92015275192c17b5c3763 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Tue, 7 Jan 2025 16:25:40 -0800 Subject: [PATCH 01/11] just storing stuff off. None of this is really needed except maybe the default/copy constructor removal of ProgramBuildResults class --- sycl/include/sycl/kernel_bundle.hpp | 3 ++ sycl/source/detail/kernel_bundle_impl.hpp | 17 ++++++++++ sycl/source/detail/kernel_program_cache.hpp | 3 ++ sycl/source/kernel_bundle.cpp | 35 +++++++++++++++++++++ sycl/test-e2e/Basic/subdevice.cpp | 1 + 5 files changed, 59 insertions(+) diff --git a/sycl/include/sycl/kernel_bundle.hpp b/sycl/include/sycl/kernel_bundle.hpp index a61019efdbf5..6b9924c3aaf2 100644 --- a/sycl/include/sycl/kernel_bundle.hpp +++ b/sycl/include/sycl/kernel_bundle.hpp @@ -541,6 +541,9 @@ kernel_bundle get_kernel_bundle(const context &Ctx) { return get_kernel_bundle(Ctx, Ctx.get_devices()); } +// CP +__SYCL_EXPORT void test_release(sycl::context &Ctx, ur_native_handle_t NativeHandle); + namespace detail { // Internal non-template versions of get_kernel_bundle API which is used by diff --git a/sycl/source/detail/kernel_bundle_impl.hpp b/sycl/source/detail/kernel_bundle_impl.hpp index e538318f807b..40f4c8469440 100644 --- a/sycl/source/detail/kernel_bundle_impl.hpp +++ b/sycl/source/detail/kernel_bundle_impl.hpp @@ -533,6 +533,23 @@ class kernel_bundle_impl { ContextImpl->getHandleRef(), spirv.data(), spirv.size(), nullptr, &UrProgram); // program created by urProgramCreateWithIL is implicitly retained. + + // ------------------------------------- + // CP - adding to try an force an imbalance + Adapter->call(UrProgram); + + // rebalance: + // this works. + // Adapter->call(UrProgram); + + // this ALSO works. So much for my theory. + detail::UrFuncInfo programReleaseInfo; + auto programRelease = programReleaseInfo.getFuncPtrFromModule(detail::ur::getURLoaderLibrary()); + programRelease(UrProgram); + + // ------------------------------------- + + if (UrProgram == nullptr) throw sycl::exception( sycl::make_error_code(errc::invalid), diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 968cb9b24b05..a41221d4d476 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -131,6 +131,9 @@ class KernelProgramCache { e); } } + ProgramBuildResult() = delete; + ProgramBuildResult(const ProgramBuildResult&) = delete; + ProgramBuildResult& operator=(const ProgramBuildResult&) = delete; }; using ProgramBuildResultPtr = std::shared_ptr; diff --git a/sycl/source/kernel_bundle.cpp b/sycl/source/kernel_bundle.cpp index e19c2b9df2a7..619e13367816 100644 --- a/sycl/source/kernel_bundle.cpp +++ b/sycl/source/kernel_bundle.cpp @@ -328,6 +328,41 @@ bool is_compatible(const std::vector &KernelIDs, const device &Dev) { return true; } +// CP +#include "detail/adapter.hpp" +#include +//using ContextImplPtr = std::shared_ptr; +void test_release(sycl::context &Context, ur_native_handle_t NativeHandle) { + //detail::ProgramManager::getInstance(); + + const detail::ContextImplPtr &ContextImpl = getSyclObjImpl(Context); + const detail::AdapterPtr &Adapter = ContextImpl->getAdapter(); + ur_program_handle_t UrProgram = nullptr; + ur_program_native_properties_t Properties{}; + Properties.stype = UR_STRUCTURE_TYPE_PROGRAM_NATIVE_PROPERTIES; + Properties.isNativeHandleOwned = true; //!KeepOwnership; + + detail::UrFuncInfo programReleaseInfo; + auto programRelease = programReleaseInfo.getFuncPtrFromModule(detail::ur::getURLoaderLibrary()); + + Adapter->call( NativeHandle, ContextImpl->getHandleRef(), &Properties, &UrProgram); + + uint32_t NumDevices = 0; + Adapter->call( UrProgram, UR_PROGRAM_INFO_NUM_DEVICES, sizeof(NumDevices), &NumDevices, nullptr); + std::cout << "NumDevice: " << NumDevices << std::endl; + + // is this necessary? SHouldn't be. + Adapter->call(UrProgram); + Adapter->call(UrProgram); + + // + //Adapter->call(UrProgram); + + // or + //programRelease(UrProgram); + +} + ///////////////////////// // * kernel_compiler extension * ///////////////////////// diff --git a/sycl/test-e2e/Basic/subdevice.cpp b/sycl/test-e2e/Basic/subdevice.cpp index 6ad21afc12ad..9aeeebb261a7 100644 --- a/sycl/test-e2e/Basic/subdevice.cpp +++ b/sycl/test-e2e/Basic/subdevice.cpp @@ -154,6 +154,7 @@ int main() { } // test exception + std::cout << "TEST EXCEPTION!! " << std::endl; try { const size_t out_of_bounds = std::numeric_limits::max(); const auto partition = From 6233533b7d7e13e61ab1a39c5d8b212d571e84a8 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 10 Jan 2025 09:12:49 -0800 Subject: [PATCH 02/11] constructor/destructor logging and commentary --- sycl/source/detail/context_impl.cpp | 8 +++++ sycl/source/detail/event_impl.cpp | 8 ++++- sycl/source/detail/event_impl.hpp | 2 ++ sycl/source/detail/kernel_impl.cpp | 6 ++++ sycl/source/detail/kernel_program_cache.hpp | 10 ++++++ sycl/source/detail/queue_impl.hpp | 6 ++++ sycl/source/detail/scheduler/commands.cpp | 34 ++++++++++++++++--- sycl/source/detail/scheduler/commands.hpp | 6 +++- sycl/source/detail/scheduler/scheduler.cpp | 36 ++++++++++++++------- 9 files changed, 98 insertions(+), 18 deletions(-) diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 0dcddfa24d8e..3c7f167cce57 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -33,6 +33,8 @@ context_impl::context_impl(const device &Device, async_handler AsyncHandler, MContext(nullptr), MPlatform(detail::getSyclObjImpl(Device.get_platform())), MPropList(PropList), MSupportBufferLocationByDevices(NotChecked) { + // CP + std::cout << "context_impl(dev, async, plist) constructor" << std::endl; verifyProps(PropList); MKernelProgramCache.setContextPtr(this); } @@ -43,6 +45,8 @@ context_impl::context_impl(const std::vector Devices, : MOwnedByRuntime(true), MAsyncHandler(AsyncHandler), MDevices(Devices), MContext(nullptr), MPlatform(), MPropList(PropList), MSupportBufferLocationByDevices(NotChecked) { + // CP + std::cout << "context_impl(devices, async, plist) constructor" << std::endl; verifyProps(PropList); MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform()); std::vector DeviceIds; @@ -76,6 +80,8 @@ context_impl::context_impl(ur_context_handle_t UrContext, : MOwnedByRuntime(OwnedByRuntime), MAsyncHandler(AsyncHandler), MDevices(DeviceList), MContext(UrContext), MPlatform(), MSupportBufferLocationByDevices(NotChecked) { + // CP + std::cout << "context_impl(UrContext, async, Adapter, DeviceList, OwnedByRuntime) constructor" << std::endl; if (!MDevices.empty()) { MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform()); } else { @@ -126,6 +132,8 @@ cl_context context_impl::get() const { } context_impl::~context_impl() { + // CP + std::cout << "~context_impl() called" << std::endl; try { // Free all events associated with the initialization of device globals. for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers) diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index f6e5edfc92e7..074f374927f2 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -43,6 +43,8 @@ void event_impl::initContextIfNeeded() { } event_impl::~event_impl() { + // CP + std::cout << "~event_impl() called" << std::endl; try { auto Handle = this->getHandle(); if (Handle) @@ -145,7 +147,8 @@ void event_impl::setContextImpl(const ContextImplPtr &Context) { event_impl::event_impl(ur_event_handle_t Event, const context &SyclContext) : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), MIsFlushed(true), MState(HES_Complete) { - + // CP + std::cout << "event_impl(ur_event_handle_t, context )" << std::endl; ur_context_handle_t TempContext; getAdapter()->call( this->getHandle(), UR_EVENT_INFO_CONTEXT, sizeof(ur_context_handle_t), @@ -163,6 +166,9 @@ event_impl::event_impl(const QueueImplPtr &Queue) : MQueue{Queue}, MIsProfilingEnabled{!Queue || Queue->MIsProfilingEnabled}, MFallbackProfiling{MIsProfilingEnabled && Queue && Queue->isProfilingFallback()} { + // CP + std::cout << "event_impl(QueueImplPtr)" << std::endl; + if (Queue) this->setContextImpl(Queue->getContextImplPtr()); else { diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 768de7082662..0fdaf6eeb4b7 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -55,6 +55,8 @@ class event_impl { // ONEAPI_DEVICE_SELECTOR. Deferring may lead to conficts with noexcept // event methods. This ::get() call uses static vars to read and parse the // ODS env var exactly once. + // CP + std::cout << "event_impl() constructor" << std::endl; SYCLConfig::get(); } diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index f89ef979f7c9..45eb08dcd111 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -24,6 +24,8 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr Context, Context)), MCreatedFromSource(true), MKernelBundleImpl(std::move(KernelBundleImpl)), MIsInterop(true), MKernelArgMaskPtr{ArgMask} { + // CP + std::cout << "kernel_impl(kernel, context, bundle, argmas) constructor" << std::endl; ur_context_handle_t UrContext = nullptr; // Using the adapter from the passed ContextImpl getAdapter()->call( @@ -53,10 +55,14 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr ContextImpl, MCreatedFromSource(false), MDeviceImageImpl(std::move(DeviceImageImpl)), MKernelBundleImpl(std::move(KernelBundleImpl)), MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex} { + // CP + std::cout << "kernel_impl(kernel, context, deviceimage, bundle, argmask, program, mutex) constructor" << std::endl; MIsInterop = MKernelBundleImpl->isInterop(); } kernel_impl::~kernel_impl() { + // CP + std::cout << "~kernel_impl() called" << std::endl; try { // TODO catch an exception and put it to list of asynchronous exceptions getAdapter()->call(MKernel); diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index a41221d4d476..938b86d86dec 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -112,14 +112,20 @@ class KernelProgramCache { struct ProgramBuildResult : public BuildResult { AdapterPtr Adapter; ProgramBuildResult(const AdapterPtr &Adapter) : Adapter(Adapter) { + // CP + std::cout << "ProgramBuildResult(adapter)" << std::endl; Val = nullptr; } ProgramBuildResult(const AdapterPtr &Adapter, BuildState InitialState) : Adapter(Adapter) { + // CP + std::cout << "ProgramBuildResult(adapter, state)" << std::endl; Val = nullptr; this->State.store(InitialState); } ~ProgramBuildResult() { + // CP + std::cout << "~ProgramBuildResult()" << std::endl; try { if (Val) { ur_result_t Err = @@ -201,9 +207,13 @@ class KernelProgramCache { struct KernelBuildResult : public BuildResult { AdapterPtr Adapter; KernelBuildResult(const AdapterPtr &Adapter) : Adapter(Adapter) { + // CP + std::cout << "KernelBuildResult(adapter)" << std::endl; Val.first = nullptr; } ~KernelBuildResult() { + // CP + std::cout << "~KernelBuildResult()" << std::endl; try { if (Val.first) { ur_result_t Err = diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 0f99f49d1257..41453da4a7da 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -121,6 +121,8 @@ class queue_impl { MIsProfilingEnabled(has_property()), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { + // CP + std::cout << "queue_impl() constructor" << std::endl; verifyProps(PropList); if (has_property()) { if (has_property()) @@ -232,6 +234,7 @@ class queue_impl { MIsProfilingEnabled(has_property()), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { + std::cout << "queue_impl() interop constructor" << std::endl; queue_impl_interop(UrQueue); } @@ -251,11 +254,14 @@ class queue_impl { MIsProfilingEnabled(has_property()), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { + std::cout << "queue_impl() verify/interop constructor " << std::endl; verifyProps(PropList); queue_impl_interop(UrQueue); } ~queue_impl() { + // CP + std::cout << "~queue_impl() called" << std::endl; try { #if XPTI_ENABLE_INSTRUMENTATION // The trace event created in the constructor should be active through the diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 31db161f8872..24c45b9827d9 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1063,6 +1063,8 @@ AllocaCommandBase::AllocaCommandBase(CommandType Type, QueueImplPtr Queue, : Command(Type, Queue), MLinkedAllocaCmd(LinkedAllocaCmd), MIsLeaderAlloca(nullptr == LinkedAllocaCmd), MIsConst(IsConst), MRequirement(std::move(Req)), MReleaseCmd(Queue, this) { + // CP + std::cout << "AllocaCommandBase constructor " << MType << std::endl; MRequirement.MAccessMode = access::mode::read_write; emitInstrumentationDataProxy(); } @@ -1265,6 +1267,8 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { ReleaseCommand::ReleaseCommand(QueueImplPtr Queue, AllocaCommandBase *AllocaCmd) : Command(CommandType::RELEASE, std::move(Queue)), MAllocaCmd(AllocaCmd) { + // CP + std::cout << "ReleaseCommmand(Q, Allocacmd) constructor " << MType << std::endl; emitInstrumentationDataProxy(); } @@ -1390,6 +1394,8 @@ MapMemObject::MapMemObject(AllocaCommandBase *SrcAllocaCmd, Requirement Req, : Command(CommandType::MAP_MEM_OBJ, std::move(Queue)), MSrcAllocaCmd(SrcAllocaCmd), MSrcReq(std::move(Req)), MDstPtr(DstPtr), MMapMode(MapMode) { + // CP + std::cout << "MapMemObject constructor " << MType << std::endl; emitInstrumentationDataProxy(); } @@ -1452,6 +1458,8 @@ UnMapMemObject::UnMapMemObject(AllocaCommandBase *DstAllocaCmd, Requirement Req, void **SrcPtr, QueueImplPtr Queue) : Command(CommandType::UNMAP_MEM_OBJ, std::move(Queue)), MDstAllocaCmd(DstAllocaCmd), MDstReq(std::move(Req)), MSrcPtr(SrcPtr) { + // CP + std::cout << "UnMapMemObject constructor " << MType << std::endl; emitInstrumentationDataProxy(); } @@ -1540,6 +1548,8 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq, MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq)), MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstAllocaCmd(DstAllocaCmd) { + // CP + std::cout << "MemCpyCommand constructor " << MType << std::endl; if (MSrcQueue) { MEvent->setContextImpl(MSrcQueue->getContextImplPtr()); } @@ -1714,6 +1724,8 @@ MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq, : Command(CommandType::COPY_MEMORY, std::move(DstQueue)), MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq)), MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstPtr(DstPtr) { + // CP + std::cout << "MemCpyCommandHost constructor " << MType << std::endl; if (MSrcQueue) { MEvent->setContextImpl(MSrcQueue->getContextImplPtr()); } @@ -1788,6 +1800,8 @@ ur_result_t MemCpyCommandHost::enqueueImp() { } EmptyCommand::EmptyCommand() : Command(CommandType::EMPTY_TASK, nullptr) { + // CP + std::cout << "EmptyCommand() " << MType << std::endl; emitInstrumentationDataProxy(); } @@ -1882,7 +1896,8 @@ UpdateHostRequirementCommand::UpdateHostRequirementCommand( void **DstPtr) : Command(CommandType::UPDATE_REQUIREMENT, std::move(Queue)), MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(Req)), MDstPtr(DstPtr) { - + // CP + std::cout << "UpdateHostRequirementCommand constructor " << MType << std::endl; emitInstrumentationDataProxy(); } @@ -1982,6 +1997,8 @@ ExecCGCommand::ExecCGCommand( : Command(CommandType::RUN_CG, std::move(Queue), CommandBuffer, Dependencies), MEventNeeded(EventNeeded), MCommandGroup(std::move(CommandGroup)) { + // CP + std::cout << "ExecCGCommand constructor " << MType << std::endl; if (MCommandGroup->getType() == detail::CGType::CodeplayHostTask) { MEvent->setSubmittedQueue( static_cast(MCommandGroup.get())->MQueue); @@ -2810,6 +2827,9 @@ void enqueueImpKernel( KernelIsCooperative, KernelUsesClusterLaunch, WorkGroupMemorySize, BinImage, KernelName); + // CP + // Error = UR_RESULT_SUCCESS; //<-- this changes the leak. + const AdapterPtr &Adapter = Queue->getAdapter(); if (!SyclKernelImpl && !MSyclKernel) { Adapter->call(Kernel); @@ -2817,11 +2837,13 @@ void enqueueImpKernel( } } if (UR_RESULT_SUCCESS != Error) { + // CP + // throwing an exception here does not help. so the problem isn't in the "handling" below + // If we have got non-success error code, let's analyze it to emit nice // exception explaining what was wrong const device_impl &DeviceImpl = *(Queue->getDeviceImplPtr()); - detail::enqueue_kernel_launch::handleErrorOrWarning(Error, DeviceImpl, - Kernel, NDRDesc); + detail::enqueue_kernel_launch::handleErrorOrWarning(Error, DeviceImpl, Kernel, NDRDesc); } } @@ -3677,7 +3699,11 @@ UpdateCommandBufferCommand::UpdateCommandBufferCommand( std::vector> Nodes) : Command(CommandType::UPDATE_CMD_BUFFER, Queue), MGraph(Graph), - MNodes(Nodes) {} + MNodes(Nodes) { + // CP + std::cout << "Create UpdateCommandBufferCommand " << MType << std::endl; + + } ur_result_t UpdateCommandBufferCommand::enqueueImp() { waitForPreparedHostEvents(); diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 239cebf52176..0432b2251674 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -219,7 +219,11 @@ class Command { return nullptr; } - virtual ~Command() { MEvent->cleanDepEventsThroughOneLevel(); } + virtual ~Command() { + // CP + std::cout << "~Command() type: " << MType << std::endl; + MEvent->cleanDepEventsThroughOneLevel(); + } const char *getBlockReason() const; diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index efbbb52acab7..f69f58b35528 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -128,7 +128,7 @@ EventImplPtr Scheduler::addCG( NewEvent->setSubmissionTime(); } - enqueueCommandForCG(NewEvent, AuxiliaryCmds); + enqueueCommandForCG(NewEvent, AuxiliaryCmds); // may throw if (!AuxiliaryResources.empty()) registerAuxiliaryResources(NewEvent, std::move(AuxiliaryResources)); @@ -149,19 +149,32 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, EnqueueResultT Res; bool Enqueued; + // CP + // I'm not sure the logic here is correct. This Cleanup is only used in the case of an error. + // It seems like NewEvent should have its command + // cleared no matter what. Also, shouldn't the cleanup used when staging auxiliary commands + // be cleaning up auxillary commands, instead o the main command, which was never enqueued? + // Or should we be cleaning up everything? I think enqueueCommand has to be careful about + // it's error semantics. Did it enqueue or not? + // In my case, we successfully enqueue one dependency, but the GC command itself fails (does it, it throws certainly, but maybe afterwards?) auto CleanUp = [&]() { - if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { - if (NewEvent) { - NewEvent->setCommand(nullptr); - } - delete NewCmd; - } + // this will clear up the CG command, but not the others, and also results in crash during shutdown + NewEvent->setComplete(); + NewEvent->setCommand(nullptr); + delete NewCmd; + + // if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { + // if (NewEvent) { + // NewEvent->setCommand(nullptr); + // } + // delete NewCmd; + // } }; for (Command *Cmd : AuxiliaryCmds) { Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd, Blocking); - try { + try { // CP <== this is wrong. Should encompass enqueeuCommand if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw exception(make_error_code(errc::runtime), "Auxiliary enqueue process failed."); @@ -177,11 +190,9 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, // TODO: Check if lazy mode. EnqueueResultT Res; try { - bool Enqueued = GraphProcessor::enqueueCommand( - NewCmd, Lock, Res, ToCleanUp, NewCmd, Blocking); + bool Enqueued = GraphProcessor::enqueueCommand(NewCmd, Lock, Res, ToCleanUp, NewCmd, Blocking); if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) - throw exception(make_error_code(errc::runtime), - "Enqueue process failed."); + throw exception(make_error_code(errc::runtime), "Enqueue process failed."); } catch (...) { // enqueueCommand() func and if statement above may throw an exception, // so destroy required resources to avoid memory leak @@ -190,6 +201,7 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, } } } + // THIS cleanup op has no bearing on the outcome. Wihtout it the good app still has no leaks. cleanupCommands(ToCleanUp); } From 3b32f644805a322e0abbf491e07f4b94244fd1a0 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Mon, 13 Jan 2025 16:14:59 -0800 Subject: [PATCH 03/11] the fix works. awesome --- sycl/include/sycl/buffer.hpp | 1 + sycl/source/detail/buffer_impl.hpp | 2 ++ sycl/source/detail/scheduler/scheduler.cpp | 27 ++++++++++++++-------- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/sycl/include/sycl/buffer.hpp b/sycl/include/sycl/buffer.hpp index 8b3a14af607f..af48a2d00840 100644 --- a/sycl/include/sycl/buffer.hpp +++ b/sycl/include/sycl/buffer.hpp @@ -477,6 +477,7 @@ class buffer : public detail::buffer_plain, buffer &operator=(buffer &&rhs) = default; ~buffer() { + std::cout << "~buffer()" << std::endl; try { buffer_plain::handleRelease(); } catch (std::exception &e) { diff --git a/sycl/source/detail/buffer_impl.hpp b/sycl/source/detail/buffer_impl.hpp index be3a529f1771..ea2be1d7fe21 100644 --- a/sycl/source/detail/buffer_impl.hpp +++ b/sycl/source/detail/buffer_impl.hpp @@ -140,9 +140,11 @@ class buffer_impl final : public SYCLMemObjT { MemObjType getType() const override { return MemObjType::Buffer; } ~buffer_impl() { + std::cout << "~buffer_impl" << std::endl; try { BaseT::updateHostMemory(); } catch (...) { + std::cout << "exception during updateHostMemory() called from ~buffer_impl" << std::endl; } destructorNotification(this); } diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index f69f58b35528..cccd8d3a3d40 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -52,6 +52,10 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, #endif std::vector ToCleanUp; for (Command *Cmd : Record->MReadLeaves) { + // CP -- possible fix + if(Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed) + continue; // nothing to do + EnqueueResultT Res; bool Enqueued = GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd); @@ -65,6 +69,10 @@ void Scheduler::waitForRecordToFinish(MemObjRecord *Record, GraphProcessor::waitForEvent(Cmd->getEvent(), GraphReadLock, ToCleanUp); } for (Command *Cmd : Record->MWriteLeaves) { + // CP -- possible fix + if(Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed) + continue; // nothing to do + EnqueueResultT Res; bool Enqueued = GraphProcessor::enqueueCommand(Cmd, GraphReadLock, Res, ToCleanUp, Cmd); @@ -159,16 +167,17 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, // In my case, we successfully enqueue one dependency, but the GC command itself fails (does it, it throws certainly, but maybe afterwards?) auto CleanUp = [&]() { // this will clear up the CG command, but not the others, and also results in crash during shutdown - NewEvent->setComplete(); - NewEvent->setCommand(nullptr); - delete NewCmd; + // NewEvent->setComplete(); + // NewEvent->setCommand(nullptr); + // delete NewCmd; - // if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { - // if (NewEvent) { - // NewEvent->setCommand(nullptr); - // } - // delete NewCmd; - // } + // original logic. doesn't do anything b.c. MDeps or MUsers rarely both empty + if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { + if (NewEvent) { + NewEvent->setCommand(nullptr); + } + delete NewCmd; + } }; for (Command *Cmd : AuxiliaryCmds) { From fe874e6a7d6b9f739650c8760f8a1b54776d5632 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Thu, 16 Jan 2025 10:20:52 -0800 Subject: [PATCH 04/11] more logging, one more fix, and one probably not needed fix and not done --- .../detail/error_handling/error_handling.cpp | 1 + sycl/source/detail/global_handler.cpp | 2 ++ sycl/source/detail/scheduler/commands.cpp | 6 +++-- sycl/source/detail/scheduler/commands.hpp | 23 +++++++++++++++++-- .../source/detail/scheduler/graph_builder.cpp | 21 +++++++++++++---- sycl/source/detail/scheduler/scheduler.cpp | 4 +++- sycl/source/detail/scheduler/scheduler.hpp | 9 +++++++- 7 files changed, 56 insertions(+), 10 deletions(-) diff --git a/sycl/source/detail/error_handling/error_handling.cpp b/sycl/source/detail/error_handling/error_handling.cpp index daae5563776f..e9e2eca874e4 100644 --- a/sycl/source/detail/error_handling/error_handling.cpp +++ b/sycl/source/detail/error_handling/error_handling.cpp @@ -192,6 +192,7 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, for (size_t I = 0; I < 3; ++I) { if (MaxThreadsPerBlock[I] < NDRDesc.LocalSize[I]) { + std::cout << "---- THROWING ---- " << std::endl; throw sycl::exception(make_error_code(errc::nd_range), "The number of work-items in each dimension of a " "work-group cannot exceed {" + diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 5669fbdaacc5..6ba0fdc9cffe 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -234,6 +234,8 @@ void GlobalHandler::releaseDefaultContexts() { // Note that on Windows the destruction of the default context // races with the detaching of the DLL object that calls urLoaderTearDown. + std::cout << "releaseDefaultContext()" << std::endl; + MPlatformToDefaultContextCache.Inst.reset(nullptr); } diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index 24c45b9827d9..a7c14e471d2f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -828,9 +828,11 @@ bool Command::producesPiEvent() const { return true; } bool Command::supportsPostEnqueueCleanup() const { return true; } +// CP - moar fix ( this fix and the change to the assert in graph_builder.cpp are not likely really needed. ) bool Command::readyForCleanup() const { return MLeafCounter == 0 && - MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess; + (MEnqueueStatus == EnqueueResultT::SyclEnqueueSuccess || + MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed); } Command *Command::addDep(DepDesc NewDep, std::vector &ToCleanUp) { @@ -1549,7 +1551,7 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq, MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstAllocaCmd(DstAllocaCmd) { // CP - std::cout << "MemCpyCommand constructor " << MType << std::endl; + std::cout << "MemCpyCommand constructor " << MType << " " << this << std::endl; if (MSrcQueue) { MEvent->setContextImpl(MSrcQueue->getContextImplPtr()); } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 0432b2251674..12a11500f575 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -83,7 +83,26 @@ struct EnqueueResultT { struct DepDesc { DepDesc(Command *DepCommand, const Requirement *Req, AllocaCommandBase *AllocaCmd) - : MDepCommand(DepCommand), MDepRequirement(Req), MAllocaCmd(AllocaCmd) {} + : MDepCommand(DepCommand), MDepRequirement(Req), MAllocaCmd(AllocaCmd) { + std::cout << "DepDesc() constructor(" << this << "). MDepCommand: " << MDepCommand << std::endl; + } + + ~DepDesc() { + std::cout << "~DepDesc() destructor(" << this << "). MDepCommand: " << MDepCommand << std::endl; + } + + DepDesc() = delete; // CP + + // CP - not sure if removing the copy constructor will help identify the problem. + // will take this up if needed. + //DepDesc(const DepDesc &Other) = delete; // CP + + //copy constructor. + DepDesc(const DepDesc &Other) + : MDepCommand(Other.MDepCommand), MDepRequirement(Other.MDepRequirement), + MAllocaCmd(Other.MAllocaCmd) { + std::cout << "DepDesc() copy constructor(" << this << "). MDepCommand: " << MDepCommand << std::endl; + } friend bool operator<(const DepDesc &Lhs, const DepDesc &Rhs) { return std::tie(Lhs.MDepRequirement, Lhs.MDepCommand) < @@ -221,7 +240,7 @@ class Command { virtual ~Command() { // CP - std::cout << "~Command() type: " << MType << std::endl; + std::cout << "~Command() type: " << MType << " " << this << std::endl; MEvent->cleanDepEventsThroughOneLevel(); } diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index 5636309cdccc..d2560706a93b 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -193,7 +193,9 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, LeavesCollection::EnqueueListT &ToEnqueue) { // Add the old leaf as a dependency for the new one by duplicating one // of the requirements for the current record + // CP - if we drop copy constructor, this will have to change DepDesc Dep = findDepForRecord(Dependant, Record); + std::cout << "DepDesc change dependency. Before MDepCommand: " << Dep.MDepCommand << " After: " << Dependency << std::endl; Dep.MDepCommand = Dependency; std::vector ToCleanUp; Command *ConnectionCmd = Dependant->addDep(Dep, ToCleanUp); @@ -201,6 +203,7 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, ToEnqueue.push_back(ConnectionCmd); --(Dependency->MLeafCounter); + std::cout << "reduced Dependency->MLeafCounter: " << Dependency->MLeafCounter << " cleanup? " << Dependency->readyForCleanup() << std::endl; if (Dependency->readyForCleanup()) ToCleanUp.push_back(Dependency); for (Command *Cmd : ToCleanUp) @@ -486,8 +489,11 @@ Scheduler::GraphBuilder::addCopyBack(Requirement *Req, std::vector ToCleanUp; for (Command *Dep : Deps) { - Command *ConnCmd = MemCpyCmd->addDep( - DepDesc{Dep, MemCpyCmd->getRequirement(), SrcAllocaCmd}, ToCleanUp); + // CP -- moar fix + if (Dep->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed) + continue; // nothing to do + + Command *ConnCmd = MemCpyCmd->addDep(DepDesc{Dep, MemCpyCmd->getRequirement(), SrcAllocaCmd}, ToCleanUp); if (ConnCmd) ToEnqueue.push_back(ConnCmd); } @@ -626,6 +632,7 @@ Scheduler::GraphBuilder::findDepsForReq(MemObjRecord *Record, // A helper function for finding a command dependency on a specific memory // object +// CP - will have to change if we move from copy constructor DepDesc Scheduler::GraphBuilder::findDepForRecord(Command *Cmd, MemObjRecord *Record) { for (const DepDesc &DD : Cmd->MDeps) { @@ -634,7 +641,8 @@ DepDesc Scheduler::GraphBuilder::findDepForRecord(Command *Cmd, } } assert(false && "No dependency found for a leaf of the record"); - return {nullptr, nullptr, nullptr}; + static DespDesc nullDep{nullptr, nullptr, nullptr}; + return nullDep; } // The function searches for the alloca command matching context and @@ -1157,8 +1165,10 @@ void Scheduler::GraphBuilder::cleanupCommand( if (SYCLConfig::get()) return; + // CP -- change to include failed probably not needed. assert(Cmd->MLeafCounter == 0 && - (Cmd->isSuccessfullyEnqueued() || AllowUnsubmitted)); + (Cmd->isSuccessfullyEnqueued() || AllowUnsubmitted || + (Cmd->MEnqueueStatus == EnqueueResultT::SyclEnqueueFailed))); Command::CommandType CmdT = Cmd->getType(); assert(CmdT != Command::ALLOCA && CmdT != Command::ALLOCA_SUB_BUF); @@ -1169,6 +1179,7 @@ void Scheduler::GraphBuilder::cleanupCommand( for (DepDesc &Dep : UserCmd->MDeps) { // Link the users of the command to the alloca command(s) instead if (Dep.MDepCommand == Cmd) { + std::cout << "DepDesc changing MDepCommand. Before: " << Dep.MDepCommand; // ... unless the user is the alloca itself. if (Dep.MAllocaCmd == UserCmd) { Dep.MDepCommand = nullptr; @@ -1176,6 +1187,7 @@ void Scheduler::GraphBuilder::cleanupCommand( Dep.MDepCommand = Dep.MAllocaCmd; Dep.MDepCommand->MUsers.insert(UserCmd); } + std::cout << " After: " << Dep.MDepCommand << std::endl; } } } @@ -1247,6 +1259,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent( // add user to Dep.MDepCommand is already performed beyond this if branch { DepDesc DepOnConnect = Dep; + std::cout << "connect DepDesc changing MDepCommand. Before: " << DepOnConnect.MDepCommand << " After: " << ConnectCmd << std::endl; DepOnConnect.MDepCommand = ConnectCmd; // Dismiss the result here as it's not a connection now, diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index cccd8d3a3d40..86f5499e675b 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -474,7 +474,9 @@ void Scheduler::NotifyHostTaskCompletion(Command *Cmd) { { ReadLockT Lock = acquireReadLock(); - std::vector Deps = Cmd->MDeps; + // CP -- not needed + //std::vector Deps = Cmd->MDeps; + // Host tasks are cleaned up upon completion rather than enqueuing. if (Cmd->MLeafCounter == 0) { ToCleanUp.push_back(Cmd); diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index c6d2d07600d1..fcd2ed13005e 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -201,7 +201,13 @@ struct MemObjRecord { MemObjRecord(ContextImplPtr Ctx, std::size_t LeafLimit, LeavesCollection::AllocateDependencyF AllocateDependency) : MReadLeaves{this, LeafLimit, AllocateDependency}, - MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx} {} + MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx} { + std::cout << "MemObjRecord() constructor" << std::endl; + } + + ~MemObjRecord() { + std::cout << "~MemObjRecord destructor" << std::endl; + } // Contains all allocation commands for the memory object. std::vector MAllocaCommands; @@ -693,6 +699,7 @@ class Scheduler { protected: /// Finds a command dependency corresponding to the record. + // CP - switch if we drop copy constructor DepDesc findDepForRecord(Command *Cmd, MemObjRecord *Record); /// Searches for suitable alloca in memory record. From af088ed3ce27b956843a1f840a4f5a687a37e7e2 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Thu, 16 Jan 2025 10:22:46 -0800 Subject: [PATCH 05/11] mispelling and coment --- sycl/source/detail/scheduler/graph_builder.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index d2560706a93b..f9f396e7d8cd 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -641,7 +641,8 @@ DepDesc Scheduler::GraphBuilder::findDepForRecord(Command *Cmd, } } assert(false && "No dependency found for a leaf of the record"); - static DespDesc nullDep{nullptr, nullptr, nullptr}; + // CP -- if we drop copy constructor, this will have to change + static DepDesc nullDep{nullptr, nullptr, nullptr}; return nullDep; } From 2bf4eb7cd749b9e39264dc037514c0c2ddeb2b3c Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Thu, 16 Jan 2025 16:31:11 -0800 Subject: [PATCH 06/11] useless attempt --- sycl/source/detail/scheduler/scheduler.cpp | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 86f5499e675b..fe402773961d 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -165,11 +165,18 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, // Or should we be cleaning up everything? I think enqueueCommand has to be careful about // it's error semantics. Did it enqueue or not? // In my case, we successfully enqueue one dependency, but the GC command itself fails (does it, it throws certainly, but maybe afterwards?) - auto CleanUp = [&]() { + auto CleanUp = [&](Command* SomeCmd) { // this will clear up the CG command, but not the others, and also results in crash during shutdown // NewEvent->setComplete(); // NewEvent->setCommand(nullptr); // delete NewCmd; + + // doesn't do anything. + for(auto Desc : SomeCmd->MDeps) { + if (auto DepCmd = Desc.MDepCommand) { + DepCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueFailed; + } + } // original logic. doesn't do anything b.c. MDeps or MUsers rarely both empty if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { @@ -190,7 +197,7 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, } catch (...) { // enqueueCommand() func and if statement above may throw an exception, // so destroy required resources to avoid memory leak - CleanUp(); + CleanUp(Cmd); std::rethrow_exception(std::current_exception()); } } @@ -205,7 +212,7 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, } catch (...) { // enqueueCommand() func and if statement above may throw an exception, // so destroy required resources to avoid memory leak - CleanUp(); + CleanUp(NewCmd); std::rethrow_exception(std::current_exception()); } } From 896ceec9c29d51732a73b5be46f6117158b83ae2 Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 17 Jan 2025 09:04:34 -0800 Subject: [PATCH 07/11] more things that don't work --- sycl/source/detail/scheduler/scheduler.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index fe402773961d..0453a48caf17 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -175,9 +175,14 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, for(auto Desc : SomeCmd->MDeps) { if (auto DepCmd = Desc.MDepCommand) { DepCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueFailed; + //DepCmd->MMarks.MToBeDeleted = true; + DepCmd->MMarkedForCleanup = true; } } + //auto someRecord = this.getMemObjRecord( req ); + //MGraphBuilder.cleanupCommandsForRecord( someRecord ); //MemObjRecord *Record) + // original logic. doesn't do anything b.c. MDeps or MUsers rarely both empty if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { if (NewEvent) { From a6521ae3438266740cc4189ab5ede851c9df3e6a Mon Sep 17 00:00:00 2001 From: Chris Perkins Date: Fri, 17 Jan 2025 16:33:28 -0800 Subject: [PATCH 08/11] zOMG --- sycl/source/detail/scheduler/scheduler.cpp | 31 +++++++++++++--------- 1 file changed, 18 insertions(+), 13 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 0453a48caf17..60fa238ba01f 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -172,24 +172,29 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, // delete NewCmd; // doesn't do anything. - for(auto Desc : SomeCmd->MDeps) { - if (auto DepCmd = Desc.MDepCommand) { - DepCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueFailed; - //DepCmd->MMarks.MToBeDeleted = true; - DepCmd->MMarkedForCleanup = true; - } - } + // for(auto Desc : SomeCmd->MDeps) { + // if (auto DepCmd = Desc.MDepCommand) { + // DepCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueFailed; + // //DepCmd->MMarks.MToBeDeleted = true; + // DepCmd->MMarkedForCleanup = true; + // } + // } //auto someRecord = this.getMemObjRecord( req ); //MGraphBuilder.cleanupCommandsForRecord( someRecord ); //MemObjRecord *Record) + + // CP -- latest and last fix!! + cleanupCommands(ToCleanUp); // original logic. doesn't do anything b.c. MDeps or MUsers rarely both empty - if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { - if (NewEvent) { - NewEvent->setCommand(nullptr); - } - delete NewCmd; - } + // if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { + // if (NewEvent) { + // NewEvent->setCommand(nullptr); + // } + // delete NewCmd; + // } + + }; for (Command *Cmd : AuxiliaryCmds) { From 35c697e075d1306eb041e43645253139042026db Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Thu, 23 Jan 2025 14:32:41 -0800 Subject: [PATCH 09/11] latest --- sycl/source/detail/scheduler/scheduler.cpp | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 60fa238ba01f..bd9dd94eba57 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -183,16 +183,18 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, //auto someRecord = this.getMemObjRecord( req ); //MGraphBuilder.cleanupCommandsForRecord( someRecord ); //MemObjRecord *Record) - // CP -- latest and last fix!! - cleanupCommands(ToCleanUp); + // original logic. doesn't do anything b.c. MDeps or MUsers rarely both empty - // if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { - // if (NewEvent) { - // NewEvent->setCommand(nullptr); - // } - // delete NewCmd; - // } + if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { + if (NewEvent) { + NewEvent->setCommand(nullptr); + } + delete NewCmd; + } + + // CP -- latest and last fix!! + cleanupCommands(ToCleanUp); }; From a6df298ea991f1ccdccb12fbe9ae7d7750000a2c Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Mon, 27 Jan 2025 11:00:14 -0800 Subject: [PATCH 10/11] seems to be the best fix --- sycl/source/detail/global_handler.cpp | 3 ++- sycl/source/detail/scheduler/scheduler.cpp | 21 ++++++++++++++++++--- sycl/source/detail/sycl_mem_obj_t.cpp | 1 + 3 files changed, 21 insertions(+), 4 deletions(-) diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index 6ba0fdc9cffe..bb8a92e40729 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -286,13 +286,14 @@ void GlobalHandler::unloadAdapters() { } void GlobalHandler::prepareSchedulerToRelease(bool Blocking) { + // CP - fix part 1 #ifndef _WIN32 if (Blocking) drainThreadPool(); +#endif if (MScheduler.Inst) MScheduler.Inst->releaseResources(Blocking ? BlockingT::BLOCKING : BlockingT::NON_BLOCKING); -#endif } void GlobalHandler::drainThreadPool() { diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index bd9dd94eba57..28f63fa63975 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -306,11 +306,22 @@ void Scheduler::waitForEvent(const EventImplPtr &Event, bool *Success) { bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, bool StrictLock) { + std::cout << "Scheduler::removeMemoryObject() " << StrictLock << std::endl; MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj); + std::cout << "Got a Record: " << Record << std::endl; if (!Record) // No operations were performed on the mem object return true; + //CP - fix part 2. Should this be the same for linux? +#ifdef _WIN32 + bool allowWait = MemObj->hasUserDataPtr(); +#else + bool allowWait = true; +#endif + std::cout << "allowWait: " << allowWait << std::endl; + + if(allowWait) { // This only needs a shared mutex as it only involves enqueueing and // awaiting for events @@ -429,6 +440,11 @@ void Scheduler::releaseResources(BlockingT Blocking) { cleanupCommands({}); cleanupAuxiliaryResources(Blocking); + + // CP - fix part 3 +#ifdef _WIN32 + cleanupDeferredMemObjects(Blocking); //<-- if non-blocking DeleteCmdExpception fails, otherwise host-task-failure freezes +#else // We need loop since sometimes we may need new objects to be added to // deferred mem objects storage during cleanup. Known example is: we cleanup // existing deferred mem objects under write lock, during this process we @@ -439,6 +455,7 @@ void Scheduler::releaseResources(BlockingT Blocking) { do { cleanupDeferredMemObjects(Blocking); } while (Blocking == BlockingT::BLOCKING && !isDeferredMemObjectsEmpty()); +#endif } MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { @@ -533,10 +550,8 @@ void Scheduler::cleanupDeferredMemObjects(BlockingT Blocking) { std::vector> TempStorage; { std::lock_guard LockDef{MDeferredMemReleaseMutex}; - MDeferredMemObjRelease.swap(TempStorage); + MDeferredMemObjRelease.swap(TempStorage); // it is here that host-task-failure freezes. destructors, presumably? } - // if any objects in TempStorage exist - it is leaving scope and being - // deleted } std::vector> ObjsReadyToRelease; diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 25e092232ae7..5b34c38f4f6e 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -140,6 +140,7 @@ void SYCLMemObjT::updateHostMemory(void *const Ptr) { } void SYCLMemObjT::updateHostMemory() { + std::cout << "updateHostMemory() entered. Have Upload functor: " << (MUploadDataFunctor != nullptr) << " NeedWriteBack: " << MNeedWriteBack << std::endl; if ((MUploadDataFunctor != nullptr) && MNeedWriteBack) MUploadDataFunctor(); From 6516070ac9f20b5fc49151c4c00e45e201f20c9a Mon Sep 17 00:00:00 2001 From: "Perkins, Chris" Date: Wed, 29 Jan 2025 16:20:45 -0800 Subject: [PATCH 11/11] both shutdown_early()/late() and use CPOUT --- sycl/include/sycl/buffer.hpp | 2 +- sycl/include/sycl/detail/common.hpp | 4 ++ sycl/include/sycl/property_list.hpp | 2 + sycl/source/detail/buffer_impl.hpp | 2 +- sycl/source/detail/context_impl.cpp | 8 ++-- sycl/source/detail/context_impl.hpp | 2 + .../detail/error_handling/error_handling.cpp | 2 +- sycl/source/detail/event_impl.cpp | 6 +-- sycl/source/detail/event_impl.hpp | 2 +- sycl/source/detail/global_handler.cpp | 43 +++++++++-------- sycl/source/detail/kernel_impl.cpp | 6 +-- sycl/source/detail/kernel_program_cache.hpp | 10 ++-- sycl/source/detail/queue_impl.hpp | 8 ++-- sycl/source/detail/scheduler/commands.cpp | 20 ++++---- sycl/source/detail/scheduler/commands.hpp | 8 ++-- .../source/detail/scheduler/graph_builder.cpp | 10 ++-- sycl/source/detail/scheduler/scheduler.cpp | 46 +++---------------- sycl/source/detail/scheduler/scheduler.hpp | 4 +- sycl/source/detail/sycl_mem_obj_t.cpp | 2 +- sycl/test-e2e/Basic/backend_info.cpp | 2 +- sycl/test-e2e/Basic/buffer/buffer_release.cpp | 2 +- 21 files changed, 83 insertions(+), 108 deletions(-) diff --git a/sycl/include/sycl/buffer.hpp b/sycl/include/sycl/buffer.hpp index af48a2d00840..49fc26260a8f 100644 --- a/sycl/include/sycl/buffer.hpp +++ b/sycl/include/sycl/buffer.hpp @@ -477,7 +477,7 @@ class buffer : public detail::buffer_plain, buffer &operator=(buffer &&rhs) = default; ~buffer() { - std::cout << "~buffer()" << std::endl; + CPOUT << "~buffer()" << std::endl; try { buffer_plain::handleRelease(); } catch (std::exception &e) { diff --git a/sycl/include/sycl/detail/common.hpp b/sycl/include/sycl/detail/common.hpp index 19f4d85ab30d..718d6aa5c0d0 100644 --- a/sycl/include/sycl/detail/common.hpp +++ b/sycl/include/sycl/detail/common.hpp @@ -18,6 +18,10 @@ #include // for enable_if_t #include // for index_sequence, make_i... +// CP +//#define CPOUT std::clog +#define CPOUT std::clog.rdbuf(NULL); std::clog + // Default signature enables the passing of user code location information to // public methods as a default argument. namespace sycl { diff --git a/sycl/include/sycl/property_list.hpp b/sycl/include/sycl/property_list.hpp index 8f019eb7cf22..94aaf8011c84 100644 --- a/sycl/include/sycl/property_list.hpp +++ b/sycl/include/sycl/property_list.hpp @@ -18,6 +18,8 @@ #include // for conditional_t, enable... #include // for vector + + namespace sycl { inline namespace _V1 { namespace ext::oneapi { diff --git a/sycl/source/detail/buffer_impl.hpp b/sycl/source/detail/buffer_impl.hpp index ea2be1d7fe21..a6ae00991f2f 100644 --- a/sycl/source/detail/buffer_impl.hpp +++ b/sycl/source/detail/buffer_impl.hpp @@ -140,7 +140,7 @@ class buffer_impl final : public SYCLMemObjT { MemObjType getType() const override { return MemObjType::Buffer; } ~buffer_impl() { - std::cout << "~buffer_impl" << std::endl; + CPOUT << "~buffer_impl" << std::endl; try { BaseT::updateHostMemory(); } catch (...) { diff --git a/sycl/source/detail/context_impl.cpp b/sycl/source/detail/context_impl.cpp index 3c7f167cce57..4736d3b45173 100644 --- a/sycl/source/detail/context_impl.cpp +++ b/sycl/source/detail/context_impl.cpp @@ -34,7 +34,7 @@ context_impl::context_impl(const device &Device, async_handler AsyncHandler, MPlatform(detail::getSyclObjImpl(Device.get_platform())), MPropList(PropList), MSupportBufferLocationByDevices(NotChecked) { // CP - std::cout << "context_impl(dev, async, plist) constructor" << std::endl; + CPOUT << "context_impl(dev, async, plist) constructor" << std::endl; verifyProps(PropList); MKernelProgramCache.setContextPtr(this); } @@ -46,7 +46,7 @@ context_impl::context_impl(const std::vector Devices, MContext(nullptr), MPlatform(), MPropList(PropList), MSupportBufferLocationByDevices(NotChecked) { // CP - std::cout << "context_impl(devices, async, plist) constructor" << std::endl; + CPOUT << "context_impl(devices, async, plist) constructor" << std::endl; verifyProps(PropList); MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform()); std::vector DeviceIds; @@ -81,7 +81,7 @@ context_impl::context_impl(ur_context_handle_t UrContext, MDevices(DeviceList), MContext(UrContext), MPlatform(), MSupportBufferLocationByDevices(NotChecked) { // CP - std::cout << "context_impl(UrContext, async, Adapter, DeviceList, OwnedByRuntime) constructor" << std::endl; + CPOUT << "context_impl(UrContext, async, Adapter, DeviceList, OwnedByRuntime) constructor" << std::endl; if (!MDevices.empty()) { MPlatform = detail::getSyclObjImpl(MDevices[0].get_platform()); } else { @@ -133,7 +133,7 @@ cl_context context_impl::get() const { context_impl::~context_impl() { // CP - std::cout << "~context_impl() called" << std::endl; + CPOUT << "~context_impl() called" << std::endl; try { // Free all events associated with the initialization of device globals. for (auto &DeviceGlobalInitializer : MDeviceGlobalInitializers) diff --git a/sycl/source/detail/context_impl.hpp b/sycl/source/detail/context_impl.hpp index 65e1fda0a5a7..7baf542ed1db 100644 --- a/sycl/source/detail/context_impl.hpp +++ b/sycl/source/detail/context_impl.hpp @@ -23,6 +23,8 @@ #include #include + + namespace sycl { inline namespace _V1 { // Forward declaration diff --git a/sycl/source/detail/error_handling/error_handling.cpp b/sycl/source/detail/error_handling/error_handling.cpp index e9e2eca874e4..78697a258e0a 100644 --- a/sycl/source/detail/error_handling/error_handling.cpp +++ b/sycl/source/detail/error_handling/error_handling.cpp @@ -192,7 +192,7 @@ void handleInvalidWorkGroupSize(const device_impl &DeviceImpl, for (size_t I = 0; I < 3; ++I) { if (MaxThreadsPerBlock[I] < NDRDesc.LocalSize[I]) { - std::cout << "---- THROWING ---- " << std::endl; + CPOUT << "---- THROWING ---- " << std::endl; throw sycl::exception(make_error_code(errc::nd_range), "The number of work-items in each dimension of a " "work-group cannot exceed {" + diff --git a/sycl/source/detail/event_impl.cpp b/sycl/source/detail/event_impl.cpp index 074f374927f2..e2b972f65cec 100644 --- a/sycl/source/detail/event_impl.cpp +++ b/sycl/source/detail/event_impl.cpp @@ -44,7 +44,7 @@ void event_impl::initContextIfNeeded() { event_impl::~event_impl() { // CP - std::cout << "~event_impl() called" << std::endl; + CPOUT << "~event_impl() called" << std::endl; try { auto Handle = this->getHandle(); if (Handle) @@ -148,7 +148,7 @@ event_impl::event_impl(ur_event_handle_t Event, const context &SyclContext) : MEvent(Event), MContext(detail::getSyclObjImpl(SyclContext)), MIsFlushed(true), MState(HES_Complete) { // CP - std::cout << "event_impl(ur_event_handle_t, context )" << std::endl; + CPOUT << "event_impl(ur_event_handle_t, context )" << std::endl; ur_context_handle_t TempContext; getAdapter()->call( this->getHandle(), UR_EVENT_INFO_CONTEXT, sizeof(ur_context_handle_t), @@ -167,7 +167,7 @@ event_impl::event_impl(const QueueImplPtr &Queue) MFallbackProfiling{MIsProfilingEnabled && Queue && Queue->isProfilingFallback()} { // CP - std::cout << "event_impl(QueueImplPtr)" << std::endl; + CPOUT << "event_impl(QueueImplPtr)" << std::endl; if (Queue) this->setContextImpl(Queue->getContextImplPtr()); diff --git a/sycl/source/detail/event_impl.hpp b/sycl/source/detail/event_impl.hpp index 0fdaf6eeb4b7..223e5fd0e1dc 100644 --- a/sycl/source/detail/event_impl.hpp +++ b/sycl/source/detail/event_impl.hpp @@ -56,7 +56,7 @@ class event_impl { // event methods. This ::get() call uses static vars to read and parse the // ODS env var exactly once. // CP - std::cout << "event_impl() constructor" << std::endl; + CPOUT << "event_impl() constructor" << std::endl; SYCLConfig::get(); } diff --git a/sycl/source/detail/global_handler.cpp b/sycl/source/detail/global_handler.cpp index bb8a92e40729..3b76b9aaa4d7 100644 --- a/sycl/source/detail/global_handler.cpp +++ b/sycl/source/detail/global_handler.cpp @@ -60,10 +60,6 @@ class ObjectUsageCounter { LockGuard Guard(GlobalHandler::MSyclGlobalHandlerProtector); MCounter--; - GlobalHandler *RTGlobalObjHandler = GlobalHandler::getInstancePtr(); - if (RTGlobalObjHandler) { - RTGlobalObjHandler->prepareSchedulerToRelease(!MCounter); - } } catch (std::exception &e) { __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in ~ObjectUsageCounter", e); } @@ -234,7 +230,7 @@ void GlobalHandler::releaseDefaultContexts() { // Note that on Windows the destruction of the default context // races with the detaching of the DLL object that calls urLoaderTearDown. - std::cout << "releaseDefaultContext()" << std::endl; + CPOUT << "releaseDefaultContext()" << std::endl; MPlatformToDefaultContextCache.Inst.reset(nullptr); } @@ -244,7 +240,10 @@ struct EarlyShutdownHandler { try { #ifdef _WIN32 // on Windows we keep to the existing shutdown procedure - GlobalHandler::instance().releaseDefaultContexts(); + //GlobalHandler::instance().endDeferredRelease(); + //GlobalHandler::instance().releaseDefaultContexts(); + //shutdown_early(); + //shutdown_late(); #else shutdown_early(); #endif @@ -301,7 +300,7 @@ void GlobalHandler::drainThreadPool() { MHostTaskThreadPool.Inst->drain(); } -#ifdef _WIN32 + //#ifdef _WIN32 // because of something not-yet-understood on Windows // threads may be shutdown once the end of main() is reached // making an orderly shutdown difficult. Fortunately, Windows @@ -312,7 +311,7 @@ void shutdown_win() { GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); Handler->unloadAdapters(); } -#else + //#else void shutdown_early() { const LockGuard Lock{GlobalHandler::MSyclGlobalHandlerProtector}; GlobalHandler *&Handler = GlobalHandler::getInstancePtr(); @@ -324,8 +323,10 @@ void shutdown_early() { // Ensure neither host task is working so that no default context is accessed // upon its release + CPOUT << "shutdown_early() about to prepareSchedulerToRelease" << std::endl; Handler->prepareSchedulerToRelease(true); + CPOUT << "shutdown_early() about to finishAndWait()" << std::endl; if (Handler->MHostTaskThreadPool.Inst) Handler->MHostTaskThreadPool.Inst->finishAndWait(); @@ -356,9 +357,18 @@ void shutdown_late() { delete Handler; Handler = nullptr; } -#endif + //#endif #ifdef _WIN32 +// a simple wrapper to catch and stream any exception then continue +template +void safe_call(F func) { + try { + func(); + } catch (const std::exception& e) { + std::cerr << "exception in DllMain DLL_PROCESS_DETACH " << e.what() << std::endl; + } +} extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD fdwReason, LPVOID lpReserved) { @@ -377,19 +387,8 @@ extern "C" __SYCL_EXPORT BOOL WINAPI DllMain(HINSTANCE hinstDLL, if (PrintUrTrace) std::cout << "---> DLL_PROCESS_DETACH syclx.dll\n" << std::endl; -#ifdef XPTI_ENABLE_INSTRUMENTATION - if (xptiTraceEnabled()) - return TRUE; // When doing xpti tracing, we can't safely call shutdown. - // TODO: figure out what XPTI is doing that prevents - // release. -#endif - - try { - shutdown_win(); - } catch (std::exception &e) { - __SYCL_REPORT_EXCEPTION_TO_STREAM("exception in shutdown_win", e); - return FALSE; - } + safe_call([](){ shutdown_early(); }); + safe_call([](){ shutdown_late(); }); break; case DLL_PROCESS_ATTACH: if (PrintUrTrace) diff --git a/sycl/source/detail/kernel_impl.cpp b/sycl/source/detail/kernel_impl.cpp index 45eb08dcd111..8b05efe5e984 100644 --- a/sycl/source/detail/kernel_impl.cpp +++ b/sycl/source/detail/kernel_impl.cpp @@ -25,7 +25,7 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr Context, MCreatedFromSource(true), MKernelBundleImpl(std::move(KernelBundleImpl)), MIsInterop(true), MKernelArgMaskPtr{ArgMask} { // CP - std::cout << "kernel_impl(kernel, context, bundle, argmas) constructor" << std::endl; + CPOUT << "kernel_impl(kernel, context, bundle, argmas) constructor" << std::endl; ur_context_handle_t UrContext = nullptr; // Using the adapter from the passed ContextImpl getAdapter()->call( @@ -56,13 +56,13 @@ kernel_impl::kernel_impl(ur_kernel_handle_t Kernel, ContextImplPtr ContextImpl, MKernelBundleImpl(std::move(KernelBundleImpl)), MKernelArgMaskPtr{ArgMask}, MCacheMutex{CacheMutex} { // CP - std::cout << "kernel_impl(kernel, context, deviceimage, bundle, argmask, program, mutex) constructor" << std::endl; + CPOUT << "kernel_impl(kernel, context, deviceimage, bundle, argmask, program, mutex) constructor" << std::endl; MIsInterop = MKernelBundleImpl->isInterop(); } kernel_impl::~kernel_impl() { // CP - std::cout << "~kernel_impl() called" << std::endl; + CPOUT << "~kernel_impl() called" << std::endl; try { // TODO catch an exception and put it to list of asynchronous exceptions getAdapter()->call(MKernel); diff --git a/sycl/source/detail/kernel_program_cache.hpp b/sycl/source/detail/kernel_program_cache.hpp index 938b86d86dec..badf43222557 100644 --- a/sycl/source/detail/kernel_program_cache.hpp +++ b/sycl/source/detail/kernel_program_cache.hpp @@ -113,19 +113,19 @@ class KernelProgramCache { AdapterPtr Adapter; ProgramBuildResult(const AdapterPtr &Adapter) : Adapter(Adapter) { // CP - std::cout << "ProgramBuildResult(adapter)" << std::endl; + CPOUT << "ProgramBuildResult(adapter)" << std::endl; Val = nullptr; } ProgramBuildResult(const AdapterPtr &Adapter, BuildState InitialState) : Adapter(Adapter) { // CP - std::cout << "ProgramBuildResult(adapter, state)" << std::endl; + CPOUT << "ProgramBuildResult(adapter, state)" << std::endl; Val = nullptr; this->State.store(InitialState); } ~ProgramBuildResult() { // CP - std::cout << "~ProgramBuildResult()" << std::endl; + CPOUT << "~ProgramBuildResult()" << std::endl; try { if (Val) { ur_result_t Err = @@ -208,12 +208,12 @@ class KernelProgramCache { AdapterPtr Adapter; KernelBuildResult(const AdapterPtr &Adapter) : Adapter(Adapter) { // CP - std::cout << "KernelBuildResult(adapter)" << std::endl; + CPOUT << "KernelBuildResult(adapter)" << std::endl; Val.first = nullptr; } ~KernelBuildResult() { // CP - std::cout << "~KernelBuildResult()" << std::endl; + CPOUT << "~KernelBuildResult()" << std::endl; try { if (Val.first) { ur_result_t Err = diff --git a/sycl/source/detail/queue_impl.hpp b/sycl/source/detail/queue_impl.hpp index 41453da4a7da..273cbee31b13 100644 --- a/sycl/source/detail/queue_impl.hpp +++ b/sycl/source/detail/queue_impl.hpp @@ -122,7 +122,7 @@ class queue_impl { MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { // CP - std::cout << "queue_impl() constructor" << std::endl; + CPOUT << "queue_impl() constructor" << std::endl; verifyProps(PropList); if (has_property()) { if (has_property()) @@ -234,7 +234,7 @@ class queue_impl { MIsProfilingEnabled(has_property()), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { - std::cout << "queue_impl() interop constructor" << std::endl; + CPOUT << "queue_impl() interop constructor" << std::endl; queue_impl_interop(UrQueue); } @@ -254,14 +254,14 @@ class queue_impl { MIsProfilingEnabled(has_property()), MQueueID{ MNextAvailableQueueID.fetch_add(1, std::memory_order_relaxed)} { - std::cout << "queue_impl() verify/interop constructor " << std::endl; + CPOUT << "queue_impl() verify/interop constructor " << std::endl; verifyProps(PropList); queue_impl_interop(UrQueue); } ~queue_impl() { // CP - std::cout << "~queue_impl() called" << std::endl; + CPOUT << "~queue_impl() called" << std::endl; try { #if XPTI_ENABLE_INSTRUMENTATION // The trace event created in the constructor should be active through the diff --git a/sycl/source/detail/scheduler/commands.cpp b/sycl/source/detail/scheduler/commands.cpp index a7c14e471d2f..c1c55378b31f 100644 --- a/sycl/source/detail/scheduler/commands.cpp +++ b/sycl/source/detail/scheduler/commands.cpp @@ -1066,7 +1066,7 @@ AllocaCommandBase::AllocaCommandBase(CommandType Type, QueueImplPtr Queue, MIsLeaderAlloca(nullptr == LinkedAllocaCmd), MIsConst(IsConst), MRequirement(std::move(Req)), MReleaseCmd(Queue, this) { // CP - std::cout << "AllocaCommandBase constructor " << MType << std::endl; + CPOUT << "AllocaCommandBase constructor " << MType << std::endl; MRequirement.MAccessMode = access::mode::read_write; emitInstrumentationDataProxy(); } @@ -1270,7 +1270,7 @@ void AllocaSubBufCommand::printDot(std::ostream &Stream) const { ReleaseCommand::ReleaseCommand(QueueImplPtr Queue, AllocaCommandBase *AllocaCmd) : Command(CommandType::RELEASE, std::move(Queue)), MAllocaCmd(AllocaCmd) { // CP - std::cout << "ReleaseCommmand(Q, Allocacmd) constructor " << MType << std::endl; + CPOUT << "ReleaseCommmand(Q, Allocacmd) constructor " << MType << std::endl; emitInstrumentationDataProxy(); } @@ -1397,7 +1397,7 @@ MapMemObject::MapMemObject(AllocaCommandBase *SrcAllocaCmd, Requirement Req, MSrcAllocaCmd(SrcAllocaCmd), MSrcReq(std::move(Req)), MDstPtr(DstPtr), MMapMode(MapMode) { // CP - std::cout << "MapMemObject constructor " << MType << std::endl; + CPOUT << "MapMemObject constructor " << MType << std::endl; emitInstrumentationDataProxy(); } @@ -1461,7 +1461,7 @@ UnMapMemObject::UnMapMemObject(AllocaCommandBase *DstAllocaCmd, Requirement Req, : Command(CommandType::UNMAP_MEM_OBJ, std::move(Queue)), MDstAllocaCmd(DstAllocaCmd), MDstReq(std::move(Req)), MSrcPtr(SrcPtr) { // CP - std::cout << "UnMapMemObject constructor " << MType << std::endl; + CPOUT << "UnMapMemObject constructor " << MType << std::endl; emitInstrumentationDataProxy(); } @@ -1551,7 +1551,7 @@ MemCpyCommand::MemCpyCommand(Requirement SrcReq, MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstAllocaCmd(DstAllocaCmd) { // CP - std::cout << "MemCpyCommand constructor " << MType << " " << this << std::endl; + CPOUT << "MemCpyCommand constructor " << MType << " " << this << std::endl; if (MSrcQueue) { MEvent->setContextImpl(MSrcQueue->getContextImplPtr()); } @@ -1727,7 +1727,7 @@ MemCpyCommandHost::MemCpyCommandHost(Requirement SrcReq, MSrcQueue(SrcQueue), MSrcReq(std::move(SrcReq)), MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(DstReq)), MDstPtr(DstPtr) { // CP - std::cout << "MemCpyCommandHost constructor " << MType << std::endl; + CPOUT << "MemCpyCommandHost constructor " << MType << std::endl; if (MSrcQueue) { MEvent->setContextImpl(MSrcQueue->getContextImplPtr()); } @@ -1803,7 +1803,7 @@ ur_result_t MemCpyCommandHost::enqueueImp() { EmptyCommand::EmptyCommand() : Command(CommandType::EMPTY_TASK, nullptr) { // CP - std::cout << "EmptyCommand() " << MType << std::endl; + CPOUT << "EmptyCommand() " << MType << std::endl; emitInstrumentationDataProxy(); } @@ -1899,7 +1899,7 @@ UpdateHostRequirementCommand::UpdateHostRequirementCommand( : Command(CommandType::UPDATE_REQUIREMENT, std::move(Queue)), MSrcAllocaCmd(SrcAllocaCmd), MDstReq(std::move(Req)), MDstPtr(DstPtr) { // CP - std::cout << "UpdateHostRequirementCommand constructor " << MType << std::endl; + CPOUT << "UpdateHostRequirementCommand constructor " << MType << std::endl; emitInstrumentationDataProxy(); } @@ -2000,7 +2000,7 @@ ExecCGCommand::ExecCGCommand( Dependencies), MEventNeeded(EventNeeded), MCommandGroup(std::move(CommandGroup)) { // CP - std::cout << "ExecCGCommand constructor " << MType << std::endl; + CPOUT << "ExecCGCommand constructor " << MType << std::endl; if (MCommandGroup->getType() == detail::CGType::CodeplayHostTask) { MEvent->setSubmittedQueue( static_cast(MCommandGroup.get())->MQueue); @@ -3703,7 +3703,7 @@ UpdateCommandBufferCommand::UpdateCommandBufferCommand( : Command(CommandType::UPDATE_CMD_BUFFER, Queue), MGraph(Graph), MNodes(Nodes) { // CP - std::cout << "Create UpdateCommandBufferCommand " << MType << std::endl; + CPOUT << "Create UpdateCommandBufferCommand " << MType << std::endl; } diff --git a/sycl/source/detail/scheduler/commands.hpp b/sycl/source/detail/scheduler/commands.hpp index 12a11500f575..dc8f4c6f3607 100644 --- a/sycl/source/detail/scheduler/commands.hpp +++ b/sycl/source/detail/scheduler/commands.hpp @@ -84,11 +84,11 @@ struct DepDesc { DepDesc(Command *DepCommand, const Requirement *Req, AllocaCommandBase *AllocaCmd) : MDepCommand(DepCommand), MDepRequirement(Req), MAllocaCmd(AllocaCmd) { - std::cout << "DepDesc() constructor(" << this << "). MDepCommand: " << MDepCommand << std::endl; + CPOUT << "DepDesc() constructor(" << this << "). MDepCommand: " << MDepCommand << std::endl; } ~DepDesc() { - std::cout << "~DepDesc() destructor(" << this << "). MDepCommand: " << MDepCommand << std::endl; + CPOUT << "~DepDesc() destructor(" << this << "). MDepCommand: " << MDepCommand << std::endl; } DepDesc() = delete; // CP @@ -101,7 +101,7 @@ struct DepDesc { DepDesc(const DepDesc &Other) : MDepCommand(Other.MDepCommand), MDepRequirement(Other.MDepRequirement), MAllocaCmd(Other.MAllocaCmd) { - std::cout << "DepDesc() copy constructor(" << this << "). MDepCommand: " << MDepCommand << std::endl; + CPOUT << "DepDesc() copy constructor(" << this << "). MDepCommand: " << MDepCommand << std::endl; } friend bool operator<(const DepDesc &Lhs, const DepDesc &Rhs) { @@ -240,7 +240,7 @@ class Command { virtual ~Command() { // CP - std::cout << "~Command() type: " << MType << " " << this << std::endl; + CPOUT << "~Command() type: " << MType << " " << this << std::endl; MEvent->cleanDepEventsThroughOneLevel(); } diff --git a/sycl/source/detail/scheduler/graph_builder.cpp b/sycl/source/detail/scheduler/graph_builder.cpp index f9f396e7d8cd..307954851b87 100644 --- a/sycl/source/detail/scheduler/graph_builder.cpp +++ b/sycl/source/detail/scheduler/graph_builder.cpp @@ -195,7 +195,7 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, // of the requirements for the current record // CP - if we drop copy constructor, this will have to change DepDesc Dep = findDepForRecord(Dependant, Record); - std::cout << "DepDesc change dependency. Before MDepCommand: " << Dep.MDepCommand << " After: " << Dependency << std::endl; + CPOUT << "DepDesc change dependency. Before MDepCommand: " << Dep.MDepCommand << " After: " << Dependency << std::endl; Dep.MDepCommand = Dependency; std::vector ToCleanUp; Command *ConnectionCmd = Dependant->addDep(Dep, ToCleanUp); @@ -203,7 +203,7 @@ Scheduler::GraphBuilder::getOrInsertMemObjRecord(const QueueImplPtr &Queue, ToEnqueue.push_back(ConnectionCmd); --(Dependency->MLeafCounter); - std::cout << "reduced Dependency->MLeafCounter: " << Dependency->MLeafCounter << " cleanup? " << Dependency->readyForCleanup() << std::endl; + CPOUT << "reduced Dependency->MLeafCounter: " << Dependency->MLeafCounter << " cleanup? " << Dependency->readyForCleanup() << std::endl; if (Dependency->readyForCleanup()) ToCleanUp.push_back(Dependency); for (Command *Cmd : ToCleanUp) @@ -1180,7 +1180,7 @@ void Scheduler::GraphBuilder::cleanupCommand( for (DepDesc &Dep : UserCmd->MDeps) { // Link the users of the command to the alloca command(s) instead if (Dep.MDepCommand == Cmd) { - std::cout << "DepDesc changing MDepCommand. Before: " << Dep.MDepCommand; + CPOUT << "DepDesc changing MDepCommand. Before: " << Dep.MDepCommand; // ... unless the user is the alloca itself. if (Dep.MAllocaCmd == UserCmd) { Dep.MDepCommand = nullptr; @@ -1188,7 +1188,7 @@ void Scheduler::GraphBuilder::cleanupCommand( Dep.MDepCommand = Dep.MAllocaCmd; Dep.MDepCommand->MUsers.insert(UserCmd); } - std::cout << " After: " << Dep.MDepCommand << std::endl; + CPOUT << " After: " << Dep.MDepCommand << std::endl; } } } @@ -1260,7 +1260,7 @@ Command *Scheduler::GraphBuilder::connectDepEvent( // add user to Dep.MDepCommand is already performed beyond this if branch { DepDesc DepOnConnect = Dep; - std::cout << "connect DepDesc changing MDepCommand. Before: " << DepOnConnect.MDepCommand << " After: " << ConnectCmd << std::endl; + CPOUT << "connect DepDesc changing MDepCommand. Before: " << DepOnConnect.MDepCommand << " After: " << ConnectCmd << std::endl; DepOnConnect.MDepCommand = ConnectCmd; // Dismiss the result here as it's not a connection now, diff --git a/sycl/source/detail/scheduler/scheduler.cpp b/sycl/source/detail/scheduler/scheduler.cpp index 28f63fa63975..e100bd07ef3b 100644 --- a/sycl/source/detail/scheduler/scheduler.cpp +++ b/sycl/source/detail/scheduler/scheduler.cpp @@ -158,33 +158,8 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, bool Enqueued; // CP - // I'm not sure the logic here is correct. This Cleanup is only used in the case of an error. - // It seems like NewEvent should have its command - // cleared no matter what. Also, shouldn't the cleanup used when staging auxiliary commands - // be cleaning up auxillary commands, instead o the main command, which was never enqueued? - // Or should we be cleaning up everything? I think enqueueCommand has to be careful about - // it's error semantics. Did it enqueue or not? - // In my case, we successfully enqueue one dependency, but the GC command itself fails (does it, it throws certainly, but maybe afterwards?) auto CleanUp = [&](Command* SomeCmd) { - // this will clear up the CG command, but not the others, and also results in crash during shutdown - // NewEvent->setComplete(); - // NewEvent->setCommand(nullptr); - // delete NewCmd; - - // doesn't do anything. - // for(auto Desc : SomeCmd->MDeps) { - // if (auto DepCmd = Desc.MDepCommand) { - // DepCmd->MEnqueueStatus = EnqueueResultT::SyclEnqueueFailed; - // //DepCmd->MMarks.MToBeDeleted = true; - // DepCmd->MMarkedForCleanup = true; - // } - // } - - //auto someRecord = this.getMemObjRecord( req ); - //MGraphBuilder.cleanupCommandsForRecord( someRecord ); //MemObjRecord *Record) - - // original logic. doesn't do anything b.c. MDeps or MUsers rarely both empty if (NewCmd && (NewCmd->MDeps.size() == 0 && NewCmd->MUsers.size() == 0)) { if (NewEvent) { @@ -195,14 +170,12 @@ void Scheduler::enqueueCommandForCG(EventImplPtr NewEvent, // CP -- latest and last fix!! cleanupCommands(ToCleanUp); - - }; for (Command *Cmd : AuxiliaryCmds) { - Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd, - Blocking); - try { // CP <== this is wrong. Should encompass enqueeuCommand + try { + Enqueued = GraphProcessor::enqueueCommand(Cmd, Lock, Res, ToCleanUp, Cmd, Blocking); + if (!Enqueued && EnqueueResultT::SyclEnqueueFailed == Res.MResult) throw exception(make_error_code(errc::runtime), "Auxiliary enqueue process failed."); @@ -306,20 +279,20 @@ void Scheduler::waitForEvent(const EventImplPtr &Event, bool *Success) { bool Scheduler::removeMemoryObject(detail::SYCLMemObjI *MemObj, bool StrictLock) { - std::cout << "Scheduler::removeMemoryObject() " << StrictLock << std::endl; + CPOUT << "Scheduler::removeMemoryObject() " << StrictLock << std::endl; MemObjRecord *Record = MGraphBuilder.getMemObjRecord(MemObj); - std::cout << "Got a Record: " << Record << std::endl; + CPOUT << "Got a Record: " << Record << std::endl; if (!Record) // No operations were performed on the mem object return true; //CP - fix part 2. Should this be the same for linux? #ifdef _WIN32 - bool allowWait = MemObj->hasUserDataPtr(); + bool allowWait = MemObj->hasUserDataPtr() || GlobalHandler::instance().isOkToDefer(); #else bool allowWait = true; #endif - std::cout << "allowWait: " << allowWait << std::endl; + CPOUT << "allowWait: " << allowWait << std::endl; if(allowWait) { @@ -441,10 +414,6 @@ void Scheduler::releaseResources(BlockingT Blocking) { cleanupAuxiliaryResources(Blocking); - // CP - fix part 3 -#ifdef _WIN32 - cleanupDeferredMemObjects(Blocking); //<-- if non-blocking DeleteCmdExpception fails, otherwise host-task-failure freezes -#else // We need loop since sometimes we may need new objects to be added to // deferred mem objects storage during cleanup. Known example is: we cleanup // existing deferred mem objects under write lock, during this process we @@ -455,7 +424,6 @@ void Scheduler::releaseResources(BlockingT Blocking) { do { cleanupDeferredMemObjects(Blocking); } while (Blocking == BlockingT::BLOCKING && !isDeferredMemObjectsEmpty()); -#endif } MemObjRecord *Scheduler::getMemObjRecord(const Requirement *const Req) { diff --git a/sycl/source/detail/scheduler/scheduler.hpp b/sycl/source/detail/scheduler/scheduler.hpp index fcd2ed13005e..a3741b0d4dae 100644 --- a/sycl/source/detail/scheduler/scheduler.hpp +++ b/sycl/source/detail/scheduler/scheduler.hpp @@ -202,11 +202,11 @@ struct MemObjRecord { LeavesCollection::AllocateDependencyF AllocateDependency) : MReadLeaves{this, LeafLimit, AllocateDependency}, MWriteLeaves{this, LeafLimit, AllocateDependency}, MCurContext{Ctx} { - std::cout << "MemObjRecord() constructor" << std::endl; + CPOUT << "MemObjRecord() constructor" << std::endl; } ~MemObjRecord() { - std::cout << "~MemObjRecord destructor" << std::endl; + CPOUT << "~MemObjRecord destructor" << std::endl; } // Contains all allocation commands for the memory object. std::vector MAllocaCommands; diff --git a/sycl/source/detail/sycl_mem_obj_t.cpp b/sycl/source/detail/sycl_mem_obj_t.cpp index 5b34c38f4f6e..a2f875d2031f 100644 --- a/sycl/source/detail/sycl_mem_obj_t.cpp +++ b/sycl/source/detail/sycl_mem_obj_t.cpp @@ -140,7 +140,7 @@ void SYCLMemObjT::updateHostMemory(void *const Ptr) { } void SYCLMemObjT::updateHostMemory() { - std::cout << "updateHostMemory() entered. Have Upload functor: " << (MUploadDataFunctor != nullptr) << " NeedWriteBack: " << MNeedWriteBack << std::endl; + CPOUT << "updateHostMemory() entered. Have Upload functor: " << (MUploadDataFunctor != nullptr) << " NeedWriteBack: " << MNeedWriteBack << std::endl; if ((MUploadDataFunctor != nullptr) && MNeedWriteBack) MUploadDataFunctor(); diff --git a/sycl/test-e2e/Basic/backend_info.cpp b/sycl/test-e2e/Basic/backend_info.cpp index f61b1809a6e4..d64d3cf5b533 100644 --- a/sycl/test-e2e/Basic/backend_info.cpp +++ b/sycl/test-e2e/Basic/backend_info.cpp @@ -1,5 +1,5 @@ // RUN: %{build} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_UR_TRACE=-2 %{run} %t.out // // RUN: %{build} -DTEST_ERRORS -D_GLIBCXX_USE_CXX11_ABI=0 -fsyntax-only -Xclang -verify -Xclang -verify-ignore-unexpected=note diff --git a/sycl/test-e2e/Basic/buffer/buffer_release.cpp b/sycl/test-e2e/Basic/buffer/buffer_release.cpp index 92f48f2027cd..c41c14193852 100644 --- a/sycl/test-e2e/Basic/buffer/buffer_release.cpp +++ b/sycl/test-e2e/Basic/buffer/buffer_release.cpp @@ -1,7 +1,7 @@ // REQUIRES: cpu // RUN: %{build} -o %t.out -// RUN: %{run} %t.out +// RUN: env SYCL_UR_TRACE=-2 %{run} %t.out #include