Skip to content

Commit b6188a1

Browse files
[SYCL][NFC] Have separate code path for secondary queue + remove duplicate parameters (#18201)
Signed-off-by: Agarwal, Udit <[email protected]>
1 parent 6249f30 commit b6188a1

File tree

7 files changed

+135
-46
lines changed

7 files changed

+135
-46
lines changed

sycl/include/sycl/handler.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -448,7 +448,6 @@ class __SYCL_EXPORT handler {
448448
bool CallerNeedsEvent);
449449
#endif
450450
__SYCL_DLL_LOCAL handler(std::shared_ptr<detail::queue_impl> Queue,
451-
detail::queue_impl *PrimaryQueue,
452451
detail::queue_impl *SecondaryQueue,
453452
bool CallerNeedsEvent);
454453

sycl/include/sycl/queue.hpp

Lines changed: 38 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -373,8 +373,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
373373
const detail::code_location &CodeLoc = detail::code_location::current()) {
374374
return submit_with_event<__SYCL_USE_FALLBACK_ASSERT>(
375375
sycl::ext::oneapi::experimental::empty_properties_t{},
376-
detail::type_erased_cgfo_ty{CGF},
377-
/*SecondaryQueuePtr=*/nullptr, CodeLoc);
376+
detail::type_erased_cgfo_ty{CGF}, CodeLoc);
378377
}
379378

380379
/// Submits a command group function object to the queue, in order to be
@@ -3609,6 +3608,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
36093608
///
36103609
/// \param Props is a property list with submission properties.
36113610
/// \param CGF is a function object containing command group.
3611+
/// \param SecondaryQueuePtr is a pointer to the secondary queue.
36123612
/// \param CodeLoc is the code location of the submit call (default argument)
36133613
/// \return a SYCL event object for the submitted command group.
36143614
//
@@ -3643,6 +3643,41 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
36433643
TlsCodeLocCapture.isToplevel());
36443644
}
36453645

3646+
/// Submits a command group function object to the queue, in order to be
3647+
/// scheduled for execution on the device.
3648+
///
3649+
/// \param Props is a property list with submission properties.
3650+
/// \param CGF is a function object containing command group.
3651+
/// \param CodeLoc is the code location of the submit call (default argument)
3652+
/// \return a SYCL event object for the submitted command group.
3653+
//
3654+
// UseFallBackAssert as template param vs `#if` in function body is necessary
3655+
// to prevent ODR-violation between TUs built with different fallback assert
3656+
// modes.
3657+
template <bool UseFallbackAssert, typename PropertiesT>
3658+
event submit_with_event(
3659+
PropertiesT Props, const detail::type_erased_cgfo_ty &CGF,
3660+
const detail::code_location &CodeLoc = detail::code_location::current()) {
3661+
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
3662+
detail::SubmissionInfo SI{};
3663+
ProcessSubmitProperties(Props, SI);
3664+
if constexpr (UseFallbackAssert)
3665+
SI.PostProcessorFunc() = [this, &TlsCodeLocCapture](bool IsKernel,
3666+
bool KernelUsesAssert,
3667+
event &E) {
3668+
if (IsKernel && !device_has(aspect::ext_oneapi_native_assert) &&
3669+
KernelUsesAssert && !device_has(aspect::accelerator)) {
3670+
// __devicelib_assert_fail isn't supported by Device-side Runtime
3671+
// Linking against fallback impl of __devicelib_assert_fail is
3672+
// performed by program manager class
3673+
// Fallback assert isn't supported for FPGA
3674+
submitAssertCapture(*this, E, nullptr, TlsCodeLocCapture.query());
3675+
}
3676+
};
3677+
return submit_with_event_impl(CGF, SI, TlsCodeLocCapture.query(),
3678+
TlsCodeLocCapture.isToplevel());
3679+
}
3680+
36463681
/// Submits a command group function object to the queue, in order to be
36473682
/// scheduled for execution on the device.
36483683
///
@@ -3660,7 +3695,7 @@ class __SYCL_EXPORT queue : public detail::OwnerLessBase<queue> {
36603695
if constexpr (UseFallbackAssert) {
36613696
// If post-processing is needed, fall back to the regular submit.
36623697
// TODO: Revisit whether we can avoid this.
3663-
submit_with_event<UseFallbackAssert>(Props, CGF, nullptr, CodeLoc);
3698+
submit_with_event<UseFallbackAssert>(Props, CGF, CodeLoc);
36643699
} else {
36653700
detail::tls_code_loc_t TlsCodeLocCapture(CodeLoc);
36663701
detail::SubmissionInfo SI{};

sycl/source/detail/handler_impl.hpp

Lines changed: 4 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -31,10 +31,8 @@ enum class HandlerSubmissionState : std::uint8_t {
3131

3232
class handler_impl {
3333
public:
34-
handler_impl(queue_impl *SubmissionPrimaryQueue,
35-
queue_impl *SubmissionSecondaryQueue, bool EventNeeded)
36-
: MSubmissionPrimaryQueue(SubmissionPrimaryQueue),
37-
MSubmissionSecondaryQueue(SubmissionSecondaryQueue),
34+
handler_impl(queue_impl *SubmissionSecondaryQueue, bool EventNeeded)
35+
: MSubmissionSecondaryQueue(SubmissionSecondaryQueue),
3836
MEventNeeded(EventNeeded) {};
3937

4038
handler_impl(
@@ -69,15 +67,8 @@ class handler_impl {
6967
/// Registers mutually exclusive submission states.
7068
HandlerSubmissionState MSubmissionState = HandlerSubmissionState::NO_STATE;
7169

72-
/// Shared pointer to the primary queue implementation. This is different from
73-
/// the queue associated with the handler if the corresponding submission is
74-
/// a fallback from a previous submission.
75-
queue_impl *MSubmissionPrimaryQueue = nullptr;
76-
77-
/// Shared pointer to the secondary queue implementation. Nullptr if no
78-
/// secondary queue fallback was given in the associated submission. This is
79-
/// equal to the queue associated with the handler if the corresponding
80-
/// submission is a fallback from a previous submission.
70+
/// Pointer to the secondary queue implementation. Nullptr if no
71+
/// secondary queue fallback was given in the associated submission.
8172
queue_impl *MSubmissionSecondaryQueue = nullptr;
8273

8374
/// Bool stores information about whether the event resulting from the

sycl/source/detail/queue_impl.cpp

Lines changed: 53 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,57 @@ void queue_impl::addEvent(const event &Event) {
310310
}
311311
}
312312

313+
event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
314+
const std::shared_ptr<queue_impl> &Self,
315+
queue_impl *SecondaryQueue, bool CallerNeedsEvent,
316+
const detail::code_location &Loc,
317+
bool IsTopCodeLoc,
318+
const SubmissionInfo &SubmitInfo) {
319+
handler Handler(Self, SecondaryQueue, CallerNeedsEvent);
320+
auto &HandlerImpl = detail::getSyclObjImpl(Handler);
321+
if (xptiTraceEnabled()) {
322+
Handler.saveCodeLoc(Loc, IsTopCodeLoc);
323+
}
324+
325+
{
326+
NestedCallsTracker tracker;
327+
CGF(Handler);
328+
}
329+
330+
// Scheduler will later omit events, that are not required to execute tasks.
331+
// Host and interop tasks, however, are not submitted to low-level runtimes
332+
// and require separate dependency management.
333+
const CGType Type = HandlerImpl->MCGType;
334+
std::vector<StreamImplPtr> Streams;
335+
if (Type == CGType::Kernel)
336+
Streams = std::move(Handler.MStreamStorage);
337+
338+
HandlerImpl->MEventMode = SubmitInfo.EventMode();
339+
340+
auto Event = finalizeHandler(Handler, SubmitInfo.PostProcessorFunc());
341+
342+
addEvent(Event);
343+
344+
const auto &EventImpl = detail::getSyclObjImpl(Event);
345+
for (auto &Stream : Streams) {
346+
// We don't want stream flushing to be blocking operation that is why submit
347+
// a host task to print stream buffer. It will fire up as soon as the kernel
348+
// finishes execution.
349+
auto L = [&](handler &ServiceCGH) {
350+
Stream->generateFlushCommand(ServiceCGH);
351+
};
352+
detail::type_erased_cgfo_ty CGF{L};
353+
event FlushEvent =
354+
submit_impl(CGF, Self, SecondaryQueue, /*CallerNeedsEvent*/ true, Loc,
355+
IsTopCodeLoc, {});
356+
EventImpl->attachEventToCompleteWeak(detail::getSyclObjImpl(FlushEvent));
357+
registerStreamServiceEvent(detail::getSyclObjImpl(FlushEvent));
358+
}
359+
360+
return Event;
361+
}
362+
363+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
313364
event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
314365
const std::shared_ptr<queue_impl> &Self,
315366
const std::shared_ptr<queue_impl> &PrimaryQueue,
@@ -318,8 +369,7 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
318369
const detail::code_location &Loc,
319370
bool IsTopCodeLoc,
320371
const SubmissionInfo &SubmitInfo) {
321-
handler Handler(Self, PrimaryQueue.get(), SecondaryQueue.get(),
322-
CallerNeedsEvent);
372+
handler Handler(Self, CallerNeedsEvent);
323373
auto &HandlerImpl = detail::getSyclObjImpl(Handler);
324374

325375
#if XPTI_ENABLE_INSTRUMENTATION
@@ -365,6 +415,7 @@ event queue_impl::submit_impl(const detail::type_erased_cgfo_ty &CGF,
365415

366416
return Event;
367417
}
418+
#endif
368419

369420
template <typename HandlerFuncT>
370421
event queue_impl::submitWithHandler(const std::shared_ptr<queue_impl> &Self,

sycl/source/detail/queue_impl.hpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -376,7 +376,7 @@ class queue_impl {
376376
const detail::code_location &Loc, bool IsTopCodeLoc) {
377377

378378
event ResEvent =
379-
submit_impl(CGF, Self, Self, SubmitInfo.SecondaryQueue(),
379+
submit_impl(CGF, Self, SubmitInfo.SecondaryQueue().get(),
380380
/*CallerNeedsEvent=*/true, Loc, IsTopCodeLoc, SubmitInfo);
381381
return discard_or_return(ResEvent);
382382
}
@@ -386,7 +386,7 @@ class queue_impl {
386386
const SubmissionInfo &SubmitInfo,
387387
const detail::code_location &Loc,
388388
bool IsTopCodeLoc) {
389-
submit_impl(CGF, Self, Self, SubmitInfo.SecondaryQueue(),
389+
submit_impl(CGF, Self, SubmitInfo.SecondaryQueue().get(),
390390
/*CallerNeedsEvent=*/false, Loc, IsTopCodeLoc, SubmitInfo);
391391
}
392392

@@ -840,6 +840,7 @@ class queue_impl {
840840
}
841841
}
842842

843+
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
843844
/// Performs command group submission to the queue.
844845
///
845846
/// \param CGF is a function object containing command group.
@@ -859,6 +860,23 @@ class queue_impl {
859860
const std::shared_ptr<queue_impl> &SecondaryQueue,
860861
bool CallerNeedsEvent, const detail::code_location &Loc,
861862
bool IsTopCodeLoc, const SubmissionInfo &SubmitInfo);
863+
#endif
864+
865+
/// Performs command group submission to the queue.
866+
///
867+
/// \param CGF is a function object containing command group.
868+
/// \param Self is a pointer to this queue.
869+
/// \param SecondaryQueue is a pointer to the secondary queue.
870+
/// \param CallerNeedsEvent is a boolean indicating whether the event is
871+
/// required by the user after the call.
872+
/// \param Loc is the code location of the submit call (default argument)
873+
/// \param SubmitInfo is additional optional information for the submission.
874+
/// \return a SYCL event representing submitted command group.
875+
event submit_impl(const detail::type_erased_cgfo_ty &CGF,
876+
const std::shared_ptr<queue_impl> &Self,
877+
queue_impl *SecondaryQueue, bool CallerNeedsEvent,
878+
const detail::code_location &Loc, bool IsTopCodeLoc,
879+
const SubmissionInfo &SubmitInfo);
862880

863881
/// Helper function for submitting a memory operation with a handler.
864882
/// \param Self is a shared_ptr to this queue.

sycl/source/handler.cpp

Lines changed: 19 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -314,26 +314,24 @@ fill_copy_args(detail::handler_impl *impl,
314314

315315
handler::handler(std::shared_ptr<detail::queue_impl> Queue,
316316
bool CallerNeedsEvent)
317-
: impl(std::make_shared<detail::handler_impl>(Queue.get(), nullptr,
318-
CallerNeedsEvent)),
317+
: impl(std::make_shared<detail::handler_impl>(nullptr, CallerNeedsEvent)),
319318
MQueue(std::move(Queue)) {}
320319

321320
#ifndef __INTEL_PREVIEW_BREAKING_CHANGES
322321
// TODO: This function is not used anymore, remove it in the next
323322
// ABI-breaking window.
324-
handler::handler(std::shared_ptr<detail::queue_impl> Queue,
325-
std::shared_ptr<detail::queue_impl> PrimaryQueue,
326-
std::shared_ptr<detail::queue_impl> SecondaryQueue,
327-
bool CallerNeedsEvent)
328-
: impl(std::make_shared<detail::handler_impl>(
329-
PrimaryQueue.get(), SecondaryQueue.get(), CallerNeedsEvent)),
323+
handler::handler(
324+
std::shared_ptr<detail::queue_impl> Queue,
325+
[[maybe_unused]] std::shared_ptr<detail::queue_impl> PrimaryQueue,
326+
std::shared_ptr<detail::queue_impl> SecondaryQueue, bool CallerNeedsEvent)
327+
: impl(std::make_shared<detail::handler_impl>(SecondaryQueue.get(),
328+
CallerNeedsEvent)),
330329
MQueue(Queue) {}
331330
#endif
332331

333332
handler::handler(std::shared_ptr<detail::queue_impl> Queue,
334-
detail::queue_impl *PrimaryQueue,
335333
detail::queue_impl *SecondaryQueue, bool CallerNeedsEvent)
336-
: impl(std::make_shared<detail::handler_impl>(PrimaryQueue, SecondaryQueue,
334+
: impl(std::make_shared<detail::handler_impl>(SecondaryQueue,
337335
CallerNeedsEvent)),
338336
MQueue(std::move(Queue)) {}
339337

@@ -1769,8 +1767,7 @@ void handler::ext_oneapi_signal_external_semaphore(
17691767

17701768
void handler::use_kernel_bundle(
17711769
const kernel_bundle<bundle_state::executable> &ExecBundle) {
1772-
if ((!impl->MGraph && (impl->MSubmissionPrimaryQueue->get_context() !=
1773-
ExecBundle.get_context())) ||
1770+
if ((!impl->MGraph && (MQueue->get_context() != ExecBundle.get_context())) ||
17741771
(impl->MGraph &&
17751772
(impl->MGraph->getContext() != ExecBundle.get_context())))
17761773
throw sycl::exception(
@@ -1931,23 +1928,21 @@ void handler::verifyDeviceHasProgressGuarantee(
19311928
}
19321929

19331930
bool handler::supportsUSMMemcpy2D() {
1934-
auto &PrimQueue = impl->MSubmissionPrimaryQueue;
1935-
if (PrimQueue)
1936-
return checkContextSupports(PrimQueue->getContextImplPtr(),
1937-
UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT);
1938-
else
1939-
// Return true when handler_impl is constructed with a graph.
1931+
// Return true when handler_impl is constructed with a graph.
1932+
if (!MQueue)
19401933
return true;
1934+
1935+
return checkContextSupports(MQueue->getContextImplPtr(),
1936+
UR_CONTEXT_INFO_USM_MEMCPY2D_SUPPORT);
19411937
}
19421938

19431939
bool handler::supportsUSMFill2D() {
1944-
auto &PrimQueue = impl->MSubmissionPrimaryQueue;
1945-
if (PrimQueue)
1946-
return checkContextSupports(PrimQueue->getContextImplPtr(),
1947-
UR_CONTEXT_INFO_USM_FILL2D_SUPPORT);
1948-
else
1949-
// Return true when handler_impl is constructed with a graph.
1940+
// Return true when handler_impl is constructed with a graph.
1941+
if (!MQueue)
19501942
return true;
1943+
1944+
return checkContextSupports(MQueue->getContextImplPtr(),
1945+
UR_CONTEXT_INFO_USM_FILL2D_SUPPORT);
19511946
}
19521947

19531948
bool handler::supportsUSMMemset2D() {

sycl/test/abi/sycl_symbols_windows.dump

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -402,7 +402,7 @@
402402
??0gpu_selector@_V1@sycl@@QEAA@XZ
403403
??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vgraph_impl@detail@experimental@oneapi@ext@_V1@sycl@@@std@@@Z
404404
??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@00_N@Z
405-
??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@PEAVqueue_impl@detail@12@1_N@Z
405+
??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@PEAVqueue_impl@detail@12@_N@Z
406406
??0handler@_V1@sycl@@AEAA@V?$shared_ptr@Vqueue_impl@detail@_V1@sycl@@@std@@_N@Z
407407
??0image_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@$$QEAV012345@@Z
408408
??0image_mem@experimental@oneapi@ext@_V1@sycl@@QEAA@AEBUimage_descriptor@12345@AEBVdevice@45@AEBVcontext@45@@Z

0 commit comments

Comments
 (0)