Skip to content

Commit 76f863d

Browse files
committed
Merge remote-tracking branch 'origin/main' into tmp_cuda_reduce
2 parents 8350038 + 1aba3d3 commit 76f863d

File tree

16 files changed

+339
-196
lines changed

16 files changed

+339
-196
lines changed

examples/nvexec/reduce.cpp

Lines changed: 9 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -23,52 +23,19 @@
2323
#include <span>
2424

2525
namespace ex = stdexec;
26-
using stdexec::__tag_invoke::tag_invoke;
2726

28-
struct sink_receiver {
29-
using is_receiver = void;
30-
31-
friend void tag_invoke(stdexec::set_value_t, sink_receiver, auto&&...) noexcept {
32-
}
33-
34-
friend void tag_invoke(stdexec::set_error_t, sink_receiver, auto&&) noexcept {
35-
}
36-
37-
friend void tag_invoke(stdexec::set_stopped_t, sink_receiver) noexcept {
38-
}
39-
40-
friend stdexec::empty_env tag_invoke(stdexec::get_env_t, sink_receiver) noexcept {
41-
return {};
42-
}
43-
};
44-
45-
struct empty_environment { };
46-
47-
template <class...>
48-
[[deprecated]] void print() {
49-
}
50-
51-
// unqualified call to tag_invoke:
5227
int main() {
53-
const int n = 2 * 1024;
54-
thrust::device_vector<float> input(n, 1.0f);
55-
float* first = thrust::raw_pointer_cast(input.data());
56-
float* last = thrust::raw_pointer_cast(input.data()) + input.size();
57-
nvexec::stream_context stream_ctx{};
58-
auto sched = stream_ctx.get_scheduler();
59-
60-
auto snd = ex::just(std::span{first, last}) | nvexec::reduce(42.0f);
28+
// const int n = 2 * 1024;
29+
// thrust::device_vector<float> input(n, 1.0f);
30+
// float* first = thrust::raw_pointer_cast(input.data());
31+
// float* last = thrust::raw_pointer_cast(input.data()) + input.size();
6132

62-
auto on_snd = ex::on(sched, std::move(snd));
63-
//::print<stdexec::__detail::__name_of<decltype(on_snd)>>();
33+
// nvexec::stream_context stream_ctx{};
6434

65-
// recursively transforms the sender using the stream domain
66-
// auto stream_on_snd = nvexec::_strm::stream_domain().transform_sender(
67-
// std::move(on_snd), ex::empty_env());
35+
// auto snd = ex::transfer_just(stream_ctx.get_scheduler(), std::span{first, last})
36+
// | nvexec::reduce(42.0f);
6837

69-
// the name of the transformed sender shows that the reduce node
70-
// in the tree was transformed from a basic_sender<> to a nvexec::reduce_::sender_t<>
71-
//::print<stdexec::__detail::__name_of<decltype(stream_on_snd)>>();
38+
// auto [result] = stdexec::sync_wait(std::move(snd)).value();
7239

73-
//auto [result] = stdexec::sync_wait(std::move(on_snd)).value();
40+
// std::cout << "result: " << result << std::endl;
7441
}

include/exec/env.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,8 +23,8 @@
2323
#endif
2424

2525
namespace exec {
26-
template <class... _TagValue>
27-
using with_t = stdexec::__with<_TagValue...>;
26+
template <class _Tag, class _Value = stdexec::__none_such>
27+
using with_t = stdexec::__with<_Tag, _Value>;
2828

2929
namespace __detail {
3030
struct __with_t {

include/exec/linux/io_uring_context.hpp

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -74,8 +74,13 @@ namespace exec {
7474
unsigned int __to_submit,
7575
unsigned int __min_complete,
7676
unsigned int __flags) {
77-
return (int) ::syscall(
77+
int rc = (int) ::syscall(
7878
__NR_io_uring_enter, __ring_fd, __to_submit, __min_complete, __flags, nullptr, 0);
79+
if (rc == -1) {
80+
return -errno;
81+
} else {
82+
return rc;
83+
}
7984
}
8085

8186
inline memory_mapped_region __map_region(int __fd, ::off_t __offset, std::size_t __size) {

include/exec/task.hpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -31,8 +31,8 @@
3131
#include "scope.hpp"
3232

3333
STDEXEC_PRAGMA_PUSH()
34-
STDEXEC_PRAGMA_IGNORE("-Wpragmas")
35-
STDEXEC_PRAGMA_IGNORE("-Wundefined-inline")
34+
STDEXEC_PRAGMA_IGNORE_GNU("-Wpragmas")
35+
STDEXEC_PRAGMA_IGNORE_GNU("-Wundefined-inline")
3636

3737
namespace exec {
3838
namespace __task {

include/nvexec/multi_gpu_context.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ namespace nvexec {
6666
if (op.status_ == cudaSuccess) {
6767
continuation_kernel<<<1, 1, 0, op.stream_>>>(std::move(op.rec_), stdexec::set_value);
6868
} else {
69-
continuation_kernel<cudaError_t><<<1, 1, 0, op.stream_>>>(
69+
continuation_kernel<<<1, 1, 0, op.stream_>>>(
7070
std::move(op.rec_), stdexec::set_error, std::move(op.status_));
7171
}
7272
}

include/nvexec/stream/bulk.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -202,7 +202,7 @@ namespace nvexec::STDEXEC_STREAM_DETAIL_NS {
202202

203203
if (begin < end) {
204204
cudaSetDevice(dev);
205-
cudaStreamWaitEvent(stream, op_state.ready_to_launch_);
205+
cudaStreamWaitEvent(stream, op_state.ready_to_launch_, 0);
206206
kernel<block_threads, As&...>
207207
<<<grid_blocks, block_threads, 0, stream>>>(begin, end, self.f_, as...);
208208
cudaEventRecord(op_state.ready_to_complete_[dev], op_state.streams_[dev]);
@@ -225,7 +225,7 @@ namespace nvexec::STDEXEC_STREAM_DETAIL_NS {
225225

226226
for (int dev = 0; dev < op_state.num_devices_; dev++) {
227227
if (dev != op_state.current_device_) {
228-
cudaStreamWaitEvent(baseline_stream, op_state.ready_to_complete_[dev]);
228+
cudaStreamWaitEvent(baseline_stream, op_state.ready_to_complete_[dev], 0);
229229
}
230230
}
231231
}

include/nvexec/stream/ensure_started.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,7 @@ namespace nvexec::STDEXEC_STREAM_DETAIL_NS {
246246
if (status == cudaSuccess) {
247247
if constexpr (stream_sender<Sender, env_t>) {
248248
status = STDEXEC_DBG_ERR(
249-
cudaStreamWaitEvent(op->get_stream(), op->shared_state_->event_));
249+
cudaStreamWaitEvent(op->get_stream(), op->shared_state_->event_, 0));
250250
}
251251

252252
visit(

include/nvexec/stream/split.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -236,7 +236,7 @@ namespace nvexec::STDEXEC_STREAM_DETAIL_NS {
236236
if (status == cudaSuccess) {
237237
if constexpr (stream_sender<Sender, env_t>) {
238238
status = STDEXEC_DBG_ERR(
239-
cudaStreamWaitEvent(op->get_stream(), op->shared_state_->event_));
239+
cudaStreamWaitEvent(op->get_stream(), op->shared_state_->event_, 0));
240240
}
241241

242242
visit(

include/nvexec/stream/when_all.cuh

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -259,7 +259,7 @@ namespace nvexec::STDEXEC_STREAM_DETAIL_NS {
259259

260260
for (int i = 0; i < sizeof...(SenderIds); i++) {
261261
if (status_ == cudaSuccess) {
262-
status_ = STDEXEC_DBG_ERR(cudaStreamWaitEvent(stream, events_[i]));
262+
status_ = STDEXEC_DBG_ERR(cudaStreamWaitEvent(stream, events_[i], 0));
263263
}
264264
}
265265
} else {

include/nvexec/stream_context.cuh

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -101,7 +101,7 @@ namespace nvexec {
101101

102102
// Lazy algorithm customizations require a recursive tree transformation
103103
template <sender_expr Sender, class Env>
104-
requires _non_stream_sender<Sender> // no need to transform it a second time
104+
requires _non_stream_sender<Sender, Env> // no need to transform it a second time
105105
auto transform_sender(Sender&& sndr, const Env& env) const noexcept {
106106
return stdexec::apply_sender(
107107
(Sender&&) sndr,
@@ -113,7 +113,7 @@ namespace nvexec {
113113

114114
// reduce senders get a special transformation
115115
template <sender_expr_for<reduce_t> Sender, class Env>
116-
requires _non_stream_sender<Sender> // no need to transform it a second time
116+
requires _non_stream_sender<Sender, Env> // no need to transform it a second time
117117
auto transform_sender(Sender&& sndr, const Env& env) const noexcept {
118118
return stdexec::apply_sender(
119119
(Sender&&) sndr,

0 commit comments

Comments
 (0)