Skip to content

Commit 3f99cf7

Browse files
committed
add code
1 parent e20cec6 commit 3f99cf7

File tree

6 files changed

+217
-64
lines changed

6 files changed

+217
-64
lines changed

.vscode/settings.json

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,13 @@
22
"C_Cpp.default.configurationProvider": "vector-of-bool.cmake-tools",
33
"cmake.buildDirectory": "${workspaceFolder}/build-${variant:buildType}",
44
"files.associations": {
5+
"*.json": "jsonc",
6+
"*.dat": "csv (whitespace)",
7+
"*.pgf": "tex",
8+
"*.pdf_tex": "tex",
9+
"*.py.in": "python",
10+
"*.tikz": "tex",
11+
"*.slurm": "shellscript",
512
"cctype": "cpp",
613
"clocale": "cpp",
714
"cmath": "cpp",
@@ -94,6 +101,8 @@
94101
"variant": "cpp",
95102
"expected": "cpp",
96103
"spanstream": "cpp",
97-
"stacktrace": "cpp"
104+
"stacktrace": "cpp",
105+
"format": "cpp",
106+
"__nullptr": "cpp"
98107
}
99108
}
Lines changed: 106 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,106 @@
1+
#include <cmath>
2+
#include <cstdio>
3+
#include <cstdlib>
4+
#include <cstring>
5+
#include <iostream>
6+
#include <limits>
7+
8+
#include <Kokkos_Core.hpp>
9+
10+
11+
struct HierarchicalParallelism {
12+
Kokkos::View<double**> matrix;
13+
HierarchicalParallelism(int N, int M) : matrix("matrix", N, M) {}
14+
15+
KOKKOS_INLINE_FUNCTION
16+
void operator()(const Kokkos::TeamPolicy<>::member_type& team_member) const {
17+
const int i = team_member.league_rank();
18+
Kokkos::parallel_for(Kokkos::TeamThreadRange(team_member, matrix.extent(1)),
19+
[&] (const int j) {
20+
matrix(i, j) = i * matrix.extent(1) + j;
21+
});
22+
23+
team_member.team_barrier();
24+
if (team_member.team_rank() == 0) {
25+
double sum = 0.0;
26+
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team_member, matrix.extent(1)),
27+
[&] (const int j, double& lsum) {
28+
lsum += matrix(i, j);
29+
}, sum);
30+
31+
Kokkos::single(Kokkos::PerTeam(team_member), [&] () {
32+
matrix(i, 0) = sum;
33+
// std::cout << "Sum of row " << i << " is " << sum << std::endl;
34+
});
35+
}
36+
}
37+
};
38+
39+
40+
int main(int argc, char* argv[]) {
41+
Kokkos::initialize(argc, argv);
42+
{
43+
const int N = 1000;
44+
const int M = 100;
45+
HierarchicalParallelism functor(N, M);
46+
Kokkos::parallel_for(Kokkos::TeamPolicy<>(N, Kokkos::AUTO), functor);
47+
}
48+
Kokkos::finalize();
49+
return 0;
50+
}
51+
52+
53+
// int main(int argc, char *argv[]) {
54+
// Kokkos::initialize(argc, argv);
55+
// {
56+
// Kokkos::Timer timer;
57+
// const int N = 1000000;
58+
// const int TEAM_SIZE = 16;
59+
// const int VECTOR_SIZE = 4;
60+
61+
// Kokkos::View<double *> data("Data", N);
62+
63+
// // Data initialization
64+
// Kokkos::parallel_for(
65+
// "Init", N, KOKKOS_LAMBDA(const int i) { data(i) = i * 0.01; });
66+
67+
// double sum = 0.0;
68+
69+
// // Hierarchical parallelism
70+
// Kokkos::parallel_reduce(
71+
// "HierarchicalSum",
72+
// Kokkos::TeamPolicy<>(N / (TEAM_SIZE * VECTOR_SIZE), TEAM_SIZE,
73+
// VECTOR_SIZE),
74+
// KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type &team_member,
75+
// double &team_sum) {
76+
// const int team_rank = team_member.team_rank();
77+
// const int team_size = team_member.team_size();
78+
// const int league_rank = team_member.league_rank();
79+
80+
// double thread_sum = 0.0;
81+
82+
// Kokkos::parallel_reduce(
83+
// Kokkos::ThreadVectorRange(team_member, VECTOR_SIZE),
84+
// [&](const int vector_rank, double &vector_sum) {
85+
// const int i =
86+
// (league_rank * team_size + team_rank) * VECTOR_SIZE +
87+
// vector_rank;
88+
// if (i < N) {
89+
// vector_sum += data(i);
90+
// }
91+
// },
92+
// thread_sum);
93+
94+
// Kokkos::single(Kokkos::PerThread(team_member),
95+
// [&]() { Kokkos::atomic_add(&team_sum, thread_sum); });
96+
// },
97+
// sum);
98+
99+
// std::cout << "Total Sum : " << sum << std::endl;
100+
// std::cout << "Average : " << sum / N << std::endl;
101+
// double elapsed_time = timer.seconds();
102+
// std::cout << "Elapsed time: " << elapsed_time << " seconds" << std::endl;
103+
// }
104+
// Kokkos::finalize();
105+
// return 0;
106+
// }
Lines changed: 56 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,56 @@
1+
#include <cmath>
2+
#include <cstdio>
3+
#include <cstdlib>
4+
#include <cstring>
5+
#include <iostream>
6+
#include <limits>
7+
8+
#include <Kokkos_Core.hpp>
9+
10+
struct ScratchMemoryExample {
11+
Kokkos::View<double*> data;
12+
ScratchMemoryExample(int N) : data("data", N) {}
13+
14+
KOKKOS_INLINE_FUNCTION
15+
void operator()(const Kokkos::TeamPolicy<>::member_type& team_member) const {
16+
const int team_size = team_member.team_size();
17+
const int team_rank = team_member.team_rank();
18+
const int league_rank = team_member.league_rank();
19+
20+
// Allocate team scratch memory
21+
double* team_scratch = (double*)team_member.team_shmem().get_shmem(team_size * sizeof(double));
22+
23+
// Each thread initializes its scratch memory
24+
team_scratch[team_rank] = league_rank * team_size + team_rank;
25+
26+
// Synchronize to ensure all threads have written to scratch memory
27+
team_member.team_barrier();
28+
29+
// Perform a reduction within the team
30+
double team_sum = 0.0;
31+
Kokkos::parallel_reduce(Kokkos::TeamThreadRange(team_member, team_size), [&](const int i, double& lsum) {
32+
lsum += team_scratch[i];
33+
}, team_sum);
34+
35+
// Only one thread writes the result back to global memory
36+
if (team_rank == 0) {
37+
data(league_rank) = team_sum;
38+
}
39+
}
40+
41+
// Specify the amount of scratch memory needed
42+
size_t team_shmem_size(int team_size) const {
43+
return team_size * sizeof(double);
44+
}
45+
};
46+
47+
int main(int argc, char* argv[]) {
48+
Kokkos::initialize(argc, argv);
49+
{
50+
const int N = 1000;
51+
ScratchMemoryExample functor(N);
52+
Kokkos::parallel_for(Kokkos::TeamPolicy<>(N / 10, Kokkos::AUTO).set_scratch_size(0, Kokkos::PerTeam(functor.team_shmem_size(10))), functor);
53+
}
54+
Kokkos::finalize();
55+
return 0;
56+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
#include <cmath>
2+
#include <cstdio>
3+
#include <cstdlib>
4+
#include <cstring>
5+
#include <iostream>
6+
#include <limits>
7+
8+
#include <Kokkos_Core.hpp>
9+
10+
int main(int argc, char *argv[]) {
11+
Kokkos::initialize(argc, argv);
12+
{
13+
// Size of the array
14+
const int N = 100;
15+
// Kokkos view to store the results
16+
Kokkos::View<int*> results("results", N);
17+
// Create a UniqueToken (based on thread execution)
18+
Kokkos::Experimental::UniqueToken<Kokkos::DefaultExecutionSpace> unique_token;
19+
// Number of available threads
20+
const int num_threads = unique_token.size();
21+
std::cout << "Number of threads: " << num_threads << std::endl;
22+
Kokkos::parallel_for("UniqueTokenExample", N, KOKKOS_LAMBDA(const int i) {
23+
// Get a unique identifier for this thread
24+
int token = unique_token.acquire();
25+
results(i) = i;
26+
unique_token.release(token);
27+
});
28+
// Copy the results to the host for display
29+
auto host_results = Kokkos::create_mirror_view_and_copy(Kokkos::HostSpace(), results);
30+
std::cout << "Results: ";
31+
for (int i = 0; i < N; ++i) {
32+
std::cout << host_results(i) << " ";
33+
}
34+
std::cout << std::endl;
35+
}
36+
Kokkos::finalize();
37+
}

docs/modules/kokkos/examples/src/22_hierarchical_parallelism.cpp

Lines changed: 0 additions & 63 deletions
This file was deleted.

docs/modules/kokkos/examples/src/CMakeLists.txt

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -43,6 +43,10 @@ if (FEELPP_ENABLE_KOKKOS)
4343
#add_executable(16_kokkos_simd 16_kokkos_simd.cpp)
4444
#add_executable(17_kokkos_polynom_jacobi_1 17_kokkos_polynom_jacobi_1.cpp)
4545

46+
add_executable(22_hierarchical_parallelism 22_1_hierarchical_parallelism.cpp)
47+
add_executable(22_scratch_memory 22_2_scratch_memory.cpp)
48+
add_executable(22_token 22_3_token.cpp)
49+
4650
#feelpp_add_application(kokkos SRCS feelpp_kokkos.cpp)
4751

4852
# Link Kokkos to the executable
@@ -62,5 +66,9 @@ if (FEELPP_ENABLE_KOKKOS)
6266
#target_link_libraries(16_kokkos_simd Kokkos::kokkos)
6367
#target_link_libraries(17_kokkos_polynom_jacobi_1 Kokkos::kokkos);
6468

69+
target_link_libraries(22_hierarchical_parallelism Kokkos::kokkos)
70+
target_link_libraries(22_scratch_memory Kokkos::kokkos)
71+
target_link_libraries(22_token Kokkos::kokkos)
72+
6573

6674
endif()

0 commit comments

Comments
 (0)