Skip to content

Commit eb06625

Browse files
committed
add prog multi-gpu and mpi-multi-gpu hip
1 parent 1dae777 commit eb06625

File tree

3 files changed

+206
-42
lines changed

3 files changed

+206
-42
lines changed

docs/modules/kokkos/examples/src/27_mpi_gpu_hip.cpp

Lines changed: 43 additions & 42 deletions
Original file line numberDiff line numberDiff line change
@@ -10,69 +10,70 @@
1010
int main(int argc, char *argv[]) {
1111
Kokkos::initialize(argc, argv);
1212
{
13-
14-
int provided;
13+
int provided;
1514
int initialized;
1615
MPI_Initialized(&initialized);
1716
if (!initialized) {
18-
MPI_Init_thread(nullptr, nullptr, MPI_THREAD_FUNNELED, &provided);
17+
MPI_Init_thread(nullptr, nullptr, MPI_THREAD_FUNNELED, &provided);
1918
}
20-
19+
2120
int rank, world_size;
2221
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
2322
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
2423

2524
try {
26-
Kokkos::InitializationSettings settings;
27-
settings.set_device_id(rank % Kokkos::HIP::detect_device_count());
28-
if (!Kokkos::is_initialized()) {
29-
//settings.set_num_threads(2); // if you want ... or more parameters
30-
Kokkos::initialize(settings);
31-
}
25+
Kokkos::InitializationSettings settings;
26+
settings.set_device_id(rank % Kokkos::HIP::detect_device_count());
27+
if (!Kokkos::is_initialized()) {
28+
// settings.set_num_threads(2); // if you want ... or more parameters
29+
Kokkos::initialize(settings);
30+
}
3231

33-
{
34-
int n = 10;
35-
Kokkos::View<double*, Kokkos::HIP::memory_space> data("data", n);
32+
{
33+
int n = 10;
34+
Kokkos::View<double *, Kokkos::HIP::memory_space> data("data", n);
3635

37-
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::HIP::execution_space>(0, n),
38-
KOKKOS_LAMBDA(const int i) {
39-
data(i) = rank * 1.0 + i;
40-
});
36+
Kokkos::parallel_for(
37+
Kokkos::RangePolicy<Kokkos::HIP::execution_space>(0, n),
38+
KOKKOS_LAMBDA(const int i) { data(i) = rank * 1.0 + i; });
4139

42-
Kokkos::fence();
43-
44-
double local_sum = 0.0;
45-
Kokkos::parallel_reduce(Kokkos::RangePolicy<Kokkos::HIP::execution_space>(0, n),
46-
KOKKOS_LAMBDA(const int i, double& sum) {
47-
sum += data(i);
48-
}, local_sum);
40+
Kokkos::fence();
4941

50-
Kokkos::fence();
42+
double local_sum = 0.0;
43+
Kokkos::parallel_reduce(
44+
Kokkos::RangePolicy<Kokkos::HIP::execution_space>(0, n),
45+
KOKKOS_LAMBDA(const int i, double &sum) { sum += data(i); },
46+
local_sum);
5147

52-
double global_sum;
53-
MPI_Allreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
48+
Kokkos::fence();
5449

55-
std::cout <<"rank["<<rank<< "] Lobale Sum : " << local_sum << std::endl;
50+
double global_sum;
51+
MPI_Allreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM,
52+
MPI_COMM_WORLD);
5653

57-
if (rank == 0) {
58-
std::cout << "Globale sum : " << global_sum << std::endl;
59-
}
54+
std::cout << "rank[" << rank << "] Lobale Sum : " << local_sum
55+
<< std::endl;
56+
57+
if (rank == 0) {
58+
std::cout << "Globale sum : " << global_sum << std::endl;
6059
}
60+
}
6161

62-
//Kokkos::finalize();
62+
// Kokkos::finalize();
6363

64-
if (Kokkos::is_initialized()) {
65-
Kokkos::finalize();
66-
}
64+
if (Kokkos::is_initialized()) {
65+
Kokkos::finalize();
66+
}
67+
} catch (std::exception &e) {
68+
std::cerr << "Exception caught on rank " << rank << ": " << e.what()
69+
<< std::endl;
70+
MPI_Abort(MPI_COMM_WORLD, 1);
6771
}
68-
catch (std::exception& e) {
69-
std::cerr << "Exception caught on rank " << rank << ": " << e.what() << std::endl;
70-
MPI_Abort(MPI_COMM_WORLD, 1);
71-
}
72-
73-
MPI_Initialized(&initialized);
74-
if (initialized) { MPI_Finalize(); }
7572

73+
MPI_Initialized(&initialized);
74+
if (initialized) {
75+
MPI_Finalize();
76+
}
7677
}
7778
Kokkos::finalize();
7879
return 0;
Lines changed: 75 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,75 @@
1+
#include <cmath>
2+
#include <cstdio>
3+
#include <cstdlib>
4+
#include <cstring>
5+
#include <iostream>
6+
#include <limits>
7+
8+
#include <Kokkos_Core.hpp>
9+
10+
struct VectorAddFunctor {
11+
Kokkos::View<double *, Kokkos::HIPSpace> a;
12+
Kokkos::View<double *, Kokkos::HIPSpace> b;
13+
Kokkos::View<double *, Kokkos::HIPSpace> c;
14+
15+
VectorAddFunctor(Kokkos::View<double *, Kokkos::HIPSpace> a_,
16+
Kokkos::View<double *, Kokkos::HIPSpace> b_,
17+
Kokkos::View<double *, Kokkos::HIPSpace> c_)
18+
: a(a_), b(b_), c(c_) {}
19+
20+
KOKKOS_INLINE_FUNCTION
21+
void operator()(const int i) const { c(i) = a(i) + b(i); }
22+
};
23+
24+
int main(int argc, char *argv[]) {
25+
Kokkos::initialize(argc, argv);
26+
{
27+
28+
const int vector_size = 1000000;
29+
const int num_gpus = Kokkos::HIP::detect_device_count();
30+
31+
Kokkos::InitializationSettings settings;
32+
std::cout << "Number of GPUs available : " << num_gpus << std::endl;
33+
34+
Kokkos::Timer timer;
35+
36+
for (int gpu = 0; gpu < num_gpus; ++gpu) {
37+
38+
settings.set_device_id(gpu);
39+
Kokkos::HIP::impl_initialize(settings);
40+
Kokkos::fence();
41+
42+
Kokkos::View<double *, Kokkos::HIPSpace> a("a", vector_size);
43+
Kokkos::View<double *, Kokkos::HIPSpace> b("b", vector_size);
44+
Kokkos::View<double *, Kokkos::HIPSpace> c("c", vector_size);
45+
46+
Kokkos::parallel_for(
47+
Kokkos::RangePolicy<Kokkos::HIP>(0, vector_size),
48+
KOKKOS_LAMBDA(const int i) {
49+
a(i) = 1.0;
50+
b(i) = 2.0;
51+
});
52+
53+
Kokkos::parallel_for(Kokkos::RangePolicy<Kokkos::HIP>(0, vector_size),
54+
VectorAddFunctor(a, b, c));
55+
56+
Kokkos::View<double *>::HostMirror h_c = Kokkos::create_mirror_view(c);
57+
Kokkos::deep_copy(h_c, c);
58+
59+
bool correct = true;
60+
for (int i = 0; i < vector_size; ++i) {
61+
if (h_c(i) != 3.0) {
62+
correct = false;
63+
break;
64+
}
65+
}
66+
67+
std::cout << "Result on GPU " << gpu << " : "
68+
<< (correct ? "Correct" : "Incorrect") << std::endl;
69+
}
70+
double elapsed_time = timer.seconds();
71+
std::cout << "Elapsed time: " << elapsed_time << " seconds" << std::endl;
72+
}
73+
Kokkos::finalize();
74+
return 0;
75+
}
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#include <cmath>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <iostream>
#include <limits>

#include <Kokkos_Core.hpp>
#include <mpi.h>
9+
10+
int main(int argc, char *argv[]) {
11+
// Kokkos::initialize(argc, argv);
12+
{
13+
int provided;
14+
int initialized;
15+
MPI_Initialized(&initialized);
16+
if (!initialized) {
17+
MPI_Init_thread(nullptr, nullptr, MPI_THREAD_FUNNELED, &provided);
18+
}
19+
20+
int rank, world_size;
21+
MPI_Comm_rank(MPI_COMM_WORLD, &rank);
22+
MPI_Comm_size(MPI_COMM_WORLD, &world_size);
23+
24+
Kokkos::Timer timer;
25+
26+
try {
27+
Kokkos::InitializationSettings settings;
28+
int num_gpus = Kokkos::HIP::detect_device_count();
29+
int gpu_id = rank % num_gpus;
30+
settings.set_device_id(gpu_id);
31+
32+
std::cout << "rank : [" << rank << "] num gpu id : [" << gpu_id << "] "
33+
<< std::endl;
34+
35+
if (!Kokkos::is_initialized()) {
36+
Kokkos::initialize(settings);
37+
}
38+
39+
{
40+
int n = 10;
41+
Kokkos::View<double *, Kokkos::HIP::memory_space> data("data", n);
42+
43+
Kokkos::parallel_for(
44+
Kokkos::RangePolicy<Kokkos::HIP::execution_space>(0, n),
45+
KOKKOS_LAMBDA(const int i) { data(i) = rank * 1.0 + i; });
46+
47+
Kokkos::fence();
48+
49+
double local_sum = 0.0;
50+
Kokkos::parallel_reduce(
51+
Kokkos::RangePolicy<Kokkos::HIP::execution_space>(0, n),
52+
KOKKOS_LAMBDA(const int i, double &sum) { sum += data(i); },
53+
local_sum);
54+
55+
Kokkos::fence();
56+
57+
double global_sum;
58+
MPI_Allreduce(&local_sum, &global_sum, 1, MPI_DOUBLE, MPI_SUM,
59+
MPI_COMM_WORLD);
60+
61+
std::cout << "rank[" << rank << "] Locale Sum : " << local_sum
62+
<< std::endl;
63+
64+
if (rank == 0) {
65+
std::cout << "Globale Sum : " << global_sum << std::endl;
66+
double elapsed_time = timer.seconds();
67+
std::cout << "Elapsed time: " << elapsed_time << " seconds"
68+
<< std::endl;
69+
}
70+
}
71+
72+
if (Kokkos::is_initialized()) {
73+
Kokkos::finalize();
74+
}
75+
} catch (std::exception &e) {
76+
std::cerr << "Exception caught on rank " << rank << ": " << e.what()
77+
<< std::endl;
78+
MPI_Abort(MPI_COMM_WORLD, 1);
79+
}
80+
81+
MPI_Initialized(&initialized);
82+
if (initialized) {
83+
MPI_Finalize();
84+
}
85+
}
86+
// Kokkos::finalize();
87+
return 0;
88+
}

0 commit comments

Comments
 (0)