diff --git a/.github/workflows/github-pr-unix.yaml b/.github/workflows/github-pr-unix.yaml index d33b9d9d..6734d1b7 100644 --- a/.github/workflows/github-pr-unix.yaml +++ b/.github/workflows/github-pr-unix.yaml @@ -59,6 +59,8 @@ jobs: -DCMAKE_INSTALL_PREFIX="${GITHUB_WORKSPACE}"/install-kokkos -DCMAKE_CXX_COMPILER=${{ matrix.cpp_compiler }} -DCMAKE_BUILD_TYPE=${{ matrix.build_type }} + -DKokkos_ENABLE_${{ matrix.backend }}=ON + -DKokkos_ENABLE_SERIAL=ON -DKokkos_ENABLE_COMPILER_WARNINGS=ON - name: Build & Install Kokkos @@ -76,11 +78,18 @@ jobs: run: cmake --build "${GITHUB_WORKSPACE}"/build-kokkos-kernels --config ${{ matrix.build_type }} --parallel 2 --target install - name: Configure and Build Exercises + run: > + bash "${GITHUB_WORKSPACE}"/kokkos-tutorials/Scripts/ci-configure-build-test.sh + "${GITHUB_WORKSPACE}"/install-kokkos/lib/cmake/Kokkos + "${GITHUB_WORKSPACE}"/install-kokkos-kernels + "${GITHUB_WORKSPACE}"/kokkos-tutorials + ${{ matrix.cpp_compiler}} + ${{ matrix.build_type}} + ${{ matrix.backend }} + + - name: Run Solutions run: | - bash "${GITHUB_WORKSPACE}"/kokkos-tutorials/Scripts/ci-configure-build-test.sh \ - "${GITHUB_WORKSPACE}"/install-kokkos/lib/cmake/Kokkos \ - "${GITHUB_WORKSPACE}"/install-kokkos-kernels \ + bash "${GITHUB_WORKSPACE}"/kokkos-tutorials/Scripts/ci-run-solutions.sh \ "${GITHUB_WORKSPACE}"/kokkos-tutorials \ - ${{ matrix.cpp_compiler}} \ - ${{ matrix.build_type}} \ ${{ matrix.backend }} + diff --git a/Exercises/04/Solution/exercise_4_solution.cpp b/Exercises/04/Solution/exercise_4_solution.cpp index 6601fb77..317f9253 100644 --- a/Exercises/04/Solution/exercise_4_solution.cpp +++ b/Exercises/04/Solution/exercise_4_solution.cpp @@ -66,21 +66,8 @@ int main( int argc, char* argv[] ) Kokkos::initialize( argc, argv ); { - #ifdef KOKKOS_ENABLE_CUDA - #define MemSpace Kokkos::CudaSpace - #endif - #ifdef KOKKOS_ENABLE_HIP - #define MemSpace Kokkos::Experimental::HIPSpace - #endif - #ifdef KOKKOS_ENABLE_OPENMPTARGET - #define MemSpace 
Kokkos::OpenMPTargetSpace - #endif - - #ifndef MemSpace - #define MemSpace Kokkos::HostSpace - #endif - - using ExecSpace = MemSpace::execution_space; + using ExecSpace = Kokkos::DefaultExecutionSpace; + using MemSpace = ExecSpace::memory_space; using range_policy = Kokkos::RangePolicy; // Allocate y, x vectors and Matrix A on device. diff --git a/Exercises/random_number/Begin/MC_DartSampler.cpp b/Exercises/random_number/Begin/MC_DartSampler.cpp index b9696734..c9a1d5b6 100644 --- a/Exercises/random_number/Begin/MC_DartSampler.cpp +++ b/Exercises/random_number/Begin/MC_DartSampler.cpp @@ -155,22 +155,28 @@ struct GenRandom { // 1) cycle on the sample size and compare pi vs sample size. // 2) integer bit-size variation (64 vs 1024). +void checkSizes(int& N, int& dart_groups); int main(int argc, char* args[]) { - if ( argc < 2 ) { - printf("RNG Example: Need at least one argument (number darts) to run; second optional argument for serial_iterations\n"); - return (-1); + int N = -1; // Number of darts, 2^N + int dart_groups = -1; // Number of darts to draw per thread + + if ( argc > 1 ) { + N = std::atoi(args[1]); + printf("User N is %d\n", N); + } + if ( argc > 2 ) { + dart_groups = std::atoi(args[2]); + printf("User dart_groups is %d\n", dart_groups); } + checkSizes(N, dart_groups); + Kokkos::initialize(argc,args); { - const double rad = 1.0; // target radius (also box size) - const long N = atoi(args[1]); // exponent used to create number of darts, 2^N - - const long dart_groups = argc > 2 ? 
atoi(args[2]) : 1 ; - - const long darts = std::pow(2,N); // number of dart throws + const double rad = 1.0; // target radius (also box size) + const long darts = std::pow(2,N); // number of dart throws const double pi = 3.14159265358979323846 ; printf( "Reference Value for pi: %lf\n",pi); @@ -194,3 +200,17 @@ int main(int argc, char* args[]) { return 0; } +void checkSizes(int& N, int& dart_groups) +{ + if ( N == -1 && dart_groups == -1 ) { + printf("RNG Example Options:\n"); + printf(" : Number of darts 2^N (default: 2^22)\n"); + printf(" : Number of darts to draw per thread (default: 1)\n"); + } + if ( N == -1 ) { + N = 22; + } + if ( dart_groups == -1 ) { + dart_groups = 1; + } +} diff --git a/Exercises/random_number/Solution/MC_DartSampler.cpp b/Exercises/random_number/Solution/MC_DartSampler.cpp index e107f550..f8ed0ed5 100644 --- a/Exercises/random_number/Solution/MC_DartSampler.cpp +++ b/Exercises/random_number/Solution/MC_DartSampler.cpp @@ -143,22 +143,28 @@ struct GenRandom { // 1) cycle on the sample size and compare pi vs sample size. // 2) integer bit-size variation (64 vs 1024). +void checkSizes(int& N, int& dart_groups); int main(int argc, char* args[]) { - if ( argc < 2 ) { - printf("RNG Example: Need at least one argument (number darts) to run; second optional argument for serial_iterations\n"); - return (-1); + int N = -1; // Number of darts, 2^N + int dart_groups = -1; // Number of darts to draw per thread + + if ( argc > 1 ) { + N = std::atoi(args[1]); + printf("User N is %d\n", N); + } + if ( argc > 2 ) { + dart_groups = std::atoi(args[2]); + printf("User dart_groups is %d\n", dart_groups); } + checkSizes(N, dart_groups); + Kokkos::initialize(argc,args); { - const double rad = 1.0; // target radius (also box size) - const long N = atoi(args[1]); // exponent used to create number of darts, 2^N - - const long dart_groups = argc > 2 ? 
atoi(args[2]) : 1 ; - - const long darts = std::pow(2,N); // number of dart throws + const double rad = 1.0; // target radius (also box size) + const long darts = std::pow(2,N); // number of dart throws const double pi = 3.14159265358979323846 ; printf( "Reference Value for pi: %lf\n",pi); @@ -176,8 +182,22 @@ int main(int argc, char* args[]) { printf( "darts = %ld hits = %ld pi est = %lf\n", darts, circHits, 4.0*double(circHits)/double(darts) ); } - Kokkos::finalize(); - - return 0; + Kokkos::finalize(); + + return 0; } +void checkSizes(int& N, int& dart_groups) +{ + if ( N == -1 && dart_groups == -1 ) { + printf("RNG Example Options:\n"); + printf(" : Number of darts 2^N (default: 2^22)\n"); + printf(" : Number of darts to draw per thread (default: 1)\n"); + } + if ( N == -1 ) { + N = 22; + } + if ( dart_groups == -1 ) { + dart_groups = 1; + } +} diff --git a/Exercises/simd_warp/Begin/CMakeLists.txt b/Exercises/simd_warp/Begin/CMakeLists.txt deleted file mode 100644 index 83dcecbe..00000000 --- a/Exercises/simd_warp/Begin/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -cmake_minimum_required(VERSION 3.16) -project(KokkosTutorialSIMDWarp) -include(../../common.cmake) - -add_executable(SIMDWarp simd_warp_begin.cpp) -target_link_libraries(SIMDWarp Kokkos::kokkos) diff --git a/Exercises/simd_warp/Begin/simd_warp_begin.cpp b/Exercises/simd_warp/Begin/simd_warp_begin.cpp deleted file mode 100644 index 757553f8..00000000 --- a/Exercises/simd_warp/Begin/simd_warp_begin.cpp +++ /dev/null @@ -1,122 +0,0 @@ -#include -//EXERCISE: include the right header (later Kokkos will include this) -//#include - -void test_simd(int N_in, int M, int R, double a) { - - //EXERCISE: get the right type here for CUDA/Non-Cuda - //#ifdef KOKKOS_ENABLE_CUDA - //using simd_t = ...; - //#else - //using simd_t = ...; - //#endif - //using simd_storage_t = ...; - - //EXERCISE: What will the N now be? 
- int N = N_in; - - //EXERCISE: create SIMD Views instead - Kokkos::View data("D",N,M); - Kokkos::View results("R",N); - - // EXERCISE: create correctly a scalar view of results and data - // For the final reduction we gonna need a scalar view of the data for now - // Relying on knowing the data layout, we will add SIMD Layouts later - // so that simple copy construction/assgnment would work - Kokkos::View data_scalar(data); - Kokkos::View results_scalar(results); - - // Lets fill the data deep_copy into scalar types doesn't work correctly for cuda_warp right now - Kokkos::parallel_for("init",data_scalar.extent(0), KOKKOS_LAMBDA(const int i) { - for (int j=0; j data("D",N,M); - Kokkos::View results("R",N); - - // Lets fill the input data - Kokkos::parallel_for("init",data.extent(0), KOKKOS_LAMBDA(const int i) { - for (int j=0; j(data.extent(0)/V,1,V), - KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { - double b = a; - const int i = team.league_rank()*V; - for(int j=0; j1?atoi(argv[1]):320000; - int M = argc>2?atoi(argv[2]):3; - int R = argc>3?atoi(argv[3]):10; - double scal = argc>4?atof(argv[4]):1.5; - - if(N%32) { - printf("Please choose an N dividable by 32\n"); - return 0; - } - - test_team_vector(N,M,R,scal); - test_simd(N,M,R,scal); - - Kokkos::finalize(); -} diff --git a/Exercises/simd_warp/Solution/CMakeLists.txt b/Exercises/simd_warp/Solution/CMakeLists.txt deleted file mode 100644 index 80cc6654..00000000 --- a/Exercises/simd_warp/Solution/CMakeLists.txt +++ /dev/null @@ -1,6 +0,0 @@ -cmake_minimum_required(VERSION 3.16) -project(KokkosTutorialSIMDWarp) -include(../../common.cmake) - -add_executable(SIMDWarp simd_warp_solution.cpp) -target_link_libraries(SIMDWarp Kokkos::kokkos) diff --git a/Exercises/simd_warp/Solution/simd_warp_solution.cpp b/Exercises/simd_warp/Solution/simd_warp_solution.cpp deleted file mode 100644 index 6755fd27..00000000 --- a/Exercises/simd_warp/Solution/simd_warp_solution.cpp +++ /dev/null @@ -1,116 +0,0 @@ 
-#include -#include - -void test_simd(int N_in, int M, int R, double a) { - -#ifdef KOKKOS_ENABLE_CUDA - using simd_t = simd::simd>; -#else - using simd_t = simd::simd; -#endif - using simd_storage_t = simd_t::storage_type; - - int N = N_in/simd_t::size(); - - Kokkos::View data("D",N,M); - Kokkos::View results("R",N); - - // For the final reduction we gonna need a scalar view of the data for now - // Relying on knowing the data layout, we will add SIMD Layouts later - // so that simple copy construction/assignment would work - Kokkos::View data_scalar((double*)data.data(),N_in,M); - Kokkos::View results_scalar((double*)results.data(),N_in); - - // Lets fill the data - Kokkos::parallel_for("init",data_scalar.extent(0), KOKKOS_LAMBDA(const int i) { - for (int j=0; j(data.extent(0),1,simd_t::size()), - KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { - simd_t tmp = 0.0; - double b = a; - const int i = team.league_rank(); - for(int j=0; j data("D",N,M); - Kokkos::View results("R",N); - - // Lets fill the input data - Kokkos::parallel_for("init",data.extent(0), KOKKOS_LAMBDA(const int i) { - for (int j=0; j(data.extent(0)/V,1,V), - KOKKOS_LAMBDA(const Kokkos::TeamPolicy<>::member_type& team) { - double b = a; - const int i = team.league_rank()*V; - for(int j=0; j1?atoi(argv[1]):320000; - int M = argc>2?atoi(argv[2]):3; - int R = argc>3?atoi(argv[3]):10; - double scal = argc>4?atof(argv[4]):1.5; - - if(N%32) { - printf("Please choose an N dividable by 32\n"); - return 0; - } - - test_team_vector(N,M,R,scal); - test_simd(N,M,R,scal); - - Kokkos::finalize(); -} diff --git a/Exercises/subview/Begin/exercise_subview_begin.cpp b/Exercises/subview/Begin/exercise_subview_begin.cpp index e656c0f9..d00af802 100644 --- a/Exercises/subview/Begin/exercise_subview_begin.cpp +++ b/Exercises/subview/Begin/exercise_subview_begin.cpp @@ -71,14 +71,16 @@ int main( int argc, char* argv[] ) Kokkos::initialize( argc, argv ); { - // using ExecSpace = Kokkos::Serial; + using 
ExecSpace = Kokkos::Serial; // using ExecSpace = Kokkos::Threads; // using ExecSpace = Kokkos::OpenMP; - using ExecSpace = Kokkos::Cuda; + // using ExecSpace = Kokkos::Cuda; + // using ExecSpace = Kokkos::HIP; - // using MemSpace = Kokkos::HostSpace; + using MemSpace = Kokkos::HostSpace; // using MemSpace = Kokkos::OpenMP; - using MemSpace = Kokkos::CudaSpace; + // using MemSpace = Kokkos::CudaSpace; + // using MemSpace = Kokkos::HIPSpace; // using MemSpace = Kokkos::CudaUVMSpace; using Layout = Kokkos::LayoutLeft; @@ -154,8 +156,6 @@ int main( int argc, char* argv[] ) } } - - // Calculate time. double time = timer.seconds(); diff --git a/Exercises/subview/Solution/exercise_subview_solution.cpp b/Exercises/subview/Solution/exercise_subview_solution.cpp index 981dcba9..a4f031b1 100644 --- a/Exercises/subview/Solution/exercise_subview_solution.cpp +++ b/Exercises/subview/Solution/exercise_subview_solution.cpp @@ -65,17 +65,8 @@ int main( int argc, char* argv[] ) Kokkos::initialize( argc, argv ); { - // using ExecSpace = Kokkos::Serial; - // using ExecSpace = Kokkos::Threads; - using ExecSpace = Kokkos::OpenMP; - // using ExecSpace = Kokkos::Cuda; - // using ExecSpace = Kokkos::HIP; - - // using MemSpace = Kokkos::HostSpace; - using MemSpace = Kokkos::OpenMP; - // using MemSpace = Kokkos::CudaSpace; - // using MemSpace = Kokkos::CudaUVMSpace; - // using MemSpace = Kokkos::HIPSpace; + using ExecSpace = Kokkos::DefaultExecutionSpace; + using MemSpace = ExecSpace::memory_space; // using Layout = Kokkos::LayoutLeft; using Layout = Kokkos::LayoutRight; @@ -147,7 +138,6 @@ int main( int argc, char* argv[] ) } } - // Calculate time. 
double time = timer.seconds(); diff --git a/Exercises/unique_token/Begin/unique_token.cpp b/Exercises/unique_token/Begin/unique_token.cpp index 0af6becb..36ce9b9e 100644 --- a/Exercises/unique_token/Begin/unique_token.cpp +++ b/Exercises/unique_token/Begin/unique_token.cpp @@ -1,4 +1,4 @@ -#include +#include // EXERCISE: need to remove the ifdef... #ifdef KOKKOS_ENABLE_OPENMP @@ -89,7 +89,7 @@ int main(int argc, char* argv[]) { Kokkos::deep_copy(values,values_h); double time_dup = scatter_add_loop(values,results,D); - std::cout << "Time Duplicated: " << N << " " << M << " " << time_dup << std::endl; + printf("Time Duplicated: %d %d %lf\n",N,M,time_dup); } Kokkos::finalize(); diff --git a/Exercises/unique_token/Solution/unique_token.cpp b/Exercises/unique_token/Solution/unique_token.cpp index b13db287..5fba229a 100644 --- a/Exercises/unique_token/Solution/unique_token.cpp +++ b/Exercises/unique_token/Solution/unique_token.cpp @@ -1,4 +1,4 @@ -#include +#include using atomic_2d_view = Kokkos::View >; @@ -87,7 +87,7 @@ int main(int argc, char* argv[]) { Kokkos::deep_copy(values,values_h); double time_dup = scatter_add_loop(values,results,D); - std::cout << "Time Duplicated: " << N << " " << M << " " << time_dup << std::endl; + printf("Time Duplicated: %d %d %d %lf\n",N,M,D,time_dup); } Kokkos::finalize(); diff --git a/Scripts/ci-configure-build-test.bat b/Scripts/ci-configure-build-test.bat index 1e0e22b4..d8cccbb0 100644 --- a/Scripts/ci-configure-build-test.bat +++ b/Scripts/ci-configure-build-test.bat @@ -12,7 +12,23 @@ set cpp_compiler=%~3 set build_type=%~4 set backend=%~5 -set "EXERCISES=01 02 03" +set EXERCISES=01 02 03 +set EXERCISES=%EXERCISES% dualview +set EXERCISES=%EXERCISES% kokkoskernels/BlockJacobi +set EXERCISES=%EXERCISES% kokkoskernels/GaussSeidel +set EXERCISES=%EXERCISES% kokkoskernels/GraphColoring +set EXERCISES=%EXERCISES% kokkoskernels/InnerProduct +set EXERCISES=%EXERCISES% mdrange +set EXERCISES=%EXERCISES% parallel_scan +set 
EXERCISES=%EXERCISES% random_number +set EXERCISES=%EXERCISES% scatter_view +set EXERCISES=%EXERCISES% simd +set EXERCISES=%EXERCISES% subview +set EXERCISES=%EXERCISES% team_policy +set EXERCISES=%EXERCISES% team_scratch_memory +set EXERCISES=%EXERCISES% team_vector_loop +set EXERCISES=%EXERCISES% unordered_map + if "%backend%"=="CUDA" set "EXERCISES=%EXERCISES% 04" set Kokkos_ROOT=%kokkos_root% diff --git a/Scripts/ci-configure-build-test.sh b/Scripts/ci-configure-build-test.sh index ce3f4bab..1e5fdb48 100644 --- a/Scripts/ci-configure-build-test.sh +++ b/Scripts/ci-configure-build-test.sh @@ -13,9 +13,6 @@ backend="$6" # TODO: advanced_reductions seems broken # TODO: hpcbind does not use cmake # TODO: instances does not use cmake -# TODO: parallel_scan seems broken -# TODO: simd_warp seems broken -# TODO: subview seems broken # TODO: vectorshift needs Kokkos Remote Spaces # TODO: kokkoskernels/CGSolve_SpILUKprecond needs to know where Kokkos Kernels source directory is # TODO: kokkoskernels/SpILUK needs to know where Kokkos Kernels source directory is @@ -32,9 +29,11 @@ kokkoskernels/GraphColoring kokkoskernels/InnerProduct mdrange mpi_pack_unpack +parallel_scan random_number scatter_view simd +subview team_policy team_scratch_memory team_vector_loop diff --git a/Scripts/ci-run-solutions.sh b/Scripts/ci-run-solutions.sh new file mode 100644 index 00000000..109fe0f6 --- /dev/null +++ b/Scripts/ci-run-solutions.sh @@ -0,0 +1,69 @@ +#!/bin/bash + +set -eou pipefail + +tutorials_src="$1" +backend="$2" + +# These are exercises with executables that can be run in the Solution subdirectory +# TODO: advanced_reductions seems broken +# TODO: hpcbind does not use cmake +# TODO: instances does not use cmake +# TODO: vectorshift needs Kokkos Remote Spaces +# TODO: kokkoskernels/CGSolve_SpILUKprecond needs to know where Kokkos Kernels source directory is +# TODO: kokkoskernels/SpILUK needs to know where Kokkos Kernels source directory is +# TODO: kokkoskernels/TeamGemm seems 
broken +# TODO: mpi_heat_conduction/no-mpi does not use cmake +# TODO: mpi_pack_unpack needs to be run with MPI +SOLUTION_EXERCISES=( +01 +02 +03 +dualview +kokkoskernels/BlockJacobi +kokkoskernels/GaussSeidel +kokkoskernels/GraphColoring +kokkoskernels/InnerProduct +mdrange +parallel_scan +random_number +scatter_view +simd +subview +team_policy +team_scratch_memory +team_vector_loop +unordered_map +) + +if [ "$backend" == CUDA ]; then + SOLUTION_EXERCISES+=(04) + SOLUTION_EXERCISES+=(multi_gpu_cuda) +fi + +if [ ! "$backend" == CUDA ]; then + SOLUTION_EXERCISES+=(virtualfunction) +fi + +if [ "$backend" == OPENMP ]; then + SOLUTION_EXERCISES+=(unique_token) + export OMP_PROC_BIND=spread + export OMP_PLACES=threads +fi + +if [[ ! "$OSTYPE" == "darwin"* ]]; then + SOLUTION_EXERCISES+=(fortran-kokkosinterface) +fi + +for e in "${SOLUTION_EXERCISES[@]}"; do +solution_dir="build/Exercises/$e/Solution" + if [ -d "$solution_dir" ]; then + # Executable doesn't follow a naming convention + for executable in "$solution_dir"/*; do + if [ -x "$executable" ] && [ ! -d "$executable" ]; then + echo "Running $executable" + "$executable" + fi + done + fi +done