diff --git a/CMakeLists.txt b/CMakeLists.txt index daba3c4..364455c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -29,7 +29,6 @@ if (CUDA_FOUND) add_subdirectory(chapter08) add_subdirectory(chapter09) add_subdirectory(chapter10) - add_subdirectory(chapter11) else() message("CUDA not found!") endif() diff --git a/chapter11/CMakeLists.txt b/chapter11/CMakeLists.txt deleted file mode 100644 index 321f69b..0000000 --- a/chapter11/CMakeLists.txt +++ /dev/null @@ -1,11 +0,0 @@ -CMAKE_MINIMUM_REQUIRED(VERSION 3.1) - - -CUDA_ADD_EXECUTABLE(multidevice multidevice.cu) -SET_PROPERTY(TARGET multidevice PROPERTY FOLDER chapter11) - -CUDA_ADD_EXECUTABLE(portable portable.cu) -SET_PROPERTY(TARGET portable PROPERTY FOLDER chapter11) - -CUDA_ADD_EXECUTABLE(time_zero2 time_zero2.cu) -SET_PROPERTY(TARGET time_zero2 PROPERTY FOLDER chapter11) \ No newline at end of file diff --git a/chapter11/multidevice.cu b/chapter11/multidevice.cu deleted file mode 100644 index a6d2d68..0000000 --- a/chapter11/multidevice.cu +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright 1993-2010 NVIDIA Corporation. All rights reserved. - * - * NVIDIA Corporation and its licensors retain all intellectual property and - * proprietary rights in and to this software and related documentation. - * Any use, reproduction, disclosure, or distribution of this software - * and related documentation without an express license agreement from - * NVIDIA Corporation is strictly prohibited. - * - * Please refer to the applicable NVIDIA end user license agreement (EULA) - * associated with this source code for terms and conditions that govern - * your use of this NVIDIA software. - * - */ - - -#include "../common/book.h" - - -#define imin(a,b) (adeviceID ) ); - - int size = data->size; - float *a, *b, c, *partial_c; - float *dev_a, *dev_b, *dev_partial_c; - - // allocate memory on the CPU side - a = data->a; - b = data->b; - partial_c = (float*)malloc( blocksPerGrid*sizeof(float) ); - - // allocate the memory on the GPU - HANDLE_ERROR( cudaMalloc( (void**)&dev_a, - size*sizeof(float) ) ); - HANDLE_ERROR( cudaMalloc( (void**)&dev_b, - size*sizeof(float) ) ); - HANDLE_ERROR( cudaMalloc( (void**)&dev_partial_c, - blocksPerGrid*sizeof(float) ) ); - - // copy the arrays 'a' and 'b' to the GPU - HANDLE_ERROR( cudaMemcpy( dev_a, a, size*sizeof(float), - cudaMemcpyHostToDevice ) ); - HANDLE_ERROR( cudaMemcpy( dev_b, b, size*sizeof(float), - cudaMemcpyHostToDevice ) ); - - dot<<>>( size, dev_a, dev_b, - dev_partial_c ); - // copy the array 'c' back from the GPU to the CPU - HANDLE_ERROR( cudaMemcpy( partial_c, dev_partial_c, - blocksPerGrid*sizeof(float), - cudaMemcpyDeviceToHost ) ); - - // finish up on the CPU side - c = 0; - for (int i=0; ireturnValue = c; - return 0; -} - - -int main( void ) { - int deviceCount; - HANDLE_ERROR( cudaGetDeviceCount( &deviceCount ) ); - if (deviceCount < 2) { - printf( "We need at least two compute 1.0 or greater " - "devices, but only found %d\n", deviceCount ); - return 0; - } - - float *a = (float*)malloc( sizeof(float) * N ); - HANDLE_NULL( a ); - float *b = (float*)malloc( sizeof(float) * N ); - HANDLE_NULL( b ); - - // fill in the host memory with data - for (int i=0; ideviceID != 0) { - HANDLE_ERROR( cudaSetDevice( data->deviceID ) ); - HANDLE_ERROR( cudaSetDeviceFlags( cudaDeviceMapHost ) ); - } - - int size = data->size; - float *a, *b, c, *partial_c; - float *dev_a, *dev_b, *dev_partial_c; - - // allocate memory on the CPU side - a = data->a; - b = data->b; - partial_c = (float*)malloc( blocksPerGrid*sizeof(float) ); - - // allocate the memory on the GPU - HANDLE_ERROR( cudaHostGetDevicePointer( &dev_a, a, 0 ) ); - HANDLE_ERROR( cudaHostGetDevicePointer( &dev_b, b, 0 ) ); - HANDLE_ERROR( cudaMalloc( (void**)&dev_partial_c, - blocksPerGrid*sizeof(float) ) ); - - // offset 'a' and 'b' to where this GPU is gets it data - dev_a += data->offset; - dev_b += data->offset; - - dot<<>>( size, dev_a, dev_b, - dev_partial_c ); - // copy the array 'c' back from the GPU to the CPU - HANDLE_ERROR( cudaMemcpy( partial_c, dev_partial_c, - blocksPerGrid*sizeof(float), - cudaMemcpyDeviceToHost ) ); - - // finish up on the CPU side - c = 0; - for (int i=0; ireturnValue = c; - return 0; -} - - -int main( void ) { - int deviceCount; - HANDLE_ERROR( cudaGetDeviceCount( &deviceCount ) ); - if (deviceCount < 2) { - printf( "We need at least two compute 1.0 or greater " - "devices, but only found %d\n", deviceCount ); - return 0; - } - - cudaDeviceProp prop; - for (int i=0; i<2; i++) { - HANDLE_ERROR( cudaGetDeviceProperties( &prop, i ) ); - if (prop.canMapHostMemory != 1) { - printf( "Device %d can not map memory.\n", i ); - return 0; - } - } - - float *a, *b; - HANDLE_ERROR( cudaSetDevice( 0 ) ); - HANDLE_ERROR( cudaSetDeviceFlags( cudaDeviceMapHost ) ); - HANDLE_ERROR( cudaHostAlloc( (void**)&a, N*sizeof(float), - cudaHostAllocWriteCombined | - cudaHostAllocPortable | - cudaHostAllocMapped ) ); - HANDLE_ERROR( cudaHostAlloc( (void**)&b, N*sizeof(float), - cudaHostAllocWriteCombined | - cudaHostAllocPortable | - cudaHostAllocMapped ) ); - - // fill in the host memory with data - for (int i=0; i>>( size, dev_a, dev_b, - dev_partial_c ); - // copy the array 'c' back from the GPU to the CPU - HANDLE_ERROR( cudaMemcpy( partial_c, dev_partial_c, - blocksPerGrid*sizeof(float), - cudaMemcpyDeviceToHost ) ); - - HANDLE_ERROR( cudaEventRecord( stop, 0 ) ); - HANDLE_ERROR( cudaEventSynchronize( stop ) ); - HANDLE_ERROR( cudaEventElapsedTime( &elapsedTime, - start, stop ) ); - - // finish up on the CPU side - c = 0; - for (int i=0; i>>( size, dev_a, dev_b, - dev_partial_c ); - - HANDLE_ERROR( cudaThreadSynchronize() ); - HANDLE_ERROR( cudaEventRecord( stop, 0 ) ); - HANDLE_ERROR( cudaEventSynchronize( stop ) ); - HANDLE_ERROR( cudaEventElapsedTime( &elapsedTime, - start, stop ) ); - - // finish up on the CPU side - c = 0; - for (int i=0; i