CIS565-Fall-2015 · shrekshao · Sep 2, 2015 · Sep 1, 2015 · Sep 2, 2015 · Sep 2, 2015
diff --git a/Project1-Part1/CMakeLists.txt b/Project1-Part1/CMakeLists.txt
@@ -40,6 +40,8 @@ set(CORELIBS
 # Enable C++11 for host code
 set(CMAKE_CXX_STANDARD 11)
 
+list(APPEND CUDA_NVCC_FLAGS_DEBUG -G -g)  # device debug info for Debug builds only; -G turns off device-code optimization
+
 # OSX-specific hacks/fixes
 if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
     list(APPEND CORELIBS "-framework IOKit")

diff --git a/Project1-Part1/shaders/planet.frag.glsl b/Project1-Part1/shaders/planet.frag.glsl
@@ -2,6 +2,10 @@
 
 out vec4 fragColor;
 
-void main() {
-    fragColor = vec4(1.0);
+float rand(vec2 co){ // hash: fractional part of a scaled sine of a dot product with a fixed vector
+    return fract(43758.5453 * sin(dot(co, vec2(12.9898, 78.233))));
 }
+
+void main() { // opaque color; each channel hashes a different pair of gl_FragCoord components
+    fragColor = vec4(vec3(rand(gl_FragCoord.yz), rand(gl_FragCoord.zx), rand(gl_FragCoord.xy)), 1.0);
+}
diff --git a/Project1-Part1/src/kernel.cu b/Project1-Part1/src/kernel.cu
@@ -22,6 +22,22 @@ void checkCUDAError(const char *msg, int line = -1) {
     }
 }
 
+/****************************
+* Cuda performance rating *
+****************************/
+#define PERFORMANCE_RATE
+
+#ifdef PERFORMANCE_RATE
+
+cudaEvent_t beginEvent;
+cudaEvent_t endEvent;
+const int update_times_start = 100;
+const int update_times_end = 200;
+int update_counts;
+
+#endif
+
+
 
 /*****************
  * Configuration *
@@ -113,6 +129,13 @@ void Nbody::initSimulation(int N) {
     numObjects = N;
     dim3 fullBlocksPerGrid((N + blockSize - 1) / blockSize);
 
+#ifdef PERFORMANCE_RATE
+	update_counts = update_times_start;
+	cudaEventCreate( &beginEvent );
+	cudaEventCreate( &endEvent );
+#endif
+
+
     cudaMalloc((void**)&dev_pos, N * sizeof(glm::vec3));
     checkCUDAErrorWithLine("cudaMalloc dev_pos failed!");
 
@@ -181,6 +204,7 @@ __device__  glm::vec3 accelerate(int N, int iSelf, glm::vec3 this_planet, const
     // HINT: You may want to write a helper function that will compute the acceleration at
     //   a single point due to a single other mass. Be careful that you protect against
     //   division by very small numbers.
+
     // HINT: Use Newtonian gravitational acceleration:
     //       G M
     //  g = -----
@@ -190,7 +214,42 @@ __device__  glm::vec3 accelerate(int N, int iSelf, glm::vec3 this_planet, const
     //    * M is the mass of the other object
     //    * r is the distance between this object and the other object
 
-    return glm::vec3(0.0f);
+	// Net acceleration on the body at this_planet (index iSelf): the star's pull
+	// plus the pull of every other entry in other_planets[0..N).
+	//
+	// The loop slot k == iSelf is reused for the star, so the star is counted
+	// exactly once per body and a body never attracts itself. The star's offset
+	// is -this_planet, i.e. the star is treated as sitting at the origin.
+	const float kMinDist2 = 0.01f;	// floor on r^2 so near-contacts stay finite
+	glm::vec3 acc(0.0f, 0.0f, 0.0f);
+
+	for (int k = 0; k < N; k++)
+	{
+		const bool isStar = (k == iSelf);
+
+		// Offset from this body to the attracting mass, and that mass.
+		const glm::vec3 dis_vec = isStar ? -this_planet
+		                                 : other_planets[k] - this_planet;
+		const float m = isStar ? starMass : planetMass;
+
+		const float d2 = glm::dot(dis_vec, dis_vec);
+		if (d2 <= 0.0f)
+		{
+			// Coincident positions have no direction; normalizing the zero
+			// vector would put NaN into acc, so this pair contributes nothing.
+			continue;
+		}
+
+		// g = G * m / r^2 with r^2 clamped from below; m is the mass picked
+		// above, so planet-planet pairs use planetMass rather than starMass.
+		const float r2 = fmaxf(d2, kMinDist2);
+		const float g = G * m / r2;
+
+		// dis_vec / |dis_vec| is the unit direction towards the attractor.
+		acc += dis_vec * (g / sqrtf(d2));
+	}
+
+	return acc;
 }
 
 /**
@@ -201,6 +260,13 @@ __global__ void kernUpdateAcc(int N, float dt, const glm::vec3 *pos, glm::vec3 *
     // TODO: implement updateAccArray.
     // This function body runs once on each CUDA thread.
     // To avoid race conditions, each instance should only write ONE value to `acc`!
+	// One thread per body: the flattened 1D thread index selects the body.
+	const int self = threadIdx.x + blockIdx.x * blockDim.x;
+	if (self >= N) return;	// indices at or past N have no body to update
+
+	// The single write this thread performs: its own slot of `acc`, computed
+	// from its own position and the full position array.
+	acc[self] = accelerate(N, self, pos[self], pos);
 }
 
 /**
@@ -209,6 +275,14 @@ __global__ void kernUpdateAcc(int N, float dt, const glm::vec3 *pos, glm::vec3 *
  */
 __global__ void kernUpdateVelPos(int N, float dt, glm::vec3 *pos, glm::vec3 *vel, const glm::vec3 *acc) {
     // TODO: implement updateVelocityPosition
+	// One thread per body; indices at or past N have nothing to update.
+	const int i = threadIdx.x + blockIdx.x * blockDim.x;
+	if (i >= N) return;
+
+	// Velocity is advanced first so the position step uses the updated value.
+	const glm::vec3 v = vel[i] + acc[i] * dt;
+	vel[i] = v;
+	pos[i] += v * dt;
 }
 
 /**
@@ -217,4 +291,48 @@ __global__ void kernUpdateVelPos(int N, float dt, glm::vec3 *pos, glm::vec3 *vel
 void Nbody::stepSimulation(float dt) {
     // TODO: Using the CUDA kernels you wrote above, write a function that
     // calls the kernels to perform a full simulation step.
+	// One step = recompute every acceleration, then integrate velocity and
+	// position. The grid is sized by ceiling division so numObjects is covered.
+	dim3 fullBlocksPerGrid((numObjects + blockSize - 1) / blockSize);
+#ifdef PERFORMANCE_RATE
+	// The timing window opens on the step where the counter is at its start value.
+	if(update_counts == update_times_start)
+	{
+		cudaEventRecord(beginEvent,0);
+		checkCUDAErrorWithLine("cudaEventRecord beginEvent failed!");
+	}
+#endif
+
+	kernUpdateAcc<<< fullBlocksPerGrid,blockSize >>>(numObjects,dt,dev_pos,dev_acc);
+	checkCUDAErrorWithLine("kernUpdateAcc failed!");
+	kernUpdateVelPos<<< fullBlocksPerGrid,blockSize >>>(numObjects,dt,dev_pos,dev_vel,dev_acc);
+	checkCUDAErrorWithLine("kernUpdateVelPos failed!");
+
+#ifdef PERFORMANCE_RATE
+	update_counts ++ ;
+	if(update_counts == update_times_end)
+	{
+		// Wait for endEvent to complete so the elapsed time can be read back.
+		cudaEventRecord(endEvent,0);
+		cudaEventSynchronize( endEvent );
+		float ms = 0.0f;
+		cudaEventElapsedTime(&ms,beginEvent,endEvent);
+		checkCUDAErrorWithLine("reading the timing events failed!");
+		printf("updates:%d \nblocksize:%d \ntime:%f\n",update_times_end-update_times_start,blockSize,ms);
+	}
+#endif
 }
+
+
+
+void Nbody::endSimulation()
+{
+	// Teardown: free the three device buffers, then destroy the timing events.
+	cudaFree(dev_acc);
+	cudaFree(dev_vel);
+	cudaFree(dev_pos);
+#ifdef PERFORMANCE_RATE
+	cudaEventDestroy( beginEvent );
+	cudaEventDestroy( endEvent );
+#endif
+}
diff --git a/Project1-Part1/src/kernel.h b/Project1-Part1/src/kernel.h
@@ -9,4 +9,6 @@ namespace Nbody {
 void initSimulation(int N);
 void stepSimulation(float dt);
 void copyPlanetsToVBO(float *vbodptr);
+
+void endSimulation();
 }
diff --git a/Project1-Part1/src/main.cpp b/Project1-Part1/src/main.cpp
@@ -13,6 +13,7 @@
 // ================
 
 #define VISUALIZE 1
+//#define VISUALIZE 0
 
 const int N_FOR_VIS = 5000;
 const float DT = 0.2f;
@@ -25,6 +26,7 @@ int main(int argc, char* argv[]) {
 
     if (init(argc, argv)) {
         mainLoop();
+		Nbody::endSimulation();
         return 0;
     } else {
         return 1;
@@ -117,7 +119,7 @@ bool init(int argc, char **argv) {
 }
 
 void initVAO() {
-	glm::vec4 vertices[] = {
+    glm::vec4 vertices[] = {
         glm::vec4( -1.0, -1.0, 0.0, 0.0 ),
         glm::vec4( -1.0,  1.0, 0.0, 0.0 ),
         glm::vec4(  1.0,  1.0, 0.0, 0.0 ),

diff --git a/Project1-Part2/.gitignore b/Project1-Part2/.gitignore
diff --git a/Project1-Part2/CMakeLists.txt b/Project1-Part2/CMakeLists.txt
@@ -0,0 +1,86 @@
+cmake_minimum_required(VERSION 3.0)
+
+project(cis565_MatrixMath)
+
+set(CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake" ${CMAKE_MODULE_PATH})
+
+# Set up include and lib paths
+set(EXTERNAL "external")
+include_directories("${EXTERNAL}/include")
+include_directories("${EXTERNAL}/src")
+if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+    set(EXTERNAL_LIB_PATH "${EXTERNAL}/lib/osx")
+elseif(${CMAKE_SYSTEM_NAME} MATCHES "Linux")
+    set(EXTERNAL_LIB_PATH "${EXTERNAL}/lib/linux" "/usr/lib64")
+elseif(WIN32)
+    set(EXTERNAL_LIB_PATH "${EXTERNAL}/lib/win")
+endif()
+link_directories(${EXTERNAL_LIB_PATH})
+list(APPEND CMAKE_LIBRARY_PATH "${EXTERNAL_LIB_PATH}")
+
+# Find up and set up core dependency libs
+
+set(GLFW_INCLUDE_DIR "${EXTERNAL}/include")
+set(GLFW_LIBRARY_DIR "${CMAKE_LIBRARY_PATH}")
+find_library(GLFW_LIBRARY "glfw3" HINTS "${GLFW_LIBRARY_DIR}")
+
+set(GLEW_INCLUDE_DIR "${EXTERNAL}/include")
+set(GLEW_LIBRARY_DIR "${CMAKE_LIBRARY_PATH}")
+add_definitions(-DGLEW_STATIC)
+find_package(GLEW)
+
+find_package(OpenGL)
+
+set(CORELIBS
+    "${GLFW_LIBRARY}"
+    "${OPENGL_LIBRARY}"
+    "${GLEW_LIBRARY}"
+    )
+
+# Enable C++11 for host code
+set(CMAKE_CXX_STANDARD 11)
+
+list(APPEND CUDA_NVCC_FLAGS_DEBUG -G -g)  # device debug info for Debug builds only; -G turns off device-code optimization
+
+# OSX-specific hacks/fixes
+if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+    list(APPEND CORELIBS "-framework IOKit")
+    list(APPEND CORELIBS "-framework Cocoa")
+    list(APPEND CORELIBS "-framework CoreVideo")
+endif()
+
+# Linux-specific hacks/fixes
+if(${CMAKE_SYSTEM_NAME} MATCHES "Linux")  # CMAKE_EXE_LINKER_FLAGS is a space-separated string; list(APPEND) would join with ';'
+    set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lX11 -lXxf86vm -lXrandr -lpthread -lXi")
+endif()
+
+# Crucial magic for CUDA linking
+find_package(Threads REQUIRED)
+find_package(CUDA REQUIRED)
+
+set(CUDA_ATTACH_VS_BUILD_RULE_TO_CUDA_FILE ON)
+set(CUDA_SEPARABLE_COMPILATION ON)
+
+if(${CMAKE_SYSTEM_NAME} MATCHES "Darwin")
+    set(CUDA_PROPAGATE_HOST_FLAGS OFF)
+endif()
+
+add_subdirectory(src)
+
+cuda_add_executable(${CMAKE_PROJECT_NAME}
+    "src/main.hpp"
+    "src/main.cpp"
+    )
+
+target_link_libraries(${CMAKE_PROJECT_NAME}
+    src
+    ${CORELIBS}
+    )
+
+add_custom_command(
+    TARGET ${CMAKE_PROJECT_NAME}
+    POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E copy_directory
+        ${CMAKE_SOURCE_DIR}/shaders
+        ${CMAKE_BINARY_DIR}/shaders
+    )
diff --git a/Project1-Part2/GNUmakefile b/Project1-Part2/GNUmakefile
@@ -0,0 +1,31 @@
+CMAKE_ALT1 := /usr/local/bin/cmake
+CMAKE_ALT2 := /Applications/CMake.app/Contents/bin/cmake
+CMAKE := $(shell \
+	which cmake 2>/dev/null || \
+	([ -e ${CMAKE_ALT1} ] && echo "${CMAKE_ALT1}") || \
+	([ -e ${CMAKE_ALT2} ] && echo "${CMAKE_ALT2}") \
+	)
+
+all: RelWithDebugInfo
+
+
+Debug: build
+	(cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. && make)
+
+MinSizeRel: build
+	(cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. && make)
+
+Release: build
+	(cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=$@ .. && make)
+
+RelWithDebugInfo: build  # target name kept for compatibility; CMake spells the build type RelWithDebInfo
+	(cd build && ${CMAKE} -DCMAKE_BUILD_TYPE=RelWithDebInfo .. && make)
+
+
+build:
+	(mkdir -p build && cd build)
+
+clean:
+	((cd build && make clean) 2>&- || true)
+
+.PHONY: all Debug MinSizeRel Release RelWithDebugInfo clean