From 6c21df3da87ca4d87683c69c28a9b31102b1c921 Mon Sep 17 00:00:00 2001 From: netwarm007 Date: Wed, 4 Jan 2023 15:20:39 +0800 Subject: [PATCH] fix bug --- Asset/Shaders/CUDA/OptixTest.shader.cu | 2 +- Framework/GeomMath/random.hpp | 20 ++++++++++---------- Test/CudaTest.cu | 8 ++++---- Test/OptixTest.cu | 6 +++--- Test/OptixTest.hpp | 2 +- 5 files changed, 19 insertions(+), 19 deletions(-) diff --git a/Asset/Shaders/CUDA/OptixTest.shader.cu b/Asset/Shaders/CUDA/OptixTest.shader.cu index 15371330..fd9de4da 100644 --- a/Asset/Shaders/CUDA/OptixTest.shader.cu +++ b/Asset/Shaders/CUDA/OptixTest.shader.cu @@ -75,7 +75,7 @@ __global__ void __raygen__rg() { unsigned int j = launch_index.y; unsigned int pixel_index = j * params.image->Width + i; - curandState* local_rand_state = &params.rand_state[pixel_index]; + curandStateMRG32k3a* local_rand_state = &params.rand_state[pixel_index]; int num_of_samples = rtData->num_of_samples; vec3 col = {0.f, 0.f, 0.f}; diff --git a/Framework/GeomMath/random.hpp b/Framework/GeomMath/random.hpp index b72082d2..b7ae1ee3 100644 --- a/Framework/GeomMath/random.hpp +++ b/Framework/GeomMath/random.hpp @@ -12,23 +12,23 @@ namespace My { #ifdef __CUDACC__ template -__device__ T random_f(curandStateMRG32k3a_t *local_rand_state) { +__device__ __inline__ T random_f(curandStateMRG32k3a_t *local_rand_state) { return curand_uniform(local_rand_state); } template -__device__ T random_f(T min, T max, curandStateMRG32k3a_t *local_rand_state) { +__device__ __inline__ T random_f(T min, T max, curandStateMRG32k3a_t *local_rand_state) { T scale = max - min; return min + scale * curand_uniform(local_rand_state); } template requires std::integral -__device__ T random_int(T min, T max, curandStateMRG32k3a_t *local_rand_state) { +__device__ __inline__ T random_int(T min, T max, curandStateMRG32k3a_t *local_rand_state) { return static_cast(random_f(static_cast(min), static_cast(max), local_rand_state)); } template -__device__ Vector random_v(curandStateMRG32k3a_t 
*local_rand_state) { +__device__ __inline__ Vector random_v(curandStateMRG32k3a_t *local_rand_state) { auto vec = Vector(); for (int i = 0; i < N; i++) { vec[i] = random_f(local_rand_state); @@ -38,7 +38,7 @@ __device__ Vector random_v(curandStateMRG32k3a_t *local_rand_state) { } template -__device__ Vector random_v(T min, T max, curandStateMRG32k3a_t *local_rand_state) { +__device__ __inline__ Vector random_v(T min, T max, curandStateMRG32k3a_t *local_rand_state) { auto vec = Vector(); for (int i = 0; i < N; i++) { vec[i] = random_f(min, max, local_rand_state); @@ -48,7 +48,7 @@ __device__ Vector random_v(T min, T max, curandStateMRG32k3a_t *local_rand } template -__device__ Vector random_in_unit_sphere(curandStateMRG32k3a_t *local_rand_state) { +__device__ __inline__ Vector random_in_unit_sphere(curandStateMRG32k3a_t *local_rand_state) { while (true) { auto p = random_v(T(-1), T(1), local_rand_state); if (LengthSquared(p) >= 1) continue; @@ -57,14 +57,14 @@ __device__ Vector random_in_unit_sphere(curandStateMRG32k3a_t *local_rand_ } template -__device__ Vector random_unit_vector(curandStateMRG32k3a_t *local_rand_state) { +__device__ __inline__ Vector random_unit_vector(curandStateMRG32k3a_t *local_rand_state) { auto p = random_in_unit_sphere(local_rand_state); Normalize(p); return p; } template -__device__ Vector random_in_hemisphere(const Vector& normal, curandStateMRG32k3a_t *local_rand_state) { +__device__ __inline__ Vector random_in_hemisphere(const Vector& normal, curandStateMRG32k3a_t *local_rand_state) { auto p = random_in_unit_sphere(local_rand_state); T result; DotProduct(result, p, normal); @@ -76,7 +76,7 @@ __device__ Vector random_in_hemisphere(const Vector& normal, curandS } template -__device__ Vector3 random_in_hemisphere_cosine_weighted(const Vector3& normal, curandStateMRG32k3a_t *local_rand_state) { +__device__ __inline__ Vector3 random_in_hemisphere_cosine_weighted(const Vector3& normal, curandStateMRG32k3a_t *local_rand_state) { auto uv = 
random_v(local_rand_state); T phi = 2.0 * PI * uv[0]; @@ -90,7 +90,7 @@ __device__ Vector3 random_in_hemisphere_cosine_weighted(const Vector3& nor } template -__device__ Vector3 random_in_unit_disk(curandStateMRG32k3a_t *local_rand_state) { +__device__ __inline__ Vector3 random_in_unit_disk(curandStateMRG32k3a_t *local_rand_state) { while (true) { auto p = Vector3({random_f(T(-1.0), T(1.0), local_rand_state), random_f(T(-1.0), T(1.0), local_rand_state), 0}); if (LengthSquared(p) >= (T)1.0) continue; diff --git a/Test/CudaTest.cu b/Test/CudaTest.cu index 4514185c..8bb5db2c 100644 --- a/Test/CudaTest.cu +++ b/Test/CudaTest.cu @@ -22,13 +22,13 @@ void check_cuda(cudaError_t result, char const *const func, } } -__global__ void rand_init(curandState *rand_state) { +__global__ void rand_init(curandStateMRG32k3a *rand_state) { if (threadIdx.x == 0 && blockIdx.x == 0) { curand_init(2023, 0, 0, rand_state); } } -__global__ void test(curandState *local_rand_state) { +__global__ void test(curandStateMRG32k3a *local_rand_state) { const int scene_obj_num = 1; My::Hitable** pList = new My::Hitable*[scene_obj_num]; for (int i = 0; i < scene_obj_num; i++) { @@ -42,9 +42,9 @@ __global__ void test(curandState *local_rand_state) { } int main() { - curandState *d_rand_state_1; + curandStateMRG32k3a *d_rand_state_1; - checkCudaErrors(cudaMalloc((void **)&d_rand_state_1, sizeof(curandState))); + checkCudaErrors(cudaMalloc((void **)&d_rand_state_1, sizeof(curandStateMRG32k3a))); rand_init<<<1, 1>>>(d_rand_state_1); diff --git a/Test/OptixTest.cu b/Test/OptixTest.cu index 07106015..57543321 100644 --- a/Test/OptixTest.cu +++ b/Test/OptixTest.cu @@ -73,7 +73,7 @@ static void context_log_cb( unsigned int level, const char* tag, const char* mes << message << "\n"; } -__global__ void rand_init(curandState *rand_state, const unsigned int max_x, const unsigned int max_y) { +__global__ void rand_init(curandStateMRG32k3a *rand_state, const unsigned int max_x, const unsigned int max_y) { // Each 
thread in a block gets unique seed int i = threadIdx.x + blockIdx.x * blockDim.x; int j = threadIdx.y + blockIdx.y * blockDim.y; @@ -360,7 +360,7 @@ int main() { My::Image img; My::Image* d_img; My::RayTracingCamera* d_camera; - curandState* d_rand_state; + curandStateMRG32k3a* d_rand_state; { const float aspect_ratio = 16.0 / 9.0; const int image_width = 1920; @@ -401,7 +401,7 @@ int main() { dim3 blocks((image_width + tile_width - 1) / tile_width, (image_height + tile_height - 1) / tile_height); dim3 threads(tile_width, tile_height); - checkCudaErrors(cudaMalloc((void **)&d_rand_state, num_pixels * sizeof(curandState))); + checkCudaErrors(cudaMalloc((void **)&d_rand_state, num_pixels * sizeof(curandStateMRG32k3a))); rand_init<<>>(d_rand_state, image_width, image_height); checkCudaErrors(cudaGetLastError()); diff --git a/Test/OptixTest.hpp b/Test/OptixTest.hpp index 1260a2d0..e5a0f206 100644 --- a/Test/OptixTest.hpp +++ b/Test/OptixTest.hpp @@ -7,7 +7,7 @@ struct Params{ My::Image* image; My::RayTracingCamera* cam; - curandState* rand_state; + curandStateMRG32k3a* rand_state; OptixTraversableHandle handle; };