diff --git a/CMakeLists.txt b/CMakeLists.txt index 62c0e599..7ac42c58 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -73,6 +73,15 @@ set(headers src/sceneStructs.h src/preview.h src/utilities.h + src/MeshLoading/tiny_obj_loader.h + src/MeshLoading/polygon.h + src/MeshLoading/BVH.h + src/LSystem/lsystem.h + src/LSystem/postcondition.h + src/LSystem/rule.h + src/LSystem/symbol.h + src/LSystem/turtle.h + src/ProceduralTextures.h ) set(sources @@ -84,6 +93,14 @@ set(sources src/scene.cpp src/preview.cpp src/utilities.cpp + src/MeshLoading/tiny_obj_loader.cc + src/MeshLoading/polygon.cpp +src/MeshLoading/BVH.cpp +src/LSystem/lsystem.cpp + src/LSystem/postcondition.cpp + src/LSystem/rule.cpp + src/LSystem/symbol.cpp + src/LSystem/turtle.cpp ) list(SORT headers) @@ -92,10 +109,10 @@ list(SORT sources) source_group(Headers FILES ${headers}) source_group(Sources FILES ${sources}) -#add_subdirectory(stream_compaction) # TODO: uncomment if using your stream compaction +add_subdirectory(stream_compaction) # TODO: uncomment if using your stream compaction cuda_add_executable(${CMAKE_PROJECT_NAME} ${sources} ${headers}) target_link_libraries(${CMAKE_PROJECT_NAME} ${LIBRARIES} - #stream_compaction # TODO: uncomment if using your stream compaction + stream_compaction # TODO: uncomment if using your stream compaction ) diff --git a/README.md b/README.md index 110697ce..a964ad31 100644 --- a/README.md +++ b/README.md @@ -3,11 +3,127 @@ CUDA Path Tracer **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 3** -* (TODO) YOUR NAME HERE -* Tested on: (TODO) Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Shubham Sharma + * [LinkedIn](www.linkedin.com/in/codeshubham), [personal website](https://shubhvr.com/). +* Tested on: Windows 10, i7-9750H @ 2.26GHz, 16GB, GTX 1660ti 6GB (Personal Computer). +*GPU Compute Capability: 7.5 -### (TODO: Your README) +![Performance Analysis](img/Mesh/xyzdragon.png) -*DO NOT* leave the README to the last minute! It is a crucial part of the -project, and we will not be able to grade you without a good README. +## Project Description +Path tracing is a computer graphics Monte Carlo method of rendering images of three-dimensional scenes such that the global illumination is faithful to reality. + +### Features +- Shading Kernel with BSDF Evaluation +- Uniform diffuse +- Perfect specular reflective (mirror) +- specular refractive (Fresnel dielectric) +- Path Continuation/Termination with Stream Compaction +- Toggleable Sorting of ray Paths by material type +- Toggleable first bounce intersection cache to be used by subsequent iterations +- Anti-aliasing rays with sub-pixel samples +- Arbitrary OBJ Mesh Loading with toggleable 7 Plane Bounding Volume intersection culling +- Camera depth of field +- Procedural Structure +- Procedural texture +- Subsurface Scattering (In Progress) + +### Materials +Material shading is split into different BSDF evaluation functions based on material type. This project supported materials include **diffuse**, **reflective** and **refractive** (fresnel dielectric). Diffuse material scattering is computed by using cosine-weighted samples within a hemisphere. Reflective materials reflect the light ray about the surface nornmal and refractive materials refracts the ray through the material according to Snell's law of refraction and with added fresnel computation for better real life depiction. +Here's an image consisting of the three materials +- Left: **Diffuse** +- Middle: **Refractive** (Fresnel and Schlick) +- Right: **Reflective (Perfect Specular)** + + +![Performance Analysis](img/Basic/Material.png) + +Below, a compound reflective and refractive impact is executed through a Fresnel fabric, which reflects light beams that are more digression to its surface. This makes a light rays passing through the object refracted, whereas rays brushing the sides of the fabric are reflected. Rather than specifically calculating the Fresnel component, I assess it utilizing Schlick's approximation. +Here's an image consisting of the three refractive materials. +- Left: Refractive index: 1.2 +- Middle: Refractive index: 2.5 +- Right : Probabilistic Reflective and Refractive Material: Refractive index: 1.2 + +![Performance Analysis](img/Basic/Refractive.png) + +### Depth Of Field +The scene camera can be set to enable Depth of Field effect which utilises focal distance and lens radius parameters to change the depth of this effect. Geometries located at the focal distance within the lens radius stay in focus while other geometry around the scene will be distorted. + +- **Focal Distance: 10** + ![Performance Analysis](img/DOF/focal10.png) + +- **Focal Distance: 20** + ![Performance Analysis](img/DOF/focal20.png) + +### Stochastic Anti-Aliasing +Utilizing anti-aliasing for subpixel sampling brings in smoother geometry edges within the render. It is vital to note that anti-aliasing and first bounce cache don't work together, since the pixel tests will vary per iteration, and ached first bounces from the first iteration won't match the generated ray direction in further iterations. I added the flag for Cache Bounce which toggles off anti-aliasing and setting cache off in turn enables anti-aliasing, + +- **With Anti-Aliasing** + ![Performance Analysis](img/AntiAliasing/aa.png) + +- **Without Anti-Aliasing** + ![Performance Analysis](img/AntiAliasing/waa.png) + +### OBJ Loading +In order to bring the mesh data into C++, I used the tinyobj library. I build the polygon mesh using the position and normal data of the triangles triangles from the imported data to create the mesh triangles and store triangle information per arbitrary mesh. + +7 planar Bounding volume intersection culling as proposed by Kay and Kajiya in accelerated structeres, is applied at the ray-geometry intersection test to reduce the number of rays that have to be checked against the entire mesh by first checking rays against a volume that completely bounds the mesh. This feature is implemented as toggleable for performance analysis purposes. Pressing the 'B' key while running the GPU renderer will enable this feature. + +In order to smoothen the triangles on round meshes, the intersection normal is computed from the barycentric interpolation of the 3 normals from the triangle vertices. + +- **Lucy** + ![Performance Analysis](img/Mesh/Lucy.png) + +- **Wahoo** + ![Performance Analysis](img/Mesh/wahoo.png) + +### Procedural Structures +I have used L System grammar which generates complex patterns for procedural data. An L-system consists of an alphabet of symbols that can be used to make strings, It consist of an axiom: initial configuration, a collection of production rules that expand each symbol into some larger string of symbols and a mechanism for translating the generated strings into geometric structures. + +![Performance Analysis](img/ProceduralShapes/1.png) + +![Performance Analysis](img/ProceduralShapes/tree.png) + +### Procedural Textures +I have used a simple sinusoidal and cosine functions as well combination of different noise functions like Perlin, FBM and Worley to generate procedural textures. A spherical bi linear function which transforms positional coordinates to UV coordinates is used in turn with noise functions to apply these textures accross wide variety of mesh data. + +![Performance Analysis](img/ProceduralTexture/1.png) + +# Performance Analysis + +## Optimisations + +### Stream Compaction + +Stream compaction generally progress the execution by terminating the rays in case they are futile. Less threads will be made and the execution quickened. The first-bounce cache moreover moves forward 13% execution by caching the primary crossing point of the beam. In expansion, the work puts the active rays closer together in memory, which ought to make getting to faster global memory access rates since they gets to will be continguous, rather than random. Underneath Values of remaining rays in Open and Closed Cornell Box shown in the chart. + +![Performance Analysis](img/Compaction.PNG) + +### Material Sorting +In my case the material sorting slows down the execution time of program. I believe this can be attributed to less number of materials in my scene to begin with. The method in theory would sort the rays with similar material object intersections closer as they will have about the same lifetime. Since the scene doesn't have many materials the probablity of rays with same material behaviour being contiguous in memory is already high and sorting them only adds an overhead in this case. If the scene has a large number of materials then i believe the execution times will increase with Material Sorting. + +### Caching First Bounce +Since the first bounce intersections stays the same if the camera doesn't change, A cache can be implemented to save the intersection of the first bounce in the first iteration and the results can be directly used in later iterations. Since for antialiasing each iteration first bounce is different caching cant be done. +I have recorded an average of the exection time of my program for 3000 iterations below, with and without caching the first bounce. From the data we can depict an improvement of of 90ms in execution times. + +| Without Cache | With Cache | +|---|---| +| 2min 4.03s | 2min 3.08s | + +### Volume Intersection Culling +I used 3 arbitrary mesh examples to analyze the peformance benefits of enabling volume intersection culling for complex meshes. The table below shows the mesh examples used for this analysis and how many triangles they contain. + +| Cube | Lucy | XYZ Dragon | +|---|---|---| +| 12 | 19998 | 50000 | + + +![Performance Analysis](img/VolumeIntersectionCulling.png) + +Using volume intersection culling for simpler arbitrary meshes with low triangle count such as cube doesn't provide a significant performance improvement. However as the triangle count increases we can see significant improvement which can be attributed to number of triangles to check if bounding volume is hit. Each ray only performs 7 intersection check with 7 sided polygon volume heirarchy as compared to 50000 intersection checks with triangles for XYZ dragon. With BVH we save about 7 seconds in just 10 iterations. + +### Bloopers +![Performance Analysis](img/Bloopers/1.png) +![Performance Analysis](img/Bloopers/2.png) +![Performance Analysis](img/Bloopers/3.png) \ No newline at end of file diff --git a/img/AntiAliasing/aa.png b/img/AntiAliasing/aa.png new file mode 100644 index 00000000..e0a5736f Binary files /dev/null and b/img/AntiAliasing/aa.png differ diff --git a/img/AntiAliasing/waa.png b/img/AntiAliasing/waa.png new file mode 100644 index 00000000..cc6dbcef Binary files /dev/null and b/img/AntiAliasing/waa.png differ diff --git a/img/Basic/Material.png b/img/Basic/Material.png new file mode 100644 index 00000000..443fe61c Binary files /dev/null and b/img/Basic/Material.png differ diff --git a/img/Basic/Refractive.png b/img/Basic/Refractive.png new file mode 100644 index 00000000..db4740c0 Binary files /dev/null and b/img/Basic/Refractive.png differ diff --git a/img/Bloopers/1.png b/img/Bloopers/1.png new file mode 100644 index 00000000..ec3924af Binary files /dev/null and b/img/Bloopers/1.png differ diff --git a/img/Bloopers/2.png b/img/Bloopers/2.png new file mode 100644 index 00000000..8087b913 Binary files /dev/null and b/img/Bloopers/2.png differ diff --git a/img/Bloopers/3.png b/img/Bloopers/3.png new file mode 100644 index 00000000..13491101 Binary files /dev/null and b/img/Bloopers/3.png differ diff --git a/img/Compaction.PNG b/img/Compaction.PNG new file mode 100644 index 00000000..2a7a1abe Binary files /dev/null and b/img/Compaction.PNG differ diff --git a/img/DOF/cornell.2021-10-10_16-41-25z.5000samp.png b/img/DOF/cornell.2021-10-10_16-41-25z.5000samp.png new file mode 100644 index 00000000..bcbae138 Binary files /dev/null and b/img/DOF/cornell.2021-10-10_16-41-25z.5000samp.png differ diff --git a/img/DOF/focal10.png b/img/DOF/focal10.png new file mode 100644 index 00000000..17b98b61 Binary files /dev/null and b/img/DOF/focal10.png differ diff --git a/img/DOF/focal20.png b/img/DOF/focal20.png new file mode 100644 index 00000000..e96cfe75 Binary files /dev/null and b/img/DOF/focal20.png differ diff --git a/img/Mesh/Lucy.png b/img/Mesh/Lucy.png new file mode 100644 index 00000000..27a54b4c Binary files /dev/null and b/img/Mesh/Lucy.png differ diff --git a/img/Mesh/sphere.2021-10-10_14-06-08z.1552samp.png b/img/Mesh/sphere.2021-10-10_14-06-08z.1552samp.png new file mode 100644 index 00000000..27adfdac Binary files /dev/null and b/img/Mesh/sphere.2021-10-10_14-06-08z.1552samp.png differ diff --git a/img/Mesh/wahoo.png b/img/Mesh/wahoo.png new file mode 100644 index 00000000..d7107aa5 Binary files /dev/null and b/img/Mesh/wahoo.png differ diff --git a/img/Mesh/xyzdragon.png b/img/Mesh/xyzdragon.png new file mode 100644 index 00000000..31d3a481 Binary files /dev/null and b/img/Mesh/xyzdragon.png differ diff --git a/img/ProceduralShapes/1.png b/img/ProceduralShapes/1.png new file mode 100644 index 00000000..078becaf Binary files /dev/null and b/img/ProceduralShapes/1.png differ diff --git a/img/ProceduralShapes/tree.png b/img/ProceduralShapes/tree.png new file mode 100644 index 00000000..972e6f6c Binary files /dev/null and b/img/ProceduralShapes/tree.png differ diff --git a/img/ProceduralTexture/1.png b/img/ProceduralTexture/1.png new file mode 100644 index 00000000..ab9693a9 Binary files /dev/null and b/img/ProceduralTexture/1.png differ diff --git a/img/VolumeIntersectionCulling.png b/img/VolumeIntersectionCulling.png new file mode 100644 index 00000000..fdf048a8 Binary files /dev/null and b/img/VolumeIntersectionCulling.png differ diff --git a/scenes/Cube.txt b/scenes/Cube.txt new file mode 100644 index 00000000..0dfb7d49 --- /dev/null +++ b/scenes/Cube.txt @@ -0,0 +1,145 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 +usingProcTex 0 +isSubSurface 0 + +// Emissive material +MATERIAL 1 +RGB 0.339 0.1 0.5552 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 +usingProcTex 0 +isSubSurface 0 + +// Emissive material +MATERIAL 2 +RGB 0.6 0.25 0 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 +usingProcTex 0 +isSubSurface 0 + +// Emissive material +MATERIAL 3 +RGB 0.002 0.438 0.559 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 +usingProcTex 0 +isSubSurface 0 + +// Diffuse white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 + +// Reflective white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +isSubSurface 0 + +// Emissive material +MATERIAL 5 +RGB 0.9 0 0 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 +usingProcTex 0 +isSubSurface 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE cube +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + +// Ceiling light +OBJECT 0 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 10 .3 10 + +// Right light2 +OBJECT 1 +cube +material 2 +TRANS 10 0 0 +ROTAT 0 0 -90 +SCALE 10 .3 10 + + +// Left light3 +OBJECT 2 +cube +material 3 +TRANS -10 0 0 +ROTAT 0 0 -90 +SCALE 10 .3 10 + + +// Back light3 +OBJECT 3 +cube +material 0 +TRANS 0 0 -10 +ROTAT 90 0 0 +SCALE 10 .3 10 + + +// Wahoo +OBJECT 4 +obj +material 4 +TRANS 0 3 0 +ROTAT 0 0 0 +SCALE 1 1 1 + +// Floor +OBJECT 5 +cube +material 4 +TRANS 0 -5 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + diff --git a/scenes/ProceduralTexture.txt b/scenes/ProceduralTexture.txt new file mode 100644 index 00000000..776f1447 --- /dev/null +++ b/scenes/ProceduralTexture.txt @@ -0,0 +1,174 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 1 +isSubSurface 1 +ProcTexNum 1 + +// Refractive white +MATERIAL 5 +RGB .98 .98 .98 +SPECEX 1 +SPECRGB .8 .8 .98 +REFL 0 +REFR 1 +REFRIOR 5 +EMITTANCE 0 +usingProcTex 1 +isSubSurface 1 +ProcTexNum 2 + +// ProcTexture white +MATERIAL 6 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 1 +isSubSurface 1 +ProcTexNum 2 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE cornell +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Sphere +OBJECT 6 +sphere +material 4 +TRANS -1 4 -1 +ROTAT 0 0 0 +SCALE 3 3 3 + + +// Sphere2 +OBJECT 7 +sphere +material 5 +TRANS -1 7 0 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Sphere3 +OBJECT 8 +sphere +material 6 +TRANS 2 7 0 +ROTAT 0 0 0 +SCALE 3 3 3 diff --git a/scenes/bunny.txt b/scenes/bunny.txt new file mode 100644 index 00000000..8f1f153a --- /dev/null +++ b/scenes/bunny.txt @@ -0,0 +1,94 @@ +// Emissive material (light) +MATERIAL 0 +RGB 0.98 0.98 0.98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 8 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Diffuse white +MATERIAL 1 +RGB 0.98 0.98 0.0 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 1 +isSubSurface 1 +ProcTexNum 2 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 1 +SPECRGB .85 .35 .35 +REFL 1 +REFR 0 +REFRIOR 0.5 +EMITTANCE 0 +usingProcTex 2 +isSubSurface 0 +ProcTexNum 0 + +// Refractive white +MATERIAL 3 +RGB .98 .98 .98 +SPECEX 1 +SPECRGB .8 .8 .98 +REFL 0 +REFR 1 +REFRIOR 10 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE sphere +EYE 0.0 10 10.5 +LOOKAT 0 0 0 +UP 0 1 0 + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 12 0 +ROTAT 0 0 0 +SCALE 10 .3 10 + +// Obj +OBJECT 1 +obj +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 1 1 1 + +// Front light +OBJECT 2 +cube +material 0 +TRANS 0 0 18 +ROTAT -90 0 0 +SCALE 10 .3 10 + +// Floor +OBJECT 3 +cube +material 1 +TRANS 0 -5 0 +ROTAT 0 0 0 +SCALE 10 .01 10 diff --git a/scenes/cornell.txt b/scenes/cornell.txt index 83ff8202..e4c37b62 100644 --- a/scenes/cornell.txt +++ b/scenes/cornell.txt @@ -7,6 +7,9 @@ REFL 0 REFR 0 REFRIOR 0 EMITTANCE 5 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 // Diffuse white MATERIAL 1 @@ -17,6 +20,9 @@ REFL 0 REFR 0 REFRIOR 0 EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 // Diffuse red MATERIAL 2 @@ -27,6 +33,9 @@ REFL 0 REFR 0 REFRIOR 0 EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 // Diffuse green MATERIAL 3 @@ -37,16 +46,35 @@ REFL 0 REFR 0 REFRIOR 0 EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 // Specular white MATERIAL 4 RGB .98 .98 .98 SPECEX 0 SPECRGB .98 .98 .98 -REFL 1 +REFL 0.6 REFR 0 REFRIOR 0 EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Refractive white +MATERIAL 5 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 0 +REFR 0.5 +REFRIOR 1.5 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 // Camera CAMERA @@ -59,7 +87,6 @@ EYE 0.0 5 10.5 LOOKAT 0 5 0 UP 0 1 0 - // Ceiling light OBJECT 0 cube @@ -115,3 +142,20 @@ material 4 TRANS -1 4 -1 ROTAT 0 0 0 SCALE 3 3 3 + + +// Sphere2 +OBJECT 7 +sphere +material 5 +TRANS -1 7 0 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Sphere3 +OBJECT 8 +sphere +material 1 +TRANS 2 7 0 +ROTAT 0 0 0 +SCALE 3 3 3 diff --git a/scenes/cornell2.txt b/scenes/cornell2.txt new file mode 100644 index 00000000..1ec84c3e --- /dev/null +++ b/scenes/cornell2.txt @@ -0,0 +1,129 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 + + +// Refractive white +MATERIAL 5 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 0 +REFR 1 +REFRIOR 1.5 +EMITTANCE 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE cornell +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 5 .3 5 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Wahoo +OBJECT 6 +obj +material 1 +TRANS 0 2 0 +ROTAT 0 0 0 +SCALE 1 1 1 + diff --git a/scenes/cornellclosed.txt b/scenes/cornellclosed.txt new file mode 100644 index 00000000..3a2d4c99 --- /dev/null +++ b/scenes/cornellclosed.txt @@ -0,0 +1,152 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 0.6 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Refractive white +MATERIAL 5 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 0 +REFR 0.5 +REFRIOR 1.5 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE cornell +EYE 0.0 5 4 +LOOKAT 0 5 0 +UP 0 1 0 + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Sphere +OBJECT 6 +sphere +material 1 +TRANS -1 4 -1 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Front wall +OBJECT 7 +cube +material 1 +TRANS 0 5 5 +ROTAT 0 0 0 +SCALE 10 10 .1 \ No newline at end of file diff --git a/scenes/dof.txt b/scenes/dof.txt new file mode 100644 index 00000000..00db5921 --- /dev/null +++ b/scenes/dof.txt @@ -0,0 +1,172 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 5 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Diffuse green +MATERIAL 3 +RGB .35 .85 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Specular white +MATERIAL 4 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 1 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Refractive white +MATERIAL 5 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB .98 .98 .98 +REFL 0 +REFR 1 +REFRIOR 1.5 +EMITTANCE 0 +usingProcTex 0 +isSubSurface 0 +ProcTexNum 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE cornell +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 10 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Floor +OBJECT 1 +cube +material 1 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 10 .01 10 + +// Ceiling +OBJECT 2 +cube +material 1 +TRANS 0 10 0 +ROTAT 0 0 90 +SCALE .01 10 10 + +// Back wall +OBJECT 3 +cube +material 1 +TRANS 0 5 -5 +ROTAT 0 90 0 +SCALE .01 10 10 + +// Left wall +OBJECT 4 +cube +material 2 +TRANS -5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Right wall +OBJECT 5 +cube +material 3 +TRANS 5 5 0 +ROTAT 0 0 0 +SCALE .01 10 10 + +// Sphere +OBJECT 6 +sphere +material 4 +TRANS 0 3 -1 +ROTAT 0 0 0 +SCALE 1 1 1 + + +// Sphere2 +OBJECT 7 +sphere +material 5 +TRANS 0 3 2 +ROTAT 0 0 0 +SCALE 1 1 1 + +// Sphere3 +OBJECT 8 +sphere +material 1 +TRANS 0 3 5 +ROTAT 0 0 0 +SCALE 1 1 1 + + + +// Sphere4 +OBJECT 9 +sphere +material 1 +TRANS 0 3 8 +ROTAT 0 0 0 +SCALE 1 1 1 + diff --git a/scenes/subsurface.txt b/scenes/subsurface.txt new file mode 100644 index 00000000..0dff2993 --- /dev/null +++ b/scenes/subsurface.txt @@ -0,0 +1,79 @@ +// Emissive material (light) +MATERIAL 0 +RGB 1 1 1 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 8 +usingProcTex 0 + +// Diffuse white +MATERIAL 1 +RGB .98 .98 .98 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 + +// Diffuse red +MATERIAL 2 +RGB .85 .35 .35 +SPECEX 0 +SPECRGB 0 0 0 +REFL 0 +REFR 0 +REFRIOR 0 +EMITTANCE 0 +usingProcTex 0 + +// Refractive white +MATERIAL 3 +RGB .98 .98 .98 +SPECEX 1 +SPECRGB .8 .8 .98 +REFL 0 +REFR 1 +REFRIOR 10 +EMITTANCE 0 +usingProcTex 0 + +// Camera +CAMERA +RES 800 800 +FOVY 45 +ITERATIONS 5000 +DEPTH 8 +FILE sphere +EYE 0.0 5 10.5 +LOOKAT 0 5 0 +UP 0 1 0 + +// Ceiling light +OBJECT 0 +cube +material 0 +TRANS 0 6 0 +ROTAT 0 0 0 +SCALE 3 .3 3 + +// Sphere +OBJECT 1 +sphere +material 2 +TRANS 0 0 0 +ROTAT 0 0 0 +SCALE 3 3 3 + +// Front light +OBJECT 2 +cube +material 0 +TRANS 0 0 -5 +ROTAT 90 0 0 +SCALE 3 .3 3 + diff --git a/src/LSystem/lsystem.cpp b/src/LSystem/lsystem.cpp new file mode 100644 index 00000000..13592cc3 --- /dev/null +++ b/src/LSystem/lsystem.cpp @@ -0,0 +1,325 @@ +#include "lsystem.h" + +float sdRoundCone(glm::vec3 p, glm::vec3 a, glm::vec3 b, float r1, float r2); +float sdCapsule(glm::vec3 p, glm::vec3 a, glm::vec3 b, float r); +void MoveForward(); +float sdTriPrism(glm::vec3 p, glm::vec2 h); + +LSystem::LSystem(Turtle& currturtle) +{ + currTurtle = mkU(currturtle); + AddDefaultFuncPointer(); +} + +void LSystem::AddRule(char a_refChar, Rule a_postCondition) +{ + RulesList.insert(std::make_pair(a_refChar, a_postCondition)); +} + +void LSystem::AddFuncPointer(char a_refChar, RuleFunc a_funcPointer) +{ + Func_Pointer.insert(std::make_pair(a_refChar, a_funcPointer)); +} + +void LSystem::LSystemParse(int iterations) +{ + for (int i = 0; i < iterations; i++) + { + Symbol* currSym = rootSymbol; + while (currSym != nullptr) + { + if (RulesList.find(currSym->m_refCharacter) != RulesList.end()) + { + Symbol* nextSym = currSym->next; + ApplyRule(currSym->prev, currSym, nextSym, currSym->m_refCharacter, i + 1); + + currSym = nextSym; + continue; + } + currSym = currSym->next; + } + } +} + +void LSystem::PrintParsedSystem() +{ + Symbol* currSym = rootSymbol; + if (currSym != NULL) + { + while (currSym != NULL) + { + std::cout << currSym->m_refCharacter; + currSym = currSym->next; + } + } +} + +void LSystem::ClearParsedString() +{ + SymbolList.clear(); + rootSymbol = nullptr; +} + +void LSystem::AssignAxiom(std::string a_refAxiom) +{ + axiom = a_refAxiom; + sPtr m_rootSymbol = mkS(a_refAxiom[0], 0); + rootSymbol = m_rootSymbol.get(); + SymbolList.push_back(std::move(m_rootSymbol)); + if (a_refAxiom.length() <= 1) + { + return; + } + Symbol* prevSym = rootSymbol; + + for (int i = 1; i < a_refAxiom.length(); i++) + { + char c = a_refAxiom[i]; + sPtr newSymbol = mkS(c, 0); + prevSym->next = newSymbol.get(); + newSymbol->prev = prevSym; + prevSym = prevSym->next; + SymbolList.push_back(std::move(newSymbol)); + } +} + +void LSystem::ApplyRule(Symbol* a_prevSym, Symbol* a_currSym, Symbol* a_endSym, char a_ruleKey, int iteration) +{ + Symbol* prevSym = a_prevSym; + Symbol* del_Symbol = a_currSym; + + + //delete del_Symbol; + float randNumber = (std::rand() % 100) / 100.0f; + Rule currRule(RulesList[a_ruleKey]); + PostCondition randomRule = currRule.GetRandomRule(randNumber); + std::string RuleValue = randomRule.new_symbol; + + if (a_currSym == rootSymbol) + { + sPtr newSymbol = mkS(RuleValue[0], iteration); + rootSymbol = newSymbol.get(); + prevSym = newSymbol.get(); + SymbolList.push_back(std::move(newSymbol)); + + for (int i = 1; i < RuleValue.length(); i++) + { + sPtr newSymbol = mkS(RuleValue[i], iteration); + prevSym->next = newSymbol.get(); + newSymbol->prev = prevSym; + prevSym = newSymbol.get(); + SymbolList.push_back(std::move(newSymbol)); + } + } + else + { + for (int i = 0; i < RuleValue.length(); i++) + { + sPtr newSymbol = mkS(RuleValue[i], iteration); + prevSym->next = newSymbol.get(); + newSymbol->prev = prevSym; + prevSym = newSymbol.get(); + SymbolList.push_back(std::move(newSymbol)); + } + } + prevSym->next = a_endSym; + if (a_endSym != nullptr) + { + a_endSym->prev = prevSym; + } +} + +void LSystem::AddDefaultFuncPointer() +{ + AddFuncPointer('F', &Turtle::Move); + AddFuncPointer('[', &Turtle::SaveState); + AddFuncPointer(']', &Turtle::LoadState); + AddFuncPointer('+', &Turtle::TurnRight); + AddFuncPointer('-', &Turtle::TurnLeft); +} + +void MoveForward(Turtle(*currTurtle)) +{ + std::cout << "Move Forward" << std::endl; + currTurtle->Move(); +} + +bool ContainsPos(std::vector &posBuffer, glm::vec3 a_position) +{ + for (int i = 0; i < posBuffer.size(); i++) + { + if (posBuffer[i] == a_position) + { + return true; + } + } + return false; +} + + +inline double random_double() { + static std::uniform_real_distribution distribution(0.0, 1.0); + static std::mt19937 generator; + return distribution(generator); +} + +int CreateNewMaterial(std::vector &materials) { + + + int start = materials.size(); + //load static properties + for (int i = start; i < start + 3; i++) { + Material newMaterial; + newMaterial.color = glm::vec3(random_double(), random_double(), random_double()); + newMaterial.isSubSurface = false; + + newMaterial.specular.exponent = random_double(); + newMaterial.specular.color = glm::vec3(random_double(), random_double(), random_double()); + double reflect = random_double(); + if (reflect > 0.7) + { + newMaterial.hasReflective = 1; + } + else + { + newMaterial.hasReflective = 0; + } + if (i == start) + { + //newMaterial.hasRefractive = 1; + newMaterial.usingProcTex = 1; + newMaterial.ProcTexNum = 1; + } + + newMaterial.indexOfRefraction = 1 + reflect; + + newMaterial.emittance = 0; + materials.push_back(newMaterial); + } + return start; + } + + +Geom createNewGeom(glm::vec3 currPos, int materialID, enum GeomType type) { + Geom newGeom; + newGeom.type = type; + newGeom.materialid = materialID; + + newGeom.translation = currPos; + newGeom.rotation = glm::vec3(0, 0, 0); + newGeom.scale = glm::vec3(0.5, 0.5, 0.5); + newGeom.transform = utilityCore::buildTransformationMatrix( + newGeom.translation, newGeom.rotation, newGeom.scale); + newGeom.inverseTransform = glm::inverse(newGeom.transform); + newGeom.invTranspose = glm::inverseTranspose(newGeom.transform); + return newGeom; +} + + +void LSystem::CarveBuilding(std::vector &procShape, std::vector &geoms, std::vector& materials, enum GeomType type) +{ + Symbol* currSym = rootSymbol; + glm::vec3 a = currTurtle->m_Position; + glm::vec3 b = currTurtle->m_Position; + + int startIdxMat = CreateNewMaterial(materials); + float r1 = currTurtle->radius; + float r2 = currTurtle->radius; + + while (currSym != nullptr) + { + if (Func_Pointer.find(currSym->m_refCharacter) != Func_Pointer.end()) + { + RuleFunc function = Func_Pointer[currSym->m_refCharacter]; + if (currSym->m_refCharacter == 'F') + { + r1 = currTurtle->radius; + a = currTurtle->m_Position; + } + (currTurtle.get()->*function)(); + if (currSym->m_refCharacter == 'F') + { + //currTurtle->radius = currTurtle->radius - 0.1f; + b = currTurtle->m_Position; + r2 = currTurtle->radius; + for (int x = -r1; x <= +r1; x++) + { + for (int y = -r1; y <= r1; y++) + { + for (int z = 0; z <= currTurtle->lambda; z++) + { + //glm::mat3 rotMat = glm::mat3(currTurtle->m_Right, currTurtle->m_Up, currTurtle->m_forward); + glm::vec3 currpos = a + float(z) * currTurtle->m_forward; + currpos = currpos + glm::vec3(x, 0.0f, y); + //glm::vec3 currPos = a + glm::vec3(x, y, z) * rotMat; + if ((sdCapsule(currpos, a, b, r1) <= 0)) + { + if (!ContainsPos(procShape, currpos)) + { + auto choose_mat = random_double(); + procShape.push_back(currpos); + if (choose_mat < 0.8) { + geoms.push_back(createNewGeom(currpos, startIdxMat, type)); + } + else if (choose_mat < 0.95) { + geoms.push_back(createNewGeom(currpos, startIdxMat + 1, type)); + } + else { + geoms.push_back(createNewGeom(currpos, startIdxMat + 2, type)); + } + } + } + + } + } + + } + } + } + + //std::cout<m_refCharacter; + currSym = currSym->next; + + } +} + +float dot2(glm::vec2 v) { return glm::dot(v, v); } +float dot2(glm::vec3 v) { return glm::dot(v, v); } + +float sdRoundCone(glm::vec3 p, glm::vec3 a, glm::vec3 b, float r1, float r2) +{ + // sampling independent computations (only depend on shape) + glm::vec3 ba = b - a; + float l2 = glm::dot(ba, ba); + float rr = r1 - r2; + float a2 = l2 - rr * rr; + float il2 = 1.0 / l2; + + // sampling dependant computations + glm::vec3 pa = p - a; + float y = glm::dot(pa, ba); + float z = y - l2; + float x2 = dot2(pa * l2 - ba * y); + float y2 = y * y * l2; + float z2 = z * z * l2; + + // single square root! + float k = glm::sign(rr) * rr * rr * x2; + if (glm::sign(z) * a2 * z2 > k) return sqrt(x2 + z2) * il2 - r2; + if (glm::sign(y) * a2 * y2 < k) return sqrt(x2 + y2) * il2 - r1; + return (sqrt(x2 * a2 * il2) + y * rr) * il2 - r1; +} + + +float sdTriPrism(glm::vec3 p, glm::vec2 h) +{ + glm::vec3 q = glm::abs(p); + return glm::max(q.z - h.y, glm::max(q.x * 0.866025f + p.y * 0.5f, -p.y) - h.x * 0.5f); +} + +float sdCapsule(glm::vec3 p, glm::vec3 a, glm::vec3 b, float r) +{ + glm::vec3 pa = p - a, ba = b - a; + float h = glm::clamp(glm::dot(pa, ba) / glm::dot(ba, ba), 0.0f, 1.0f); + return glm::length(pa - ba * h) - r; +} diff --git a/src/LSystem/lsystem.h b/src/LSystem/lsystem.h new file mode 100644 index 00000000..a57d0641 --- /dev/null +++ b/src/LSystem/lsystem.h @@ -0,0 +1,61 @@ +#ifndef LSYSTEMS_H +#define LSYSTEMS_H + +#include +#include +#include +#include "symbol.h" +#include +#include +#include +#include +#include "rule.h" +#include"turtle.h" +#include"glm/glm.hpp" +#include"glm/gtc/matrix_inverse.hpp" +#include "../sceneStructs.h" +#include "../utilities.h" +#pragma once +#include +#include + + +// A collection of preprocessor definitions to +// save time in writing out smart pointer syntax +#define uPtr std::unique_ptr +#define mkU std::make_unique +// Rewrite std::unique_ptr f = std::make_unique(5.f); +// as uPtr f = mkU(5.f); + +#define sPtr std::shared_ptr +#define mkS std::make_shared +// Rewrite std::shared_ptr f = std::make_shared(5.f); +// as sPtr f = mkS(5.f); + + +typedef void (Turtle::* RuleFunc)(); +class LSystem +{ +public: + LSystem(Turtle&); + uPtr currTurtle; + + std::string axiom; + std::unordered_map RulesList; + std::unordered_map Func_Pointer; + Symbol* rootSymbol; + std::vector> SymbolList; + void AddRule(char, Rule); + void AddFuncPointer(char, RuleFunc); + void AssignAxiom(std::string); + void LSystemParse(int iterations); + void ClearParsedString(); + void ApplyRule(Symbol* a_prevSym, Symbol* a_currSym, Symbol* a_endSym, char a_ruleKey, int iteration); + void GetRandomRule(char a_ruleKey); + void CarveBuilding(std::vector& procShape, std::vector &geoms, std::vector& materials, enum GeomType type); + void AddDefaultFuncPointer(); + void PrintParsedSystem(); + //void digBlock(int x, int z, int depth); +}; + +#endif // LSYSTEMS_H diff --git a/src/LSystem/postcondition.cpp b/src/LSystem/postcondition.cpp new file mode 100644 index 00000000..db316533 --- /dev/null +++ b/src/LSystem/postcondition.cpp @@ -0,0 +1,4 @@ +#include "postcondition.h" + +PostCondition::PostCondition(float a_probab, std::string a_addString) : m_probablity(a_probab), new_symbol(a_addString) +{} diff --git a/src/LSystem/postcondition.h b/src/LSystem/postcondition.h new file mode 100644 index 00000000..d928ae0e --- /dev/null +++ b/src/LSystem/postcondition.h @@ -0,0 +1,13 @@ +#ifndef POSTCONDITION_H +#define POSTCONDITION_H + +#include +class PostCondition +{ +public: + PostCondition(float, std::string); + float m_probablity; + std::string new_symbol; +}; + +#endif // POSTCONDITION_H diff --git a/src/LSystem/rule.cpp b/src/LSystem/rule.cpp new file mode 100644 index 00000000..cce634f8 --- /dev/null +++ b/src/LSystem/rule.cpp @@ -0,0 +1,61 @@ +#include "rule.h" + +Rule::Rule() +{} + +Rule::Rule(std::vector a_conditionsList) : Rules(a_conditionsList) +{} + +Rule::Rule( const Rule &obj) +{ + this->Rules = obj.Rules; + this->ConditionWeights = obj.ConditionWeights; + this->ConditionsNormalised = obj.ConditionsNormalised; +} + +Rule& Rule::operator=(const Rule & obj) +{ + this->Rules = obj.Rules; + this->ConditionWeights = obj.ConditionWeights; + this->ConditionsNormalised = obj.ConditionsNormalised; + return *this; +} + +void Rule::AddRules(PostCondition a_postCondition) +{ + Rules.push_back(a_postCondition); + ConditionsNormalised = false; +} + +void Rule::NormaliseandBuildWeights() +{ + float sum = 0.0f; + for(int i = 0; i< Rules.size(); i++) + { + float weight = Rules[i].m_probablity; + sum += weight; + ConditionWeights.push_back(sum); + } + + // now normalize so the CDF runs from 0 to 1 + for(int i = 0; i< ConditionWeights.size(); i++) + { + ConditionWeights[i] /= sum; + } + ConditionsNormalised = true; +} + +PostCondition Rule::GetRandomRule(float a_normValue) +{ + if(!ConditionsNormalised) + { + NormaliseandBuildWeights(); + } + for(int i=0; i +#include "postcondition.h" + +class Rule +{ +public: + Rule(); + Rule(std::vector); + Rule( const Rule &obj); + Rule& operator=(const Rule& n); + + std::vector Rules; + std::vector ConditionWeights; + bool ConditionsNormalised = false; + + void AddRules(PostCondition); + void NormaliseandBuildWeights(); + + PostCondition GetRandomRule(float a_normValue); + +}; + +#endif // RULE_H diff --git a/src/LSystem/symbol.cpp b/src/LSystem/symbol.cpp new file mode 100644 index 00000000..caec1d20 --- /dev/null +++ b/src/LSystem/symbol.cpp @@ -0,0 +1,4 @@ +#include "symbol.h" + +Symbol::Symbol(char a_refChar, int a_iteration): m_refCharacter(a_refChar), iteration(a_iteration), next(nullptr), prev(nullptr) +{} diff --git a/src/LSystem/symbol.h b/src/LSystem/symbol.h new file mode 100644 index 00000000..27bf29d0 --- /dev/null +++ b/src/LSystem/symbol.h @@ -0,0 +1,15 @@ +#ifndef SYMBOL_H +#define SYMBOL_H + + +class Symbol +{ +public: + Symbol(char, int); + char m_refCharacter; + int iteration; + Symbol *next; + Symbol *prev; +}; + +#endif // SYMBOL_H diff --git a/src/LSystem/turtle.cpp b/src/LSystem/turtle.cpp new file mode 100644 index 00000000..00194265 --- /dev/null +++ b/src/LSystem/turtle.cpp @@ -0,0 +1,74 @@ +#include "turtle.h" + +Turtle::Turtle() : m_takeRandomRotationsForward(true), m_rotateAngle(30.f) +{ + m_Position = glm::vec3(200.0f, 133.0f, 200.0f); + m_forward = glm::vec3(1.0, 0.0, 0.0); + m_Right = glm::vec3(0.0, 0.0, 1.0); + m_Up = glm::vec3(0.0, 1.0, 0.0); + lambda = 20.0f; + radius = 10.0f; +} + +Turtle::Turtle(glm::vec3 a_pos, glm::vec3 a_right, glm::vec3 a_up, glm::vec3 a_foward, float a_lambda, float a_radius) : m_Position(a_pos), + m_forward(a_foward), m_Right(a_right), m_Up(a_up), lambda(a_lambda), radius(a_radius), m_takeRandomRotationsForward(true), m_rotateAngle(30.f) +{} + +Turtle::Turtle(const Turtle &a_turtle) +{ + m_Position = a_turtle.m_Position; + m_forward = a_turtle.m_forward; + m_Right = a_turtle.m_Right; + m_Up = a_turtle.m_Up; + lambda = a_turtle.lambda; + radius = a_turtle.radius; + m_takeRandomRotationsForward = a_turtle.m_takeRandomRotationsForward; + m_rotateAngle = a_turtle.m_rotateAngle; + TurtleStates = a_turtle.TurtleStates; + + +} + +// Generate a random degree and rotate the direction to the left +void Turtle::TurnLeft(){ + glm::mat4 rotMax = glm::rotate(glm::mat4(), glm::radians( -1 * m_rotateAngle),m_Up); + m_forward = glm::vec3(glm::vec4(m_forward, 0) * rotMax); + m_Right = glm::vec3(glm::vec4(m_Right , 0) * rotMax); +} + +// Generate a random degree and rotate the direction to the right +void Turtle::TurnRight(){ + glm::mat4 rotMax = glm::rotate(glm::mat4(), glm::radians(m_rotateAngle),m_Up); + m_forward = glm::vec3(glm::vec4(m_forward, 0) * rotMax); + m_Right = glm::vec3(glm::vec4(m_Right , 0) * rotMax); +} + +// Draw a line along the direction in certain distance and carve out the terrain nearby +void Turtle::Move(){ + if(m_takeRandomRotationsForward) + { + float randomDegree = rand() % 19 + (-9); + glm::mat4 rotMax = glm::rotate(glm::mat4(), glm::radians(randomDegree),m_Up); + m_forward = glm::vec3(glm::vec4(m_forward, 0) * rotMax); + m_Right = glm::vec3(glm::vec4(m_Right , 0) * rotMax); + } + float amount = lambda; + m_Position += amount * m_forward; +} + +// Pop out the position and direction out of the stack +void Turtle::LoadState(){ + Turtle State = TurtleStates.back(); + TurtleStates.pop_back(); + this->m_Position = State.m_Position; + this->m_forward = State.m_forward; + this->m_Right = State.m_Right; + this->m_Up = State.m_Up; + this->lambda = State.lambda; + this->radius = State.radius ; +} + +// Push the position and direction into the stack +void Turtle::SaveState(){ + TurtleStates.push_back(*this); +} diff --git a/src/LSystem/turtle.h b/src/LSystem/turtle.h new file mode 100644 index 00000000..70cee0ce --- /dev/null +++ b/src/LSystem/turtle.h @@ -0,0 +1,33 @@ +#pragma once +#ifndef TURTLE_H +#define TURTLE_H +#include +#include +#include +class Turtle +{ +private: + +public: + Turtle(); + Turtle(glm::vec3 a_pos, glm::vec3 a_right, glm::vec3 a_up, glm::vec3 a_foward, float a_lambda = 20, float a_radius = 10); + Turtle(const Turtle&); + glm::vec3 m_Position; + glm::vec3 m_forward; + glm::vec3 m_Right; + glm::vec3 m_Up; + std::vector TurtleStates; + float lambda; //Storing Length for Carving Terrain + float radius; //Storing radius for Carving Terrain + + bool m_takeRandomRotationsForward; + float m_rotateAngle; + void TurnLeft(); + void TurnRight(); + void Move(); +// void drawThinerLines(); + void LoadState(); + void SaveState(); +}; + +#endif // TURTLE_H diff --git a/src/MeshLoading/BVH.cpp b/src/MeshLoading/BVH.cpp new file mode 100644 index 00000000..b733a99b --- /dev/null +++ b/src/MeshLoading/BVH.cpp @@ -0,0 +1,41 @@ +#include "BVH.h" + + +const glm::vec3 BVH::planeSetNormals[BVH::kNumPlaneSetNormals] = { + glm::vec3(1, 0, 0), + glm::vec3(0, 1, 0), + glm::vec3(0, 0, 1), + glm::vec3(sqrtf(3) / 3.f, sqrtf(3) / 3.f, sqrtf(3) / 3.f), + glm::vec3(-sqrtf(3) / 3.f, sqrtf(3) / 3.f, sqrtf(3) / 3.f), + glm::vec3(-sqrtf(3) / 3.f, -sqrtf(3) / 3.f, sqrtf(3) / 3.f), + glm::vec3(sqrtf(3) / 3.f, -sqrtf(3) / 3.f, sqrtf(3) / 3.f) }; + +BVH::~BVH(){} + +void computeBounds(int triangleCount, glm::vec4* Triangle_Point_Normals, const glm::vec3& planeNormal, float& dnear, float& dfar, Geom &geom) +{ + + float d; + for (uint32_t i = 0; i < triangleCount; ++i) { + for (int j = 0; j < 3; j++) + { + glm::vec3 transformedPoint = glm::vec3(geom.transform * Triangle_Point_Normals[6 * i + 2 * j]); + d = glm::dot(planeNormal, transformedPoint); + if (d < dnear) dnear = d; + if (d > dfar) dfar = d; + } + } +} + +void BVH::GetBounds(float* getBounds, Geom &geom) +{ + for (uint8_t j = 0; j < this->kNumPlaneSetNormals; ++j) { + computeBounds(this->triangleCount, this->Triangle_Point_Normals, this->planeSetNormals[j], this->extents.d[j][0], this->extents.d[j][1], geom); + } + + for (int i = 0; i < this->kNumPlaneSetNormals; i++) + { + getBounds[2 *i] = this->extents.d[i][0]; + getBounds[2 * i + 1] = this->extents.d[i][1]; + } +} \ No newline at end of file diff --git a/src/MeshLoading/BVH.h b/src/MeshLoading/BVH.h new file mode 100644 index 00000000..6c286216 --- /dev/null +++ b/src/MeshLoading/BVH.h @@ -0,0 +1,36 @@ +#pragma once + +#include "glm/glm.hpp" +#include "glm/gtx/norm.hpp" +#include "../sceneStructs.h" + + +class BVH +{ + +public: + static const int kNumPlaneSetNormals = 7; + + static const glm::vec3 planeSetNormals[kNumPlaneSetNormals]; + struct Extents + { + float d[kNumPlaneSetNormals][2]; + Extents() + { + for (uint8_t i = 0; i < kNumPlaneSetNormals; ++i) + d[i][0] = INT_MAX, d[i][1] = INT_MIN; + } + }; + + Extents extents; + int triangleCount; + glm::vec4 *Triangle_Point_Normals; + BVH(int a_triangleCount, glm::vec4* a_Triangle_Point_Normals) + { + triangleCount = a_triangleCount; + Triangle_Point_Normals = a_Triangle_Point_Normals; + } + void GetBounds(float *getBounds, Geom &geom); + ~BVH(); +}; + diff --git a/src/MeshLoading/polygon.cpp b/src/MeshLoading/polygon.cpp new file mode 100644 index 00000000..d5c34af2 --- /dev/null +++ b/src/MeshLoading/polygon.cpp @@ -0,0 +1,160 @@ +#include "polygon.h" +#include + +void Polygon::Triangulate() +{ + //TODO: Populate list of triangles + + Triangle temp_triangle; + int num_vertices = this->m_verts.size(); + for(int i=0; i< num_vertices -2; i++) + { + temp_triangle.m_indices[0] = 0; + temp_triangle.m_indices[1] = i+1; + temp_triangle.m_indices[2] = i+2; + this->m_tris.push_back(temp_triangle); + } +} + +//// Creates a polygon from the input list of vertex positions and colors +//Polygon::Polygon(char* name, const std::vector& pos, const std::vector& col) +// : m_tris(), m_verts(), m_name(name), mp_texture(nullptr), mp_normalMap(nullptr) +//{ +// for(unsigned int i = 0; i < pos.size(); i++) +// { +// m_verts.push_back(Vertex(pos[i], col[i], glm::vec4(), glm::vec2())); +// } +// Triangulate(); +//} + + +//Use the one above if using textures +// Creates a polygon from the input list of vertex positions and colors +Polygon::Polygon(char* name, const std::vector& pos, const std::vector& col) + : m_tris(), m_verts(), m_name(name) +{ + for (unsigned int i = 0; i < pos.size(); i++) + { + m_verts.push_back(Vertex(pos[i], col[i], glm::vec4(), glm::vec2())); + } + Triangulate(); +} + + +//// Creates a regular polygon with a number of sides indicated by the "sides" input integer. +//// All of its vertices are of color "color", and the polygon is centered at "pos". +//// It is rotated about its center by "rot" degrees, and is scaled from its center by "scale" units +//Polygon::Polygon(const QString& name, int sides, glm::vec3 color, glm::vec4 pos, float rot, glm::vec4 scale) +// : m_tris(), m_verts(), m_name(name), mp_texture(nullptr), mp_normalMap(nullptr) +//{ +// glm::vec4 v(0.f, 1.f, 0.f, 1.f); +// float angle = 360.f / sides; +// for(int i = 0; i < sides; i++) +// { +// glm::vec4 vert_pos = glm::translate(glm::vec3(pos.x, pos.y, pos.z)) +// * glm::rotate(rot, glm::vec3(0.f, 0.f, 1.f)) +// * glm::scale(glm::vec3(scale.x, scale.y, scale.z)) +// * glm::rotate(i * angle, glm::vec3(0.f, 0.f, 1.f)) +// * v; +// m_verts.push_back(Vertex(vert_pos, color, glm::vec4(), glm::vec2())); +// } +// +// Triangulate(); +//} +// +//Polygon::Polygon(const QString &name) +// : m_tris(), m_verts(), m_name(name), mp_texture(nullptr), mp_normalMap(nullptr) +//{} + +Polygon::Polygon(char *name) + : m_tris(), m_verts(), m_name(name) +{} + +//Polygon::Polygon() +// : m_tris(), m_verts(), m_name("Polygon"), mp_texture(nullptr), mp_normalMap(nullptr) +//{} + +Polygon::Polygon() + : m_tris(), m_verts(), m_name("Polygon") +{} + +Polygon::Polygon(const Polygon& p) + : m_tris(p.m_tris), m_verts(p.m_verts), m_name(p.m_name) +{ +} + +//Delete the once above use this +//Polygon::Polygon(const Polygon& p) +// : m_tris(p.m_tris), m_verts(p.m_verts), m_name(p.m_name), mp_texture(nullptr), mp_normalMap(nullptr) +//{ +// if(p.mp_texture != nullptr) +// { +// mp_texture = new QImage(*p.mp_texture); +// } +// if(p.mp_normalMap != nullptr) +// { +// mp_normalMap = new QImage(*p.mp_normalMap); +// } +//} + +Polygon::~Polygon() +{ + //delete mp_texture; +} + +//void Polygon::SetTexture(QImage* i) +//{ +// mp_texture = i; +//} + +//void Polygon::SetNormalMap(QImage* i) +//{ +// mp_normalMap = i; +//} + +void Polygon::AddTriangle(const Triangle& t) +{ + m_tris.push_back(t); +} + +void Polygon::AddVertex(const Vertex& v) +{ + m_verts.push_back(v); +} + +void Polygon::ClearTriangles() +{ + m_tris.clear(); +} + +Triangle& Polygon::TriAt(unsigned int i) +{ + return m_tris[i]; +} + +Triangle Polygon::TriAt(unsigned int i) const +{ + return m_tris[i]; +} + +Vertex &Polygon::VertAt(unsigned int i) +{ + return m_verts[i]; +} + +Vertex Polygon::VertAt(unsigned int i) const +{ + return m_verts[i]; +} +// +//glm::vec3 GetImageColor(const glm::vec2 &uv_coord, const QImage* const image) +//{ +// if(image) +// { +// int X = glm::min(image->width() * uv_coord.x, image->width() - 1.0f); +// int Y = glm::min(image->height() * (1.0f - uv_coord.y), image->height() - 1.0f); +// QColor color = image->pixel(X, Y); +// return glm::vec3(color.red(), color.green(), color.blue()); +// } +// return glm::vec3(255.f, 255.f, 255.f); +//} diff --git a/src/MeshLoading/polygon.h b/src/MeshLoading/polygon.h new file mode 100644 index 00000000..62eae92b --- /dev/null +++ b/src/MeshLoading/polygon.h @@ -0,0 +1,85 @@ +#pragma once +#include +#include +#include + +// A Vertex is a point in space that defines one corner of a polygon. +// Each Vertex has several attributes that determine how they contribute to the +// appearance of their Polygon, such as coloration. +struct Vertex +{ + glm::vec4 m_pos; // The position of the vertex. In hw02, this is in pixel space. + glm::vec3 m_color; // The color of the vertex. X corresponds to Red, Y corresponds to Green, and Z corresponds to Blue. + glm::vec4 m_normal; // The surface normal of the vertex (not yet used) + glm::vec2 m_uv; // The texture coordinates of the vertex (not yet used) + + Vertex(glm::vec4 p, glm::vec3 c, glm::vec4 n, glm::vec2 u) + : m_pos(p), m_color(c), m_normal(n), m_uv(u) + {} +}; + +struct BoundingBox +{ +public: + float m_ymin, m_ymax, m_xmax, m_xmin; +}; + +// Each Polygon can be decomposed into triangles that fill its area. +struct Triangle +{ + // The indices of the Vertices that make up this triangle. + // The indices correspond to the std::vector of Vertices stored in the Polygon + // which stores this Triangle + unsigned int m_indices[3]; + BoundingBox m_boundBox; +}; + +class Polygon +{ +public: + // TODO: Populate this list of triangles in Triangulate() + std::vector m_tris; + // The list of Vertices that define this polygon. This is already filled by the Polygon constructor. + std::vector m_verts; + // The name of this polygon, primarily to help you debug + char* m_name; + // The image that can be read to determine pixel color when used in conjunction with UV coordinates + // Not used until homework 3. + //QImage* mp_texture; + // The image that can be read to determine surface normal offset when used in conjunction with UV coordinates + // Not used until homework 3 + //QImage* mp_normalMap; + + // Polygon class constructors + Polygon(char *name, const std::vector& pos, const std::vector &col); + //Polygon(const QString& name, int sides, glm::vec3 color, glm::vec4 pos, float rot, glm::vec4 scale); + Polygon(char *name); + Polygon(); + Polygon(const Polygon& p); + ~Polygon(); + + // TODO: Complete the body of Triangulate() in polygon.cpp + // Creates a set of triangles that, when combined, fill the area of this convex polygon. + void Triangulate(); + + //// Copies the input QImage into this Polygon's texture + //void SetTexture(QImage*); + + //// Copies the input QImage into this Polygon's normal map + //void SetNormalMap(QImage*); + + // Various getter, setter, and adder functions + void AddVertex(const Vertex&); + void AddTriangle(const Triangle&); + void ClearTriangles(); + + Triangle& TriAt(unsigned int); + Triangle TriAt(unsigned int) const; + + Vertex& VertAt(unsigned int); + Vertex VertAt(unsigned int) const; +}; + +// Returns the color of the pixel in the image at the specified texture coordinates. +// Returns white if the image is a null pointer +//glm::vec3 GetImageColor(const glm::vec2 &uv_coord, const QImage* const image); diff --git a/src/MeshLoading/tiny_obj_loader.cc b/src/MeshLoading/tiny_obj_loader.cc new file mode 100644 index 00000000..bc53fd41 --- /dev/null +++ b/src/MeshLoading/tiny_obj_loader.cc @@ -0,0 +1,871 @@ +// +// Copyright 2012-2015, Syoyo Fujita. +// +// Licensed under 2-clause BSD liecense. +// + +// +// version 0.9.9: Replace atof() with custom parser. +// version 0.9.8: Fix multi-materials(per-face material ID). +// version 0.9.7: Support multi-materials(per-face material ID) per +// object/group. +// version 0.9.6: Support Ni(index of refraction) mtl parameter. +// Parse transmittance material parameter correctly. +// version 0.9.5: Parse multiple group name. +// Add support of specifying the base path to load material file. +// version 0.9.4: Initial suupport of group tag(g) +// version 0.9.3: Fix parsing triple 'x/y/z' +// version 0.9.2: Add more .mtl load support +// version 0.9.1: Add initial .mtl load support +// version 0.9.0: Initial +// + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include "tiny_obj_loader.h" + +namespace tinyobj { + +struct vertex_index { + int v_idx, vt_idx, vn_idx; + vertex_index(){}; + vertex_index(int idx) : v_idx(idx), vt_idx(idx), vn_idx(idx){}; + vertex_index(int vidx, int vtidx, int vnidx) + : v_idx(vidx), vt_idx(vtidx), vn_idx(vnidx){}; +}; +// for std::map +static inline bool operator<(const vertex_index &a, const vertex_index &b) { + if (a.v_idx != b.v_idx) + return (a.v_idx < b.v_idx); + if (a.vn_idx != b.vn_idx) + return (a.vn_idx < b.vn_idx); + if (a.vt_idx != b.vt_idx) + return (a.vt_idx < b.vt_idx); + + return false; +} + +struct obj_shape { + std::vector v; + std::vector vn; + std::vector vt; +}; + +static inline bool isSpace(const char c) { return (c == ' ') || (c == '\t'); } + +static inline bool isNewLine(const char c) { + return (c == '\r') || (c == '\n') || (c == '\0'); +} + +// Make index zero-base, and also support relative index. +static inline int fixIndex(int idx, int n) { + if (idx > 0) return idx - 1; + if (idx == 0) return 0; + return n + idx; // negative value = relative +} + +static inline std::string parseString(const char *&token) { + std::string s; + token += strspn(token, " \t"); + size_t e = strcspn(token, " \t\r"); + s = std::string(token, &token[e]); + token += e; + return s; +} + +static inline int parseInt(const char *&token) { + token += strspn(token, " \t"); + int i = atoi(token); + token += strcspn(token, " \t\r"); + return i; +} + + +// Tries to parse a floating point number located at s. +// +// s_end should be a location in the string where reading should absolutely +// stop. For example at the end of the string, to prevent buffer overflows. +// +// Parses the following EBNF grammar: +// sign = "+" | "-" ; +// END = ? anything not in digit ? +// digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" ; +// integer = [sign] , digit , {digit} ; +// decimal = integer , ["." , integer] ; +// float = ( decimal , END ) | ( decimal , ("E" | "e") , integer , END ) ; +// +// Valid strings are for example: +// -0 +3.1417e+2 -0.0E-3 1.0324 -1.41 11e2 +// +// If the parsing is a success, result is set to the parsed value and true +// is returned. +// +// The function is greedy and will parse until any of the following happens: +// - a non-conforming character is encountered. +// - s_end is reached. +// +// The following situations triggers a failure: +// - s >= s_end. +// - parse failure. +// +static bool tryParseDouble(const char *s, const char *s_end, double *result) +{ + if (s >= s_end) + { + return false; + } + + double mantissa = 0.0; + // This exponent is base 2 rather than 10. + // However the exponent we parse is supposed to be one of ten, + // thus we must take care to convert the exponent/and or the + // mantissa to a * 2^E, where a is the mantissa and E is the + // exponent. + // To get the final double we will use ldexp, it requires the + // exponent to be in base 2. + int exponent = 0; + + // NOTE: THESE MUST BE DECLARED HERE SINCE WE ARE NOT ALLOWED + // TO JUMP OVER DEFINITIONS. + char sign = '+'; + char exp_sign = '+'; + char const *curr = s; + + // How many characters were read in a loop. + int read = 0; + // Tells whether a loop terminated due to reaching s_end. + bool end_not_reached = false; + + /* + BEGIN PARSING. + */ + + // Find out what sign we've got. + if (*curr == '+' || *curr == '-') + { + sign = *curr; + curr++; + } + else if (isdigit(*curr)) { /* Pass through. */ } + else + { + goto fail; + } + + // Read the integer part. + while ((end_not_reached = (curr != s_end)) && isdigit(*curr)) + { + mantissa *= 10; + mantissa += static_cast(*curr - 0x30); + curr++; read++; + } + + // We must make sure we actually got something. + if (read == 0) + goto fail; + // We allow numbers of form "#", "###" etc. + if (!end_not_reached) + goto assemble; + + // Read the decimal part. + if (*curr == '.') + { + curr++; + read = 1; + while ((end_not_reached = (curr != s_end)) && isdigit(*curr)) + { + // NOTE: Don't use powf here, it will absolutely murder precision. + mantissa += static_cast(*curr - 0x30) * pow(10, -read); + read++; curr++; + } + } + else if (*curr == 'e' || *curr == 'E') {} + else + { + goto assemble; + } + + if (!end_not_reached) + goto assemble; + + // Read the exponent part. + if (*curr == 'e' || *curr == 'E') + { + curr++; + // Figure out if a sign is present and if it is. + if ((end_not_reached = (curr != s_end)) && (*curr == '+' || *curr == '-')) + { + exp_sign = *curr; + curr++; + } + else if (isdigit(*curr)) { /* Pass through. */ } + else + { + // Empty E is not allowed. + goto fail; + } + + read = 0; + while ((end_not_reached = (curr != s_end)) && isdigit(*curr)) + { + exponent *= 10; + exponent += static_cast(*curr - 0x30); + curr++; read++; + } + exponent *= (exp_sign == '+'? 1 : -1); + if (read == 0) + goto fail; + } + +assemble: + *result = (sign == '+'? 1 : -1) * ldexp(mantissa * pow(5, exponent), exponent); + return true; +fail: + return false; +} +static inline float parseFloat(const char *&token) { + token += strspn(token, " \t"); +#ifdef TINY_OBJ_LOADER_OLD_FLOAT_PARSER + float f = (float)atof(token); + token += strcspn(token, " \t\r"); +#else + const char *end = token + strcspn(token, " \t\r"); + double val = 0.0; + tryParseDouble(token, end, &val); + float f = static_cast(val); + token = end; +#endif + return f; +} + + +static inline void parseFloat2(float &x, float &y, const char *&token) { + x = parseFloat(token); + y = parseFloat(token); +} + +static inline void parseFloat3(float &x, float &y, float &z, + const char *&token) { + x = parseFloat(token); + y = parseFloat(token); + z = parseFloat(token); +} + +// Parse triples: i, i/j/k, i//k, i/j +static vertex_index parseTriple(const char *&token, int vsize, int vnsize, + int vtsize) { + vertex_index vi(-1); + + vi.v_idx = fixIndex(atoi(token), vsize); + token += strcspn(token, "/ \t\r"); + if (token[0] != '/') { + return vi; + } + token++; + + // i//k + if (token[0] == '/') { + token++; + vi.vn_idx = fixIndex(atoi(token), vnsize); + token += strcspn(token, "/ \t\r"); + return vi; + } + + // i/j/k or i/j + vi.vt_idx = fixIndex(atoi(token), vtsize); + token += strcspn(token, "/ \t\r"); + if (token[0] != '/') { + return vi; + } + + // i/j/k + token++; // skip '/' + vi.vn_idx = fixIndex(atoi(token), vnsize); + token += strcspn(token, "/ \t\r"); + return vi; +} + +static unsigned int +updateVertex(std::map &vertexCache, + std::vector &positions, std::vector &normals, + std::vector &texcoords, + const std::vector &in_positions, + const std::vector &in_normals, + const std::vector &in_texcoords, const vertex_index &i) { + const std::map::iterator it = vertexCache.find(i); + + if (it != vertexCache.end()) { + // found cache + return it->second; + } + + assert(in_positions.size() > (unsigned int)(3 * i.v_idx + 2)); + + positions.push_back(in_positions[3 * i.v_idx + 0]); + positions.push_back(in_positions[3 * i.v_idx + 1]); + positions.push_back(in_positions[3 * i.v_idx + 2]); + + if (i.vn_idx >= 0) { + normals.push_back(in_normals[3 * i.vn_idx + 0]); + normals.push_back(in_normals[3 * i.vn_idx + 1]); + normals.push_back(in_normals[3 * i.vn_idx + 2]); + } + + if (i.vt_idx >= 0) { + texcoords.push_back(in_texcoords[2 * i.vt_idx + 0]); + texcoords.push_back(in_texcoords[2 * i.vt_idx + 1]); + } + + unsigned int idx = static_cast(positions.size() / 3 - 1); + vertexCache[i] = idx; + + return idx; +} + +void InitMaterial(material_t &material) { + material.name = ""; + material.ambient_texname = ""; + material.diffuse_texname = ""; + material.specular_texname = ""; + material.normal_texname = ""; + for (int i = 0; i < 3; i++) { + material.ambient[i] = 0.f; + material.diffuse[i] = 0.f; + material.specular[i] = 0.f; + material.transmittance[i] = 0.f; + material.emission[i] = 0.f; + } + material.illum = 0; + material.dissolve = 1.f; + material.shininess = 1.f; + material.ior = 1.f; + material.unknown_parameter.clear(); +} + +static bool exportFaceGroupToShape( + shape_t &shape, std::map vertexCache, + const std::vector &in_positions, + const std::vector &in_normals, + const std::vector &in_texcoords, + const std::vector > &faceGroup, + const int material_id, const std::string &name, bool clearCache) { + if (faceGroup.empty()) { + return false; + } + + // Flatten vertices and indices + for (size_t i = 0; i < faceGroup.size(); i++) { + const std::vector &face = faceGroup[i]; + + vertex_index i0 = face[0]; + vertex_index i1(-1); + vertex_index i2 = face[1]; + + size_t npolys = face.size(); + + // Polygon -> triangle fan conversion + for (size_t k = 2; k < npolys; k++) { + i1 = i2; + i2 = face[k]; + + unsigned int v0 = updateVertex( + vertexCache, shape.mesh.positions, shape.mesh.normals, + shape.mesh.texcoords, in_positions, in_normals, in_texcoords, i0); + unsigned int v1 = updateVertex( + vertexCache, shape.mesh.positions, shape.mesh.normals, + shape.mesh.texcoords, in_positions, in_normals, in_texcoords, i1); + unsigned int v2 = updateVertex( + vertexCache, shape.mesh.positions, shape.mesh.normals, + shape.mesh.texcoords, in_positions, in_normals, in_texcoords, i2); + + shape.mesh.indices.push_back(v0); + shape.mesh.indices.push_back(v1); + shape.mesh.indices.push_back(v2); + + shape.mesh.material_ids.push_back(material_id); + } + } + + shape.name = name; + + if (clearCache) + vertexCache.clear(); + + return true; +} + +std::string LoadMtl(std::map &material_map, + std::vector &materials, + std::istream &inStream) { + std::stringstream err; + + material_t material; + + int maxchars = 8192; // Alloc enough size. + std::vector buf(maxchars); // Alloc enough size. + while (inStream.peek() != -1) { + inStream.getline(&buf[0], maxchars); + + std::string linebuf(&buf[0]); + + // Trim newline '\r\n' or '\n' + if (linebuf.size() > 0) { + if (linebuf[linebuf.size() - 1] == '\n') + linebuf.erase(linebuf.size() - 1); + } + if (linebuf.size() > 0) { + if (linebuf[linebuf.size() - 1] == '\r') + linebuf.erase(linebuf.size() - 1); + } + + // Skip if empty line. + if (linebuf.empty()) { + continue; + } + + // Skip leading space. + const char *token = linebuf.c_str(); + token += strspn(token, " \t"); + + assert(token); + if (token[0] == '\0') + continue; // empty line + + if (token[0] == '#') + continue; // comment line + + // new mtl + if ((0 == strncmp(token, "newmtl", 6)) && isSpace((token[6]))) { + // flush previous material. + if (!material.name.empty()) { + material_map.insert( + std::pair(material.name, static_cast(materials.size()))); + materials.push_back(material); + } + + // initial temporary material + InitMaterial(material); + + // set new mtl name + char namebuf[4096]; + token += 7; +#ifdef _MSC_VER + sscanf_s(token, "%s", namebuf); +#else + sscanf(token, "%s", namebuf); +#endif + material.name = namebuf; + continue; + } + + // ambient + if (token[0] == 'K' && token[1] == 'a' && isSpace((token[2]))) { + token += 2; + float r, g, b; + parseFloat3(r, g, b, token); + material.ambient[0] = r; + material.ambient[1] = g; + material.ambient[2] = b; + continue; + } + + // diffuse + if (token[0] == 'K' && token[1] == 'd' && isSpace((token[2]))) { + token += 2; + float r, g, b; + parseFloat3(r, g, b, token); + material.diffuse[0] = r; + material.diffuse[1] = g; + material.diffuse[2] = b; + continue; + } + + // specular + if (token[0] == 'K' && token[1] == 's' && isSpace((token[2]))) { + token += 2; + float r, g, b; + parseFloat3(r, g, b, token); + material.specular[0] = r; + material.specular[1] = g; + material.specular[2] = b; + continue; + } + + // transmittance + if (token[0] == 'K' && token[1] == 't' && isSpace((token[2]))) { + token += 2; + float r, g, b; + parseFloat3(r, g, b, token); + material.transmittance[0] = r; + material.transmittance[1] = g; + material.transmittance[2] = b; + continue; + } + + // ior(index of refraction) + if (token[0] == 'N' && token[1] == 'i' && isSpace((token[2]))) { + token += 2; + material.ior = parseFloat(token); + continue; + } + + // emission + if (token[0] == 'K' && token[1] == 'e' && isSpace(token[2])) { + token += 2; + float r, g, b; + parseFloat3(r, g, b, token); + material.emission[0] = r; + material.emission[1] = g; + material.emission[2] = b; + continue; + } + + // shininess + if (token[0] == 'N' && token[1] == 's' && isSpace(token[2])) { + token += 2; + material.shininess = parseFloat(token); + continue; + } + + // illum model + if (0 == strncmp(token, "illum", 5) && isSpace(token[5])) { + token += 6; + material.illum = parseInt(token); + continue; + } + + // dissolve + if ((token[0] == 'd' && isSpace(token[1]))) { + token += 1; + material.dissolve = parseFloat(token); + continue; + } + if (token[0] == 'T' && token[1] == 'r' && isSpace(token[2])) { + token += 2; + material.dissolve = parseFloat(token); + continue; + } + + // ambient texture + if ((0 == strncmp(token, "map_Ka", 6)) && isSpace(token[6])) { + token += 7; + material.ambient_texname = token; + continue; + } + + // diffuse texture + if ((0 == strncmp(token, "map_Kd", 6)) && isSpace(token[6])) { + token += 7; + material.diffuse_texname = token; + continue; + } + + // specular texture + if ((0 == strncmp(token, "map_Ks", 6)) && isSpace(token[6])) { + token += 7; + material.specular_texname = token; + continue; + } + + // normal texture + if ((0 == strncmp(token, "map_Ns", 6)) && isSpace(token[6])) { + token += 7; + material.normal_texname = token; + continue; + } + + // unknown parameter + const char *_space = strchr(token, ' '); + if (!_space) { + _space = strchr(token, '\t'); + } + if (_space) { + std::ptrdiff_t len = _space - token; + std::string key(token, len); + std::string value = _space + 1; + material.unknown_parameter.insert( + std::pair(key, value)); + } + } + // flush last material. + material_map.insert( + std::pair(material.name, static_cast(materials.size()))); + materials.push_back(material); + + return err.str(); +} + +std::string MaterialFileReader::operator()(const std::string &matId, + std::vector &materials, + std::map &matMap) { + std::string filepath; + + if (!m_mtlBasePath.empty()) { + filepath = std::string(m_mtlBasePath) + matId; + } else { + filepath = matId; + } + + std::ifstream matIStream(filepath.c_str()); + return LoadMtl(matMap, materials, matIStream); +} + +std::string LoadObj(std::vector &shapes, + std::vector &materials, // [output] + const char *filename, const char *mtl_basepath) { + + shapes.clear(); + + std::stringstream err; + + std::ifstream ifs(filename); + if (!ifs) { + err << "Cannot open file [" << filename << "]" << std::endl; + return err.str(); + } + + std::string basePath; + if (mtl_basepath) { + basePath = mtl_basepath; + } + MaterialFileReader matFileReader(basePath); + + return LoadObj(shapes, materials, ifs, matFileReader); +} + +std::string LoadObj(std::vector &shapes, + std::vector &materials, // [output] + std::istream &inStream, MaterialReader &readMatFn) { + std::stringstream err; + + std::vector v; + std::vector vn; + std::vector vt; + std::vector > faceGroup; + std::string name; + + // material + std::map material_map; + std::map vertexCache; + int material = -1; + + shape_t shape; + + int maxchars = 8192; // Alloc enough size. + std::vector buf(maxchars); // Alloc enough size. + while (inStream.peek() != -1) { + inStream.getline(&buf[0], maxchars); + + std::string linebuf(&buf[0]); + + // Trim newline '\r\n' or '\n' + if (linebuf.size() > 0) { + if (linebuf[linebuf.size() - 1] == '\n') + linebuf.erase(linebuf.size() - 1); + } + if (linebuf.size() > 0) { + if (linebuf[linebuf.size() - 1] == '\r') + linebuf.erase(linebuf.size() - 1); + } + + // Skip if empty line. + if (linebuf.empty()) { + continue; + } + + // Skip leading space. + const char *token = linebuf.c_str(); + token += strspn(token, " \t"); + + assert(token); + if (token[0] == '\0') + continue; // empty line + + if (token[0] == '#') + continue; // comment line + + // vertex + if (token[0] == 'v' && isSpace((token[1]))) { + token += 2; + float x, y, z; + parseFloat3(x, y, z, token); + v.push_back(x); + v.push_back(y); + v.push_back(z); + continue; + } + + // normal + if (token[0] == 'v' && token[1] == 'n' && isSpace((token[2]))) { + token += 3; + float x, y, z; + parseFloat3(x, y, z, token); + vn.push_back(x); + vn.push_back(y); + vn.push_back(z); + continue; + } + + // texcoord + if (token[0] == 'v' && token[1] == 't' && isSpace((token[2]))) { + token += 3; + float x, y; + parseFloat2(x, y, token); + vt.push_back(x); + vt.push_back(y); + continue; + } + + // face + if (token[0] == 'f' && isSpace((token[1]))) { + token += 2; + token += strspn(token, " \t"); + + std::vector face; + while (!isNewLine(token[0])) { + vertex_index vi = + parseTriple(token, static_cast(v.size() / 3), static_cast(vn.size() / 3), static_cast(vt.size() / 2)); + face.push_back(vi); + size_t n = strspn(token, " \t\r"); + token += n; + } + + faceGroup.push_back(face); + + continue; + } + + + + //Uncomment Afterwards + // use mtl +// if ((0 == strncmp(token, "usemtl", 6)) && isSpace((token[6]))) { +// +// char namebuf[4096]; +// token += 7; +//#ifdef _MSC_VER +// sscanf_s(token, "%s", namebuf); +//#else +// sscanf(token, "%s", namebuf); +//#endif +// +// // Create face group per material. +// bool ret = exportFaceGroupToShape(shape, vertexCache, v, vn, vt, +// faceGroup, material, name, true); +// if (ret) { +// faceGroup.clear(); +// } +// +// if (material_map.find(namebuf) != material_map.end()) { +// material = material_map[namebuf]; +// } else { +// // { error!! material not found } +// material = -1; +// } +// +// continue; +// } + + // load mtl +// if ((0 == strncmp(token, "mtllib", 6)) && isSpace((token[6]))) { +// char namebuf[4096]; +// token += 7; +//#ifdef _MSC_VER +// sscanf_s(token, "%s", namebuf); +//#else +// sscanf(token, "%s", namebuf); +//#endif +// +// std::string err_mtl = readMatFn(namebuf, materials, material_map); +// if (!err_mtl.empty()) { +// faceGroup.clear(); // for safety +// return err_mtl; +// } +// +// continue; +// } + + // group name + if (token[0] == 'g' && isSpace((token[1]))) { + + // flush previous face group. + bool ret = exportFaceGroupToShape(shape, vertexCache, v, vn, vt, + faceGroup, material, name, true); + if (ret) { + shapes.push_back(shape); + } + + shape = shape_t(); + + // material = -1; + faceGroup.clear(); + + std::vector names; + while (!isNewLine(token[0])) { + std::string str = parseString(token); + names.push_back(str); + token += strspn(token, " \t\r"); // skip tag + } + + assert(names.size() > 0); + + // names[0] must be 'g', so skip the 0th element. + if (names.size() > 1) { + name = names[1]; + } else { + name = ""; + } + + continue; + } + + // object name + if (token[0] == 'o' && isSpace((token[1]))) { + + // flush previous face group. + bool ret = exportFaceGroupToShape(shape, vertexCache, v, vn, vt, + faceGroup, material, name, true); + if (ret) { + shapes.push_back(shape); + } + + // material = -1; + faceGroup.clear(); + shape = shape_t(); + + // @todo { multiple object name? } + char namebuf[4096]; + token += 2; +#ifdef _MSC_VER + sscanf_s(token, "%s", namebuf); +#else + sscanf(token, "%s", namebuf); +#endif + name = std::string(namebuf); + + continue; + } + + // Ignore unknown command. + } + + bool ret = exportFaceGroupToShape(shape, vertexCache, v, vn, vt, faceGroup, + material, name, true); + if (ret) { + shapes.push_back(shape); + } + faceGroup.clear(); // for safety + + return err.str(); +} +} diff --git a/src/MeshLoading/tiny_obj_loader.h b/src/MeshLoading/tiny_obj_loader.h new file mode 100644 index 00000000..512f32ba --- /dev/null +++ b/src/MeshLoading/tiny_obj_loader.h @@ -0,0 +1,94 @@ +// +// Copyright 2012-2015, Syoyo Fujita. +// +// Licensed under 2-clause BSD liecense. +// +#ifndef _TINY_OBJ_LOADER_H +#define _TINY_OBJ_LOADER_H + +#include +#include +#include + +namespace tinyobj { + +typedef struct { + std::string name; + + float ambient[3]; + float diffuse[3]; + float specular[3]; + float transmittance[3]; + float emission[3]; + float shininess; + float ior; // index of refraction + float dissolve; // 1 == opaque; 0 == fully transparent + // illumination model (see http://www.fileformat.info/format/material/) + int illum; + + std::string ambient_texname; + std::string diffuse_texname; + std::string specular_texname; + std::string normal_texname; + std::map unknown_parameter; +} material_t; + +typedef struct { + std::vector positions; + std::vector normals; + std::vector texcoords; + std::vector indices; + std::vector material_ids; // per-mesh material ID +} mesh_t; + +typedef struct { + std::string name; + mesh_t mesh; +} shape_t; + +class MaterialReader { +public: + MaterialReader() {} + virtual ~MaterialReader() {} + + virtual std::string operator()(const std::string &matId, + std::vector &materials, + std::map &matMap) = 0; +}; + +class MaterialFileReader : public MaterialReader { +public: + MaterialFileReader(const std::string &mtl_basepath) + : m_mtlBasePath(mtl_basepath) {} + virtual ~MaterialFileReader() {} + virtual std::string operator()(const std::string &matId, + std::vector &materials, + std::map &matMap); + +private: + std::string m_mtlBasePath; +}; + +/// Loads .obj from a file. +/// 'shapes' will be filled with parsed shape data +/// The function returns error string. +/// Returns empty string when loading .obj success. +/// 'mtl_basepath' is optional, and used for base path for .mtl file. +std::string LoadObj(std::vector &shapes, // [output] + std::vector &materials, // [output] + const char *filename, const char *mtl_basepath = NULL); + +/// Loads object from a std::istream, uses GetMtlIStreamFn to retrieve +/// std::istream for materials. +/// Returns empty string when loading .obj success. +std::string LoadObj(std::vector &shapes, // [output] + std::vector &materials, // [output] + std::istream &inStream, MaterialReader &readMatFn); + +/// Loads materials into std::map +/// Returns an empty string if successful +std::string LoadMtl(std::map &material_map, + std::vector &materials, std::istream &inStream); +} + +#endif // _TINY_OBJ_LOADER_H diff --git a/src/ProceduralTextures.h b/src/ProceduralTextures.h new file mode 100644 index 00000000..1ffea201 --- /dev/null +++ b/src/ProceduralTextures.h @@ -0,0 +1,91 @@ +#pragma once +#include +#include +#include + +#include "sceneStructs.h" +#include "utilities.h" + +__device__ +float noise2D(glm::vec2 p) { + return glm::fract(glm::sin(glm::dot(p, glm::vec2(127.1, 311.7))) * + 43758.5453); +} + +__device__ +float interpNoise2D(float x, float y) { + int intX = int(floor(x)); + float fractX = glm::fract(x); + int intY = int(floor(y)); + float fractY = glm::fract(y); + + float v1 = noise2D(glm::vec2(intX, intY)); + float v2 = noise2D(glm::vec2(intX + 1, intY)); + float v3 = noise2D(glm::vec2(intX, intY + 1)); + float v4 = noise2D(glm::vec2(intX + 1, intY + 1)); + + float i1 = glm::mix(v1, v2, fractX); + float i2 = glm::mix(v3, v4, fractX); + return glm::mix(i1, i2, fractY); +} + +__device__ +float fbm(glm::vec2 p) { + float total = 0; + float persistence = 0.5f; + int octaves = 8; + float freq = 2.f; + float amp = 0.5f; + for (int i = 1; i <= octaves; i++) { + freq *= 2.f; + amp *= persistence; + + total += interpNoise2D(p.x * freq, + p.y * freq) * amp; + } + return total; +} + +__device__ +float pattern(glm::vec2 p) +{ + glm::vec2 q = glm::vec2(fbm(p + glm::vec2(0.0, 0.0)), + fbm(p + glm::vec2(5.2, 1.3))); + + glm::vec2 r = glm::vec2(fbm(p + 4.0f * q + glm::vec2(1.7, 9.2)), + fbm(p + 4.0f * q + glm::vec2(8.3, 2.8))); + + return fbm(p + 4.0f * r); +} +__device__ +glm::vec3 ProcColorValue2(double u, double v, const glm::vec3 p) { + glm::vec3 odd(0.98, 0.98, 0.98); + glm::vec3 even(0.1, 0.1, 0.1); + float pac = pattern(glm::vec2(u, v)) * 2; + + + if (pac > 0.6) + { + return glm::vec3(0.25f, 0.25f, 0.85f); + } + if (pac > 0.4) + { + return glm::vec3(0.85f, 0.2f, 0.3f); + } + + // return yellow + return glm::vec3(0.85, 0.67, 0.35); +} + + + +__device__ +glm::vec3 ProcColorValue(double u, double v, const glm::vec3 p) { + glm::vec3 odd(0.2, 0.3, 0.1); + glm::vec3 even(0.9, 0.9, 0.9); + auto sines = sin(10 * p.x) * sin(10 * p.y) * sin(10 * p.z); + if (sines < 0) + return odd; + else + return even; +} diff --git a/src/interactions.h b/src/interactions.h index f969e458..f32579ba 100644 --- a/src/interactions.h +++ b/src/interactions.h @@ -1,15 +1,15 @@ #pragma once #include "intersections.h" - +#include "ProceduralTextures.h" // CHECKITOUT /** * Computes a cosine-weighted random direction in a hemisphere. * Used for diffuse lighting. */ -__host__ __device__ +__device__ glm::vec3 calculateRandomDirectionInHemisphere( - glm::vec3 normal, thrust::default_random_engine &rng) { + glm::vec3 normal, thrust::default_random_engine& rng) { thrust::uniform_real_distribution u01(0, 1); float up = sqrt(u01(rng)); // cos(theta) @@ -24,9 +24,11 @@ glm::vec3 calculateRandomDirectionInHemisphere( glm::vec3 directionNotNormal; if (abs(normal.x) < SQRT_OF_ONE_THIRD) { directionNotNormal = glm::vec3(1, 0, 0); - } else if (abs(normal.y) < SQRT_OF_ONE_THIRD) { + } + else if (abs(normal.y) < SQRT_OF_ONE_THIRD) { directionNotNormal = glm::vec3(0, 1, 0); - } else { + } + else { directionNotNormal = glm::vec3(0, 0, 1); } @@ -41,6 +43,137 @@ glm::vec3 calculateRandomDirectionInHemisphere( + sin(around) * over * perpendicularDirection2; } + +__device__ float reflectance(double cosine, double ref_idx) { + // Use Schlick's approximation for reflectance. + auto r0 = (1 - ref_idx) / (1 + ref_idx); + r0 = r0 * r0; + return r0 + (1 - r0) * pow((1 - cosine), 5); +} + +__device__ double length_squared(glm::vec3 vec) { + return vec[0] * vec[0] + vec[1] * vec[1] + vec[2] * vec[2]; +} + +__device__ glm::vec3 refract(const glm::vec3& uv, const glm::vec3& n, float etai_over_etat) { + float cos_theta = fmin(glm::dot(-uv, n), 1.0f); + glm::vec3 r_out_perp = etai_over_etat * (uv + cos_theta * n); + glm::vec3 r_out_parallel = (float)-sqrt(fabs(1.0 - length_squared(r_out_perp))) * n; + return r_out_perp + r_out_parallel; +} + +#define DISTORTION 2.0f +#define GLOW 10.0f +#define SCALE 2.0f +#define AMBIENT 0.5f +__device__ glm::vec3 subsurfaceColor(const glm::vec3 lightDir, const glm::vec3 normal, const glm::vec3 viewVec, float thin, const Material& m) +{ + glm::vec3 scatterDir = lightDir + normal * DISTORTION; + float abc = glm::dot(viewVec, scatterDir); + float bcg = glm::dot(normal, lightDir); + float def = glm::dot(-viewVec, -lightDir); + float lightReacingEYE = glm::pow(glm::clamp(glm::dot(viewVec, scatterDir), 0.0f, 1.0f), GLOW) * SCALE; + float attenuation = glm::max(0.0f, glm::dot(normal, lightDir) + glm::dot(-viewVec, -lightDir)); + + float totalLight = attenuation * (lightReacingEYE + AMBIENT) * thin; + if (totalLight < 0.0f) + { + totalLight = totalLight; + } + glm::vec3 scolor = (m.color * totalLight); + //glm::vec3 lightColor = glm::vec3(0.98f, 0.2f, 0.5f); + return m.color *totalLight; +} + +__device__ +glm::vec3 DielectricScatter( + PathSegment& pathSegment, glm::vec3 intersect, glm::vec3 normal, const Material& m, thrust::default_random_engine& rng, const Camera& cam) { + + + + thrust::uniform_real_distribution u01(0, 1); + normal = glm::normalize(normal); + bool front_face = glm::dot(pathSegment.ray.direction, normal) < 0.0f; + double refraction_ratio = front_face ? (1.0f / m.indexOfRefraction) : m.indexOfRefraction; + + glm::vec3 unit_direction = glm::normalize(pathSegment.ray.direction); + double cos_theta = fmin(glm::dot(-unit_direction, normal), 1.0f); + double sin_theta = sqrt(1.0f - cos_theta * cos_theta); + + bool cannot_refract = refraction_ratio * sin_theta > 1.0f; + glm::vec3 direction; + + if (cannot_refract || reflectance(cos_theta, refraction_ratio) > u01(rng)) + { + float scale = m.hasRefractive <= 0.0 ? 0.0 : 1.0 / (1-m.hasRefractive); + direction = glm::reflect(unit_direction, normal); + if (!m.usingProcTex) + { + pathSegment.color *= m.color; + } + } + else + { + float scale = m.hasRefractive <= 0.0 ? 0.0 : 1.0 / (m.hasRefractive); + direction = refract(unit_direction, normal, u01(rng)); + if (m.specular.exponent > 0 && !m.usingProcTex) { + pathSegment.color *= m.specular.color ; + } + } + + return glm::normalize(direction); +} + +inline bool near_zero(glm::vec3 vec) { + // Return true if the vector is close to zero in all dimensions. + const auto s = 1e-8; + return (fabs(vec[0]) < EPSILON) && (fabs(vec[1]) < EPSILON) && (fabs(vec[2]) < EPSILON); +} + + +__device__ +void get_sphere_uv(const glm::vec3& p, double& u, double& v) { + // p: a given point on the sphere of radius one, centered at the origin. + // u: returned value [0,1] of angle around the Y axis from X=-1. + // v: returned value [0,1] of angle from Y=-1 to Y=+1. + // <1 0 0> yields <0.50 0.50> <-1 0 0> yields <0.00 0.50> + // <0 1 0> yields <0.50 1.00> < 0 -1 0> yields <0.50 0.00> + // <0 0 1> yields <0.25 0.50> < 0 0 -1> yields <0.75 0.50> + + glm::vec3 p_copy = p; + p_copy = glm::normalize(p_copy); + auto theta = acos(-p_copy.y); + auto phi = atan2(-p_copy.z, p_copy.x) + PI; + + u = phi / (2 * PI); + v = theta / PI; +} + +__device__ +void ColorProcTex(PathSegment& pathSegment, const Material& m, glm::vec3 intersect, const Camera& cam, const glm::vec3 normal) +{ + double u, v; + glm::vec3 testInter = intersect; + get_sphere_uv(intersect, u, v); + glm::vec3 subsurfaceCol = glm::vec3(0, 0, 0); + if (m.isSubSurface) + { + glm::vec3 viewVec = cam.view; + glm::vec3 scolor = subsurfaceColor(glm::normalize(pathSegment.ray.direction), normal, viewVec, 3, m); + subsurfaceCol = scolor; + } + switch (m.ProcTexNum) + { + case 1: + pathSegment.color *= subsurfaceCol + ProcColorValue(u, v, intersect); + case 2: + pathSegment.color *= subsurfaceCol + ProcColorValue2(u, v, intersect); + default: + break; + } + +} + /** * Scatter a ray with some probabilities according to the material properties. * For example, a diffuse surface scatters in a cosine-weighted hemisphere. @@ -66,14 +199,72 @@ glm::vec3 calculateRandomDirectionInHemisphere( * * You may need to change the parameter list for your purposes! */ -__host__ __device__ +__device__ void scatterRay( - PathSegment & pathSegment, - glm::vec3 intersect, - glm::vec3 normal, - const Material &m, - thrust::default_random_engine &rng) { + PathSegment& pathSegment, + glm::vec3 intersect, + glm::vec3 normal, + const Material& m, + thrust::default_random_engine& rng, const Camera &cam) { // TODO: implement this. // A basic implementation of pure-diffuse shading will just call the // calculateRandomDirectionInHemisphere defined above. + glm::vec3 scatter_direction; + thrust::uniform_real_distribution u01(0, 1); + float random = u01(rng); + + float scale = 1.0f; + + + if (random <=m.hasReflective ) + { + scatter_direction = glm::reflect(pathSegment.ray.direction, normal); + scale = m.hasReflective <= 0.0 ? 0.0 : 1.0 / m.hasReflective; + if (!m.usingProcTex) + { + if (m.specular.exponent > 0 && random > 0.7) + { + pathSegment.color *= m.specular.color; + } + else + { + pathSegment.color *= m.color; + } + + } + } + else if (random <= m.hasRefractive + m.hasReflective ) + { + scatter_direction = DielectricScatter(pathSegment, intersect, normal, m, rng, cam); + + } + else + { + + scale = m.hasReflective <= 0.0 ? 0.0 : 1.0 / (1 - m.hasReflective); + scatter_direction = calculateRandomDirectionInHemisphere(normal, rng); + + if (!m.usingProcTex) + { + if (m.isSubSurface) + { + glm::vec3 viewVec = cam.view; + glm::vec3 scolor = subsurfaceColor(glm::normalize(pathSegment.ray.direction), normal, viewVec, 3, m); + glm::vec3 subsurface = scolor; + pathSegment.color *= m.color + subsurface; + } + else + { + pathSegment.color *= m.color; + } + + } + } + if (m.usingProcTex) + { + ColorProcTex(pathSegment, m, intersect, cam, normal); + } + pathSegment.ray.origin = intersect + scatter_direction * EPSILON2; + pathSegment.ray.direction = scatter_direction; + } diff --git a/src/intersections.h b/src/intersections.h index b1504071..eb6f17bf 100644 --- a/src/intersections.h +++ b/src/intersections.h @@ -2,6 +2,7 @@ #include #include +#include #include "sceneStructs.h" #include "utilities.h" @@ -142,3 +143,133 @@ __host__ __device__ float sphereIntersectionTest(Geom sphere, Ray r, return glm::length(r.origin - intersectionPoint); } + + +__host__ __device__ float TriangleArea(glm::vec4 a_p1, glm::vec4 a_p2, glm::vec4 a_p3) +{ + glm::vec4 p1 = a_p1; + glm::vec4 p2 = a_p2; + glm::vec4 p3 = a_p3; + float A = 0.5f * glm::length(glm::cross(glm::vec3(a_p2[0] - a_p1[0], a_p2[1] - a_p1[1], a_p2[2] - a_p1[2]), + glm::vec3(a_p3[0] - a_p1[0], a_p3[1] - a_p1[1], a_p3[2] - a_p1[2]))); + return A; +} + + +__host__ __device__ glm::vec4 GetBarycentricWeightedNormal(glm::vec4 a_p1, glm::vec4 a_n1, glm::vec4 a_p2, glm::vec4 a_n2, glm::vec4 a_p3, glm::vec4 a_n3, glm::vec4 a_p) +{ + + float A = TriangleArea(a_p1, a_p2, a_p3); + float A1 = TriangleArea(a_p2, a_p3, a_p); + float A2 = TriangleArea(a_p1, a_p3, a_p); + float A3 = TriangleArea(a_p1, a_p2, a_p); + glm::vec4 a_surfaceNormal = a_p[2] * ((a_n1 * A1) / (A * a_p1[2]) + (a_n2 * A2) / (A * a_p2[2]) + (a_n3 * A3) / (A * a_p3[2])); + return a_surfaceNormal; +} + +__host__ __device__ +inline bool intersect( Geom &geom, + const Ray ray, + float& tNear, float& tFar, uint8_t& planeIndex) +{ + const int kNumPlaneSetNormals = 7; + const glm::vec3 planeSetNormals[kNumPlaneSetNormals] = { + glm::vec3(1, 0, 0), + glm::vec3(0, 1, 0), + glm::vec3(0, 0, 1), + glm::vec3(sqrtf(3) / 3.f, sqrtf(3) / 3.f, sqrtf(3) / 3.f), + glm::vec3(-sqrtf(3) / 3.f, sqrtf(3) / 3.f, sqrtf(3) / 3.f), + glm::vec3(-sqrtf(3) / 3.f, -sqrtf(3) / 3.f, sqrtf(3) / 3.f), + glm::vec3(sqrtf(3) / 3.f, -sqrtf(3) / 3.f, sqrtf(3) / 3.f) }; + + + + float precomputedNumerator[kNumPlaneSetNormals], precomputeDenominator[kNumPlaneSetNormals]; + for (uint8_t i = 0; i < kNumPlaneSetNormals; ++i) { + precomputedNumerator[i] = glm::dot(planeSetNormals[i], ray.origin); + precomputeDenominator[i] = glm::dot(planeSetNormals[i], ray.direction); + } + for (int i = 0; i < kNumPlaneSetNormals; ++i) { + float tn = (geom.Device_BVH[2 * i] - precomputedNumerator[i]) / precomputeDenominator[i]; + float tf = (geom.Device_BVH[2 * i + 1] - precomputedNumerator[i]) / precomputeDenominator[i]; + if (precomputeDenominator[i] < 0) + { + float temp = tn; + tn = tf; + tf = temp; + //std::swap(tn, tf); + } + if (tn > tNear) tNear = tn, planeIndex = i; + if (tf < tFar) tFar = tf; + if (tNear > tFar) return false; + } + + return true; +} + +__host__ __device__ +const bool intersect(const Ray ray, Geom &currGeom) +{ + float tClosest = FLT_MAX_10_EXP; + bool hit = false; + float tNear = -FLT_MAX_10_EXP, tFar = FLT_MAX_10_EXP; + uint8_t planeIndex; + hit = intersect(currGeom, ray, tNear, tFar, planeIndex); + if (hit) { + if (tNear < tClosest) + { + tClosest = tNear; + } + } + return hit; + } + + +__host__ __device__ float MeshIntersectionTest(Geom &objGeom, Ray r, + glm::vec3& intersectionPoint, glm::vec3& normal, bool& outside) { + + bool intersection = false; + glm::vec3 interPoint; + glm::vec3 internormal; + int count = objGeom.triangleCount; + + glm::vec3 minBary = glm::vec3(INT_MAX, INT_MAX, INT_MAX); + glm::vec3 BaryPosition; + glm::vec4 n1, n2, n3; + + for (int i = 0; i < count; i++) + { + int triangleOffset = 6 * i; + //glm::mat4 modelMat = objGeom.transform; + glm::vec4 p1 = objGeom.transform * objGeom.Device_Triangle_points_normals[triangleOffset + 0]; + glm::vec4 p2 = objGeom.transform * objGeom.Device_Triangle_points_normals[triangleOffset + 2]; + glm::vec4 p3 = objGeom.transform * objGeom.Device_Triangle_points_normals[triangleOffset + 4]; + + intersection = glm::intersectRayTriangle(r.origin, r.direction, glm::vec3(p1), glm::vec3(p2), glm::vec3(p3), BaryPosition); + + if (intersection) + { + if (BaryPosition[2] >= 0 && BaryPosition[2] < minBary[2]) + { + minBary = BaryPosition; + } + n1 = objGeom.Device_Triangle_points_normals[triangleOffset + 1]; + n2 = objGeom.Device_Triangle_points_normals[triangleOffset + 3]; + n3 = objGeom.Device_Triangle_points_normals[triangleOffset + 5]; + break; + } + } + if (intersection) + { + float u = minBary[0]; + float v = minBary[1]; + float t = minBary[2]; + interPoint = getPointOnRay(r, t); + internormal = glm::vec3(u * n1 + v * n2 + (1 - u - v) * n3); + intersectionPoint = interPoint; + normal = internormal; + return glm::length(r.origin - intersectionPoint); + } + return -1; +} + diff --git a/src/main.cpp b/src/main.cpp index fe8e85ec..879d3051 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -1,7 +1,6 @@ #include "main.h" #include "preview.h" #include - static std::string startTimeString; // For camera controls @@ -15,191 +14,376 @@ static bool camchanged = true; static float dtheta = 0, dphi = 0; static glm::vec3 cammove; +static double aperture = 0.2; +static double focus_dist = 3.0f; float zoom, theta, phi; glm::vec3 cameraPosition; glm::vec3 ogLookAt; // for recentering the camera -Scene *scene; -RenderState *renderState; +Scene* scene; +RenderState* renderState; int iteration; int width; int height; -//------------------------------- -//-------------MAIN-------------- -//------------------------------- -int main(int argc, char** argv) { - startTimeString = currentTimeString(); +void BuildBVH(Geom& objGeom) +{ + objGeom.Host_BVH = new float[14]; + BVH a(objGeom.triangleCount, objGeom.Host_Triangle_points_normals); + a.GetBounds(objGeom.Host_BVH, objGeom); +} - if (argc < 2) { - printf("Usage: %s SCENEFILE.txt\n", argv[0]); - return 1; - } - const char *sceneFile = argv[1]; +Polygon LoadOBJ(const char* file, char* polyName, Geom& objGeom) +{ + Polygon p(polyName); + const char* filepath = file; + std::vector shapes; std::vector materials; + std::string errors = tinyobj::LoadObj(shapes, materials, filepath); + std::cout << errors << std::endl; + if (errors.size() == 0) + { + int min_idx = 0; + //Read the information from the vector of shape_ts + for (unsigned int i = 0; i < shapes.size(); i++) + { + std::vector pos, nor; + std::vector uv; + std::vector& positions = shapes[i].mesh.positions; + std::vector& normals = shapes[i].mesh.normals; + std::vector& uvs = shapes[i].mesh.texcoords; + for (unsigned int j = 0; j < positions.size() / 3; j++) + { + pos.push_back(glm::vec4(positions[j * 3], positions[j * 3 + 1], positions[j * 3 + 2], 1)); + } + for (unsigned int j = 0; j < normals.size() / 3; j++) + { + nor.push_back(glm::vec4(normals[j * 3], normals[j * 3 + 1], normals[j * 3 + 2], 0)); + } + for (unsigned int j = 0; j < uvs.size() / 2; j++) + { + uv.push_back(glm::vec2(uvs[j * 2], uvs[j * 2 + 1])); + } + for (unsigned int j = 0; j < pos.size(); j++) + { + p.AddVertex(Vertex(pos[j], glm::vec3(255, 255, 255), nor[j], uv[j])); + } + + std::vector indices = shapes[i].mesh.indices; + for (unsigned int j = 0; j < indices.size(); j += 3) + { + Triangle t; + t.m_indices[0] = indices[j] + min_idx; + t.m_indices[1] = indices[j + 1] + min_idx; + t.m_indices[2] = indices[j + 2] + min_idx; + p.AddTriangle(t); + } + + min_idx += pos.size(); + } + } + else + { + //An error loading the OBJ occurred! + std::cout << errors << std::endl; + } + + objGeom.triangleCount = p.m_tris.size(); + objGeom.Host_Triangle_points_normals = new glm::vec4[6 * objGeom.triangleCount]; + for (int i = 0; i < objGeom.triangleCount; i++) + { + for (int j = 0; j < 3; j++) + { + //Geom.Triangle.vertex = Polygon.vertex[triangle[i].index[j]] + glm::vec4 vertPos = p.m_verts[p.m_tris[i].m_indices[j]].m_pos; + glm::vec4 vertNormal = p.m_verts[p.m_tris[i].m_indices[j]].m_normal; + objGeom.Host_Triangle_points_normals[(6 * i) + 2 * j] = vertPos; + objGeom.Host_Triangle_points_normals[(6 * i ) + 2 * j + 1] = vertNormal; + } + } + BuildBVH(objGeom); + return p; +} - // Load scene file - scene = new Scene(sceneFile); - // Set up camera stuff from loaded path tracer settings - iteration = 0; - renderState = &scene->state; - Camera &cam = renderState->camera; - width = cam.resolution.x; - height = cam.resolution.y; +void GenerateProceduralStructure1(vector &geoms, vector& mats, GeomType geomType) +{ + vector posValues; + Turtle buildingTurtle1 = Turtle(glm::vec3(0.0f, 5.0f, 0.0f), glm::vec3(1, 0, 0), glm::vec3(0, 0, 1), + glm::vec3(0, 1, 0), 0.5f, 1); + buildingTurtle1.m_rotateAngle = 90.0f; + buildingTurtle1.m_takeRandomRotationsForward = false; + LSystem *BuildingSystem1 = new LSystem(buildingTurtle1); + std::vector F_Conditions = { PostCondition(1.0, "F+F-F-F+F") }; + Rule F_Rule1(F_Conditions); + BuildingSystem1->AddRule('F', F_Rule1); + BuildingSystem1->AssignAxiom("-F"); + BuildingSystem1->LSystemParse(4); + //BuildingSystem1->PrintParsedSystem(); + BuildingSystem1->CarveBuilding(posValues, geoms, mats, geomType); +} - glm::vec3 view = cam.view; - glm::vec3 up = cam.up; - glm::vec3 right = glm::cross(view, up); - up = glm::cross(right, view); +void GenerateProceduralStructure2(vector& geoms, vector& mats, GeomType geomType) +{ + vector posValues; + Turtle buildingTurtle2 = Turtle(glm::vec3(0.0f, 5.0f, 3.0f), glm::vec3(1, 0, 0), glm::vec3(0, 0, 1), + glm::vec3(0, 1, 0), 0.5f, 1); + buildingTurtle2.m_rotateAngle = 90.0f; + buildingTurtle2.m_takeRandomRotationsForward = false; + uPtr BuildingSystem2 = mkU(buildingTurtle2); + std::vector X_Conditions2 = { PostCondition(1.0, "[+F-FX][-F+FX]") }; + Rule X_Rule(X_Conditions2); + BuildingSystem2.get()->AddRule('X', X_Rule); + BuildingSystem2.get()->AssignAxiom("FX"); + BuildingSystem2.get()->LSystemParse(5); + BuildingSystem2->CarveBuilding(posValues, geoms, mats, geomType); +} - cameraPosition = cam.position; +void GenerateProceduralStructure3(vector& geoms, vector& mats, GeomType geomType) +{ + vector posValues; + Turtle buildingTurtle1 = Turtle(glm::vec3(0.0f, 5.0f, 0.0f), glm::vec3(1, 0, 0), glm::vec3(0, 0, 1), + glm::vec3(0, 1, 0), 0.5f, 1); + buildingTurtle1.m_rotateAngle = 90.0f; + buildingTurtle1.m_takeRandomRotationsForward = false; + LSystem* BuildingSystem3 = new LSystem(buildingTurtle1); + std::vector F_Conditions = { PostCondition(1.0, "FF") }; + Rule F_Rule1(F_Conditions); + BuildingSystem3->AddRule('F', F_Rule1); + BuildingSystem3->AssignAxiom("F-F++F-F"); + BuildingSystem3->LSystemParse(4); + BuildingSystem3->CarveBuilding(posValues, geoms, mats, geomType); +} - // compute phi (horizontal) and theta (vertical) relative 3D axis - // so, (0 0 1) is forward, (0 1 0) is up - glm::vec3 viewXZ = glm::vec3(view.x, 0.0f, view.z); - glm::vec3 viewZY = glm::vec3(0.0f, view.y, view.z); - phi = glm::acos(glm::dot(glm::normalize(viewXZ), glm::vec3(0, 0, -1))); - theta = glm::acos(glm::dot(glm::normalize(viewZY), glm::vec3(0, 1, 0))); - ogLookAt = cam.lookAt; - zoom = glm::length(cam.position - ogLookAt); +void GenerateProceduralStructure4(vector& geoms, vector& mats, GeomType geomType) +{ + vector posValues; + Turtle buildingTurtle1 = Turtle(glm::vec3(0.0f, 5.0f, 0.0f), glm::vec3(1, 0, 0), glm::vec3(0, 0, 1), + glm::vec3(0, 1, 0), 0.5f, 1); + buildingTurtle1.m_rotateAngle = 30.0f; + buildingTurtle1.m_takeRandomRotationsForward = false; + LSystem* BuildingSystem3 = new LSystem(buildingTurtle1); + std::vector F_Conditions = { PostCondition(1.0, "F[-F]F[+F][F]") }; + Rule F_Rule1(F_Conditions); + BuildingSystem3->AddRule('F', F_Rule1); + BuildingSystem3->AssignAxiom("F"); + BuildingSystem3->LSystemParse(3); + BuildingSystem3->CarveBuilding(posValues, geoms, mats, geomType); +} - // Initialize CUDA and GL components - init(); +void GenerateProceduralStructure5(vector& geoms, vector& mats, GeomType geomType) +{ + vector posValues; + Turtle buildingTurtle3 = Turtle(glm::vec3(0.0f, 5.0f, 0.0f), glm::vec3(1, 0, 0), glm::vec3(0, 0, 1), + glm::vec3(0, 1, 0), 0.5f, 1); + buildingTurtle3.m_rotateAngle = 30.0f; + buildingTurtle3.m_takeRandomRotationsForward = false; + uPtr BuildingSystem3 = mkU(buildingTurtle3); + std::vector X_Conditions3 = { PostCondition(0.3, "[+F-FX][-F+FX]"), PostCondition(0.7, "[+FX]-FX") }; + Rule X_Rul3(X_Conditions3); + BuildingSystem3.get()->AddRule('X', X_Rul3); + BuildingSystem3.get()->AssignAxiom("FX"); + BuildingSystem3.get()->LSystemParse(5); + BuildingSystem3.get()->CarveBuilding(posValues, geoms, mats, geomType); +} - // GLFW main loop - mainLoop(); +//------------------------------- +//-------------MAIN-------------- +//------------------------------- - return 0; +int main(int argc, char** argv) { + startTimeString = currentTimeString(); + + if (argc < 2) { + printf("Usage: %s SCENEFILE.txt\n", argv[0]); + return 1; + } + const char* sceneFile = argv[1]; + + // Load scene file + scene = new Scene(sceneFile); + + //GenerateProceduralStructure4(scene->geoms, scene->materials, SPHERE); + //generateProcedural(scene->geoms, scene->materials); + // Set up camera stuff from loaded path tracer settings + iteration = 0; + renderState = &scene->state; + Camera& cam = renderState->camera; + cam.aperture = aperture; + cam.focus_dist = focus_dist; + width = cam.resolution.x; + height = cam.resolution.y; + + glm::vec3 view = cam.view; + glm::vec3 up = cam.up; + glm::vec3 right = glm::cross(view, up); + up = glm::cross(right, view); + + cameraPosition = cam.position; + cam.focus_dist = (cam.position - cam.lookAt).length(); + // compute phi (horizontal) and theta (vertical) relative 3D axis + // so, (0 0 1) is forward, (0 1 0) is up + glm::vec3 viewXZ = glm::vec3(view.x, 0.0f, view.z); + glm::vec3 viewZY = glm::vec3(0.0f, view.y, view.z); + phi = glm::acos(glm::dot(glm::normalize(viewXZ), glm::vec3(0, 0, -1))); + theta = glm::acos(glm::dot(glm::normalize(viewZY), glm::vec3(0, 1, 0))); + ogLookAt = cam.lookAt; + zoom = glm::length(cam.position - ogLookAt); + const char* filepathWahoo = "D:/GitHub/CIS565/Project3-CUDA-Path-Tracer/scenes/wahoo.obj"; + const char* filepathCube = "D:/GitHub/CIS565/Project3-CUDA-Path-Tracer/scenes/cube.obj"; + const char* filepathbunny = "D:/GitHub/CIS565/Project3-CUDA-Path-Tracer/scenes/stanford-bunny.obj"; + Polygon p; + char* filename = "wahoo"; + + for (int i = 0; i < scene->geoms.size(); i++) + { + if ((int)scene->geoms[i].type == 2) + { + p = LoadOBJ(filepathWahoo, filename, scene->geoms[i]); + + } + } + + // Initialize CUDA and GL components + init(); + + // GLFW main loop + mainLoop(); + + return 0; } + void saveImage() { - float samples = iteration; - // output image file - image img(width, height); - - for (int x = 0; x < width; x++) { - for (int y = 0; y < height; y++) { - int index = x + (y * width); - glm::vec3 pix = renderState->image[index]; - img.setPixel(width - 1 - x, y, glm::vec3(pix) / samples); - } - } - - std::string filename = renderState->imageName; - std::ostringstream ss; - ss << filename << "." << startTimeString << "." << samples << "samp"; - filename = ss.str(); - - // CHECKITOUT - img.savePNG(filename); - //img.saveHDR(filename); // Save a Radiance HDR file + float samples = iteration; + // output image file + image img(width, height); + + for (int x = 0; x < width; x++) { + for (int y = 0; y < height; y++) { + int index = x + (y * width); + glm::vec3 pix = renderState->image[index]; + img.setPixel(width - 1 - x, y, glm::vec3(pix) / samples); + } + } + + std::string filename = renderState->imageName; + std::ostringstream ss; + ss << filename << "." << startTimeString << "." << samples << "samp"; + filename = ss.str(); + + // CHECKITOUT + img.savePNG(filename); + //img.saveHDR(filename); // Save a Radiance HDR file } void runCuda() { - if (camchanged) { - iteration = 0; - Camera &cam = renderState->camera; - cameraPosition.x = zoom * sin(phi) * sin(theta); - cameraPosition.y = zoom * cos(theta); - cameraPosition.z = zoom * cos(phi) * sin(theta); - - cam.view = -glm::normalize(cameraPosition); - glm::vec3 v = cam.view; - glm::vec3 u = glm::vec3(0, 1, 0);//glm::normalize(cam.up); - glm::vec3 r = glm::cross(v, u); - cam.up = glm::cross(r, v); - cam.right = r; - - cam.position = cameraPosition; - cameraPosition += cam.lookAt; - cam.position = cameraPosition; - camchanged = false; - } - - // Map OpenGL buffer object for writing from CUDA on a single GPU - // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer - - if (iteration == 0) { - pathtraceFree(); - pathtraceInit(scene); - } - - if (iteration < renderState->iterations) { - uchar4 *pbo_dptr = NULL; - iteration++; - cudaGLMapBufferObject((void**)&pbo_dptr, pbo); - - // execute the kernel - int frame = 0; - pathtrace(pbo_dptr, frame, iteration); - - // unmap buffer object - cudaGLUnmapBufferObject(pbo); - } else { - saveImage(); - pathtraceFree(); - cudaDeviceReset(); - exit(EXIT_SUCCESS); - } + if (camchanged) { + iteration = 0; + Camera& cam = renderState->camera; + cameraPosition.x = zoom * sin(phi) * sin(theta); + cameraPosition.y = zoom * cos(theta); + cameraPosition.z = zoom * cos(phi) * sin(theta); + + cam.view = -glm::normalize(cameraPosition); + glm::vec3 v = cam.view; + glm::vec3 u = glm::vec3(0, 1, 0);//glm::normalize(cam.up); + glm::vec3 r = glm::cross(v, u); + cam.up = glm::cross(r, v); + cam.right = r; + //cam.focus_dist = (cam.position - cam.lookAt).length(); + cam.focus_dist = focus_dist; + + cam.position = cameraPosition; + cameraPosition += cam.lookAt; + cam.position = cameraPosition; + camchanged = false; + SetCacheState(false); + } + // Map OpenGL buffer object for writing from CUDA on a single GPU + // No data is moved (Win & Linux). When mapped to CUDA, OpenGL should not use this buffer + + if (iteration == 0) { + pathtraceFree(); + pathtraceInit(scene); + } + + if (iteration < renderState->iterations) { + uchar4* pbo_dptr = NULL; + iteration++; + cudaGLMapBufferObject((void**)&pbo_dptr, pbo); + + // execute the kernel + int frame = 0; + pathtrace(pbo_dptr, frame, iteration); + + // unmap buffer object + cudaGLUnmapBufferObject(pbo); + } + else { + saveImage(); + pathtraceFree(); + cudaDeviceReset(); + exit(EXIT_SUCCESS); + } } void keyCallback(GLFWwindow* window, int key, int scancode, int action, int mods) { - if (action == GLFW_PRESS) { - switch (key) { - case GLFW_KEY_ESCAPE: - saveImage(); - glfwSetWindowShouldClose(window, GL_TRUE); - break; - case GLFW_KEY_S: - saveImage(); - break; - case GLFW_KEY_SPACE: - camchanged = true; - renderState = &scene->state; - Camera &cam = renderState->camera; - cam.lookAt = ogLookAt; - break; - } - } + if (action == GLFW_PRESS) { + switch (key) { + case GLFW_KEY_ESCAPE: + saveImage(); + glfwSetWindowShouldClose(window, GL_TRUE); + break; + case GLFW_KEY_S: + saveImage(); + break; + case GLFW_KEY_SPACE: + camchanged = true; + renderState = &scene->state; + Camera& cam = renderState->camera; + cam.lookAt = ogLookAt; + break; + } + } } void mouseButtonCallback(GLFWwindow* window, int button, int action, int mods) { - leftMousePressed = (button == GLFW_MOUSE_BUTTON_LEFT && action == GLFW_PRESS); - rightMousePressed = (button == GLFW_MOUSE_BUTTON_RIGHT && action == GLFW_PRESS); - middleMousePressed = (button == GLFW_MOUSE_BUTTON_MIDDLE && action == GLFW_PRESS); + leftMousePressed = (button == GLFW_MOUSE_BUTTON_LEFT && action == GLFW_PRESS); + rightMousePressed = (button == GLFW_MOUSE_BUTTON_RIGHT && action == GLFW_PRESS); + middleMousePressed = (button == GLFW_MOUSE_BUTTON_MIDDLE && action == GLFW_PRESS); } void mousePositionCallback(GLFWwindow* window, double xpos, double ypos) { - if (xpos == lastX || ypos == lastY) return; // otherwise, clicking back into window causes re-start - if (leftMousePressed) { - // compute new camera parameters - phi -= (xpos - lastX) / width; - theta -= (ypos - lastY) / height; - theta = std::fmax(0.001f, std::fmin(theta, PI)); - camchanged = true; - } - else if (rightMousePressed) { - zoom += (ypos - lastY) / height; - zoom = std::fmax(0.1f, zoom); - camchanged = true; - } - else if (middleMousePressed) { - renderState = &scene->state; - Camera &cam = renderState->camera; - glm::vec3 forward = cam.view; - forward.y = 0.0f; - forward = glm::normalize(forward); - glm::vec3 right = cam.right; - right.y = 0.0f; - right = glm::normalize(right); - - cam.lookAt -= (float) (xpos - lastX) * right * 0.01f; - cam.lookAt += (float) (ypos - lastY) * forward * 0.01f; - camchanged = true; - } - lastX = xpos; - lastY = ypos; + if (xpos == lastX || ypos == lastY) return; // otherwise, clicking back into window causes re-start + if (leftMousePressed) { + // compute new camera parameters + phi -= (xpos - lastX) / width; + theta -= (ypos - lastY) / height; + theta = std::fmax(0.001f, std::fmin(theta, PI)); + camchanged = true; + } + else if (rightMousePressed) { + zoom += (ypos - lastY) / height; + zoom = std::fmax(0.1f, zoom); + camchanged = true; + } + else if (middleMousePressed) { + renderState = &scene->state; + Camera& cam = renderState->camera; + glm::vec3 forward = cam.view; + forward.y = 0.0f; + forward = glm::normalize(forward); + glm::vec3 right = cam.right; + right.y = 0.0f; + right = glm::normalize(right); + + cam.lookAt -= (float)(xpos - lastX) * right * 0.01f; + cam.lookAt += (float)(ypos - lastY) * forward * 0.01f; + camchanged = true; + } + lastX = xpos; + lastY = ypos; } diff --git a/src/main.h b/src/main.h index fdb7d5d1..44e993be 100644 --- a/src/main.h +++ b/src/main.h @@ -19,6 +19,12 @@ #include "pathtrace.h" #include "utilities.h" #include "scene.h" +#include "..\src\MeshLoading\polygon.h" +#include "..\src\MeshLoading\tiny_obj_loader.h" + +#include "..\src\MeshLoading\BVH.h" +#include "LSystem/turtle.h"; +#include "LSystem/lsystem.h"; using namespace std; diff --git a/src/pathtrace.cu b/src/pathtrace.cu index 056e1467..5a805964 100644 --- a/src/pathtrace.cu +++ b/src/pathtrace.cu @@ -4,6 +4,7 @@ #include #include #include +#include #include "sceneStructs.h" #include "scene.h" @@ -13,102 +14,156 @@ #include "pathtrace.h" #include "intersections.h" #include "interactions.h" +//#include "../stream_compaction/efficient.h" +#include +#include #define ERRORCHECK 1 #define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) #define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__) -void checkCUDAErrorFn(const char *msg, const char *file, int line) { +void checkCUDAErrorFn(const char* msg, const char* file, int line) { #if ERRORCHECK - cudaDeviceSynchronize(); - cudaError_t err = cudaGetLastError(); - if (cudaSuccess == err) { - return; - } - - fprintf(stderr, "CUDA error"); - if (file) { - fprintf(stderr, " (%s:%d)", file, line); - } - fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); + cudaDeviceSynchronize(); + cudaError_t err = cudaGetLastError(); + if (cudaSuccess == err) { + return; + } + + fprintf(stderr, "CUDA error"); + if (file) { + fprintf(stderr, " (%s:%d)", file, line); + } + fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); # ifdef _WIN32 - getchar(); + getchar(); # endif - exit(EXIT_FAILURE); + exit(EXIT_FAILURE); #endif } __host__ __device__ thrust::default_random_engine makeSeededRandomEngine(int iter, int index, int depth) { - int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); - return thrust::default_random_engine(h); + int h = utilhash((1 << 31) | (depth << 22) | iter) ^ utilhash(index); + return thrust::default_random_engine(h); } //Kernel that writes the image to the OpenGL PBO directly. __global__ void sendImageToPBO(uchar4* pbo, glm::ivec2 resolution, - int iter, glm::vec3* image) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < resolution.x && y < resolution.y) { - int index = x + (y * resolution.x); - glm::vec3 pix = image[index]; - - glm::ivec3 color; - color.x = glm::clamp((int) (pix.x / iter * 255.0), 0, 255); - color.y = glm::clamp((int) (pix.y / iter * 255.0), 0, 255); - color.z = glm::clamp((int) (pix.z / iter * 255.0), 0, 255); - - // Each thread writes one pixel location in the texture (textel) - pbo[index].w = 0; - pbo[index].x = color.x; - pbo[index].y = color.y; - pbo[index].z = color.z; - } + int iter, glm::vec3* image) { + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < resolution.x && y < resolution.y) { + int index = x + (y * resolution.x); + glm::vec3 pix = image[index]; + + glm::ivec3 color; + color.x = glm::clamp((int)(pix.x / iter * 255.0), 0, 255); + color.y = glm::clamp((int)(pix.y / iter * 255.0), 0, 255); + color.z = glm::clamp((int)(pix.z / iter * 255.0), 0, 255); + + // Each thread writes one pixel location in the texture (textel) + pbo[index].w = 0; + pbo[index].x = color.x; + pbo[index].y = color.y; + pbo[index].z = color.z; + } } -static Scene * hst_scene = NULL; -static glm::vec3 * dev_image = NULL; -static Geom * dev_geoms = NULL; -static Material * dev_materials = NULL; -static PathSegment * dev_paths = NULL; -static ShadeableIntersection * dev_intersections = NULL; +static Scene* hst_scene = NULL; +static glm::vec3* dev_image = NULL; +static Geom* dev_geoms = NULL; +static Material* dev_materials = NULL; +static PathSegment* dev_paths = NULL; +static ShadeableIntersection* dev_intersections = NULL; +static int* dev_Stencil; +static PathSegment* dev_cache_paths = NULL; +static ShadeableIntersection* dev_cache_intersections = NULL; +static bool cacheAvailable = false; +int cacheNumPaths = 0; + // TODO: static variables for device memory, any extra info you need, etc // ... -void pathtraceInit(Scene *scene) { - hst_scene = scene; - const Camera &cam = hst_scene->state.camera; - const int pixelcount = cam.resolution.x * cam.resolution.y; - cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3)); - cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3)); +bool usingCache = true; +bool usingDOF = false; +bool useBVH = true; + +void pathtraceInit(Scene* scene) { + hst_scene = scene; + const Camera& cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; + + cudaMalloc(&dev_image, pixelcount * sizeof(glm::vec3)); + cudaMemset(dev_image, 0, pixelcount * sizeof(glm::vec3)); + + cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment)); + + + for (int i = 0; i < scene->geoms.size(); i++) + { + if (scene->geoms[i].type == OBJ) + { + Geom& currGeom = scene->geoms[i]; + cudaMalloc(&currGeom.Device_Triangle_points_normals, 6 * currGeom.triangleCount * sizeof(glm::vec4)); + cudaMemcpy(currGeom.Device_Triangle_points_normals, currGeom.Host_Triangle_points_normals, 6 * currGeom.triangleCount * sizeof(glm::vec4), cudaMemcpyHostToDevice); + + //Copy Bound Volume Data + for (int i = 0; i < 14; i++) + { + std::cout << currGeom.Host_BVH[i] << "\n"; + } + cudaMalloc(&currGeom.Device_BVH, 14 * sizeof(float)); + cudaMemcpy(currGeom.Device_BVH, currGeom.Host_BVH, 14 * sizeof(float), cudaMemcpyHostToDevice); + } + } - cudaMalloc(&dev_paths, pixelcount * sizeof(PathSegment)); + cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); + cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); - cudaMalloc(&dev_geoms, scene->geoms.size() * sizeof(Geom)); - cudaMemcpy(dev_geoms, scene->geoms.data(), scene->geoms.size() * sizeof(Geom), cudaMemcpyHostToDevice); + cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); + cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); - cudaMalloc(&dev_materials, scene->materials.size() * sizeof(Material)); - cudaMemcpy(dev_materials, scene->materials.data(), scene->materials.size() * sizeof(Material), cudaMemcpyHostToDevice); + cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); - cudaMalloc(&dev_intersections, pixelcount * sizeof(ShadeableIntersection)); - cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + // TODO: initialize any extra device memeory you need + cudaMalloc(&dev_Stencil, pixelcount * sizeof(int)); - // TODO: initialize any extra device memeory you need + cudaMalloc(&dev_cache_paths, pixelcount * sizeof(PathSegment)); - checkCUDAError("pathtraceInit"); + cudaMalloc(&dev_cache_intersections, pixelcount * sizeof(ShadeableIntersection)); + cudaMemset(dev_cache_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + + + // Only Handles for one mesh Structore right now need to add functionality for more afterwards + + + checkCUDAError("pathtraceInit"); } void pathtraceFree() { - cudaFree(dev_image); // no-op if dev_image is null - cudaFree(dev_paths); - cudaFree(dev_geoms); - cudaFree(dev_materials); - cudaFree(dev_intersections); - // TODO: clean up any extra device memory you created - - checkCUDAError("pathtraceFree"); + cudaFree(dev_image); // no-op if dev_image is null + cudaFree(dev_paths); + cudaFree(dev_geoms); + cudaFree(dev_materials); + cudaFree(dev_intersections); + cudaFree(dev_Stencil); + // TODO: clean up any extra device memory you created + + cudaFree(dev_cache_paths); + cudaFree(dev_cache_intersections); + + checkCUDAError("pathtraceFree"); +} + +__device__ glm::vec3 random_in_unit_disk(thrust::default_random_engine& rng) { + thrust::uniform_real_distribution u01(0, 1); + glm::vec3 p = glm::vec3(u01(rng), u01(rng), 0); + p = glm::normalize(p); + return p; } /** @@ -119,27 +174,61 @@ void pathtraceFree() { * motion blur - jitter rays "in time" * lens effect - jitter ray origin positions based on a lens */ -__global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, PathSegment* pathSegments) +__global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, PathSegment* pathSegments, bool usingCache, bool usingDOF) { - int x = (blockIdx.x * blockDim.x) + threadIdx.x; - int y = (blockIdx.y * blockDim.y) + threadIdx.y; - - if (x < cam.resolution.x && y < cam.resolution.y) { - int index = x + (y * cam.resolution.x); - PathSegment & segment = pathSegments[index]; - - segment.ray.origin = cam.position; - segment.color = glm::vec3(1.0f, 1.0f, 1.0f); - - // TODO: implement antialiasing by jittering the ray - segment.ray.direction = glm::normalize(cam.view - - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f) - - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f) - ); - - segment.pixelIndex = index; - segment.remainingBounces = traceDepth; - } + int x = (blockIdx.x * blockDim.x) + threadIdx.x; + int y = (blockIdx.y * blockDim.y) + threadIdx.y; + + if (x < cam.resolution.x && y < cam.resolution.y) { + int index = x + (y * cam.resolution.x); + PathSegment& segment = pathSegments[index]; + + segment.ray.origin = cam.position; + segment.color = glm::vec3(1.0f, 1.0f, 1.0f); + + if (!usingCache) + { + + /// Using Anti ALiasing with Jittering + thrust::default_random_engine rng = makeSeededRandomEngine(iter, index, 0); + thrust::uniform_real_distribution u01(0, 1); + // TODO: implement antialiasing by jittering the ray + segment.ray.direction = glm::normalize(cam.view + - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f + u01(rng)) + - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f + +u01(rng)) + ); + } + else + { + // TODO: implement antialiasing by jittering the ray + segment.ray.direction = glm::normalize(cam.view + - cam.right * cam.pixelLength.x * ((float)x - (float)cam.resolution.x * 0.5f) + - cam.up * cam.pixelLength.y * ((float)y - (float)cam.resolution.y * 0.5f) + ); + } + + //AA plus DOF + if (usingDOF) + { + thrust::default_random_engine rng = makeSeededRandomEngine(iter, index, 0); + thrust::uniform_real_distribution u01(0, 1); + double lens_radius = cam.aperture / 2; + glm::vec3 rd = float(lens_radius) * random_in_unit_disk(rng); + glm::vec3 offset = cam.up * rd.y + cam.right * rd.x; + glm::vec3 rayOrigin = cam.position + offset; // NEw origin + + //Focal Point + glm::vec3 focalPoint = segment.ray.origin + (float)cam.focus_dist * segment.ray.direction; + + + + segment.ray.origin = rayOrigin; + segment.ray.direction = glm::normalize(focalPoint - rayOrigin); + } + + segment.pixelIndex = index; + segment.remainingBounces = traceDepth; + } } // TODO: @@ -147,69 +236,85 @@ __global__ void generateRayFromCamera(Camera cam, int iter, int traceDepth, Path // Generating new rays is handled in your shader(s). // Feel free to modify the code below. __global__ void computeIntersections( - int depth - , int num_paths - , PathSegment * pathSegments - , Geom * geoms - , int geoms_size - , ShadeableIntersection * intersections - ) + int depth + , int num_paths + , PathSegment* pathSegments + , Geom* geoms + , int geoms_size + , ShadeableIntersection* intersections + , bool useBVH) { - int path_index = blockIdx.x * blockDim.x + threadIdx.x; - - if (path_index < num_paths) - { - PathSegment pathSegment = pathSegments[path_index]; - - float t; - glm::vec3 intersect_point; - glm::vec3 normal; - float t_min = FLT_MAX; - int hit_geom_index = -1; - bool outside = true; - - glm::vec3 tmp_intersect; - glm::vec3 tmp_normal; - - // naive parse through global geoms - - for (int i = 0; i < geoms_size; i++) - { - Geom & geom = geoms[i]; - - if (geom.type == CUBE) - { - t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); - } - else if (geom.type == SPHERE) - { - t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); - } - // TODO: add more intersection tests here... triangle? metaball? CSG? - - // Compute the minimum t from the intersection tests to determine what - // scene geometry object was hit first. - if (t > 0.0f && t_min > t) - { - t_min = t; - hit_geom_index = i; - intersect_point = tmp_intersect; - normal = tmp_normal; - } - } - - if (hit_geom_index == -1) - { - intersections[path_index].t = -1.0f; - } - else - { - //The ray hits something - intersections[path_index].t = t_min; - intersections[path_index].materialId = geoms[hit_geom_index].materialid; - intersections[path_index].surfaceNormal = normal; - } - } + int path_index = blockIdx.x * blockDim.x + threadIdx.x; + + if (path_index < num_paths) + { + PathSegment pathSegment = pathSegments[path_index]; + + float t; + glm::vec3 intersect_point; + glm::vec3 normal; + float t_min = FLT_MAX; + int hit_geom_index = -1; + bool outside = true; + + glm::vec3 tmp_intersect; + glm::vec3 tmp_normal; + + // naive parse through global geoms + + for (int i = 0; i < geoms_size; i++) + { + Geom& geom = geoms[i]; + + if (geom.type == CUBE) + { + t = boxIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + } + else if (geom.type == SPHERE) + { + t = sphereIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + } + + else if (geom.type == OBJ) + { + if (useBVH) + { + if (intersect(pathSegment.ray, geom)) + { + t = MeshIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + } + } + else + { + t = MeshIntersectionTest(geom, pathSegment.ray, tmp_intersect, tmp_normal, outside); + } + } + // TODO: add more intersection tests here... triangle? metaball? CSG? + + // Compute the minimum t from the intersection tests to determine what + // scene geometry object was hit first. + if (t > 0.0f && t_min > t) + { + t_min = t; + hit_geom_index = i; + intersect_point = tmp_intersect; + normal = tmp_normal; + } + } + + if (hit_geom_index == -1) + { + intersections[path_index].t = -1.0f; + } + else + { + //The ray hits something + intersections[path_index].t = t_min; + intersections[path_index].materialId = geoms[hit_geom_index].materialid; + intersections[path_index].surfaceNormal = normal; + + } + } } // LOOK: "fake" shader demonstrating what you might do with the info in @@ -221,173 +326,335 @@ __global__ void computeIntersections( // Note that this shader does NOT do a BSDF evaluation! // Your shaders should handle that - this can allow techniques such as // bump mapping. -__global__ void shadeFakeMaterial ( - int iter - , int num_paths - , ShadeableIntersection * shadeableIntersections - , PathSegment * pathSegments - , Material * materials - ) +__global__ void shadeFakeMaterial( + int iter + , int num_paths + , ShadeableIntersection* shadeableIntersections + , PathSegment* pathSegments + , Material* materials +) +{ + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_paths) + { + ShadeableIntersection intersection = shadeableIntersections[idx]; + if (intersection.t > 0.0f) { // if the intersection exists... + // Set up the RNG + // LOOK: this is how you use thrust's RNG! Please look at + // makeSeededRandomEngine as well. + thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0); + thrust::uniform_real_distribution u01(0, 1); + + Material material = materials[intersection.materialId]; + glm::vec3 materialColor = material.color; + + // If the material indicates that the object was a light, "light" the ray + if (material.emittance > 0.0f) { + pathSegments[idx].color *= (materialColor * material.emittance); + pathSegments[idx].remainingBounces = 0; + } + // Otherwise, do some pseudo-lighting computation. This is actually more + // like what you would expect from shading in a rasterizer like OpenGL. + // TODO: replace this! you should be able to start with basically a one-liner + + else { + pathSegments[idx].remainingBounces -= 1; + glm::vec3 intersectPt = getPointOnRay(pathSegments[idx].ray, intersection.t); + + + //scatterRay(pathSegments[idx], intersectPt, intersection.surfaceNormal, material, rng); + /* if (material.hasRefractive > 0.0f) + { + pathSegments[idx].color = pathSegments[idx].color; + } + else + { + pathSegments[idx].color *= materialColor; + }*/ + pathSegments[idx].color *= materialColor; + //float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f)); + //pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f; + //pathSegments[idx].color *= u01(rng); // apply some noise because why not + } + // If there was no intersection, color the ray black. + // Lots of renderers use 4 channel color, RGBA, where A = alpha, often + // used for opacity, in which case they can indicate "no opacity". + // This can be useful for post-processing and image compositing. + } + else { + pathSegments[idx].color = glm::vec3(0.0f); + pathSegments[idx].remainingBounces = 0; + } + } +} + +__global__ void shadeBSDFMaterial( + int iter + , int num_paths + , ShadeableIntersection* shadeableIntersections + , PathSegment* pathSegments + , Material* materials, int depth, Camera cam +) { - int idx = blockIdx.x * blockDim.x + threadIdx.x; - if (idx < num_paths) - { - ShadeableIntersection intersection = shadeableIntersections[idx]; - if (intersection.t > 0.0f) { // if the intersection exists... - // Set up the RNG - // LOOK: this is how you use thrust's RNG! Please look at - // makeSeededRandomEngine as well. - thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0); - thrust::uniform_real_distribution u01(0, 1); - - Material material = materials[intersection.materialId]; - glm::vec3 materialColor = material.color; - - // If the material indicates that the object was a light, "light" the ray - if (material.emittance > 0.0f) { - pathSegments[idx].color *= (materialColor * material.emittance); - } - // Otherwise, do some pseudo-lighting computation. This is actually more - // like what you would expect from shading in a rasterizer like OpenGL. - // TODO: replace this! you should be able to start with basically a one-liner - else { - float lightTerm = glm::dot(intersection.surfaceNormal, glm::vec3(0.0f, 1.0f, 0.0f)); - pathSegments[idx].color *= (materialColor * lightTerm) * 0.3f + ((1.0f - intersection.t * 0.02f) * materialColor) * 0.7f; - pathSegments[idx].color *= u01(rng); // apply some noise because why not - } - // If there was no intersection, color the ray black. - // Lots of renderers use 4 channel color, RGBA, where A = alpha, often - // used for opacity, in which case they can indicate "no opacity". - // This can be useful for post-processing and image compositing. - } else { - pathSegments[idx].color = glm::vec3(0.0f); - } - } + int idx = blockIdx.x * blockDim.x + threadIdx.x; + if (idx < num_paths) + { + ShadeableIntersection intersection = shadeableIntersections[idx]; + if (intersection.t > 0.0f) { // if the intersection exists... + // Set up the RNG + // LOOK: this is how you use thrust's RNG! Please look at + // makeSeededRandomEngine as well. + thrust::default_random_engine rng = makeSeededRandomEngine(iter, idx, 0); + thrust::uniform_real_distribution u01(0, 1); + + Material material = materials[intersection.materialId]; + glm::vec3 materialColor = material.color; + + // If the material indicates that the object was a light, "light" the ray + if (material.emittance > 0.0f) { + pathSegments[idx].color *= (materialColor * material.emittance); + pathSegments[idx].remainingBounces = 0; + } + // Otherwise, do some pseudo-lighting computation. This is actually more + // like what you would expect from shading in a rasterizer like OpenGL. + // TODO: replace this! you should be able to start with basically a one-liner + + else { + pathSegments[idx].remainingBounces -= 1; + glm::vec3 intersectPt = getPointOnRay(pathSegments[idx].ray, intersection.t); + + + scatterRay(pathSegments[idx], intersectPt, intersection.surfaceNormal, material, rng, cam); + + } + // If there was no intersection, color the ray black. + // Lots of renderers use 4 channel color, RGBA, where A = alpha, often + // used for opacity, in which case they can indicate "no opacity". + // This can be useful for post-processing and image compositing. + } + else { + pathSegments[idx].color = glm::vec3(0.0f); + pathSegments[idx].remainingBounces = 0; + } + } } // Add the current iteration's output to the overall image -__global__ void finalGather(int nPaths, glm::vec3 * image, PathSegment * iterationPaths) +__global__ void finalGather(int nPaths, glm::vec3* image, PathSegment* iterationPaths) { - int index = (blockIdx.x * blockDim.x) + threadIdx.x; + int index = (blockIdx.x * blockDim.x) + threadIdx.x; - if (index < nPaths) - { - PathSegment iterationPath = iterationPaths[index]; - image[iterationPath.pixelIndex] += iterationPath.color; - } + if (index < nPaths) + { + PathSegment iterationPath = iterationPaths[index]; + image[iterationPath.pixelIndex] += iterationPath.color; + } +} + +struct hasTerminated +{ + __host__ __device__ + bool operator()(const int& x) + { + return x == 1; + } +}; + +__global__ void CompactionStencil(int nPaths, PathSegment* iterationPaths, int* dev_Stencil) +{ + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + + if (index < nPaths) + { + if (iterationPaths[index].remainingBounces == 0) + { + dev_Stencil[index] = 0; + return; + } + dev_Stencil[index] = 1; + } } /** * Wrapper for the __global__ call that sets up the kernel calls and does a ton * of memory management */ -void pathtrace(uchar4 *pbo, int frame, int iter) { - const int traceDepth = hst_scene->state.traceDepth; - const Camera &cam = hst_scene->state.camera; - const int pixelcount = cam.resolution.x * cam.resolution.y; - - // 2D block for generating ray from camera - const dim3 blockSize2d(8, 8); - const dim3 blocksPerGrid2d( - (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, - (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); - - // 1D block for path tracing - const int blockSize1d = 128; - - /////////////////////////////////////////////////////////////////////////// - - // Recap: - // * Initialize array of path rays (using rays that come out of the camera) - // * You can pass the Camera object to that kernel. - // * Each path ray must carry at minimum a (ray, color) pair, - // * where color starts as the multiplicative identity, white = (1, 1, 1). - // * This has already been done for you. - // * For each depth: - // * Compute an intersection in the scene for each path ray. - // A very naive version of this has been implemented for you, but feel - // free to add more primitives and/or a better algorithm. - // Currently, intersection distance is recorded as a parametric distance, - // t, or a "distance along the ray." t = -1.0 indicates no intersection. - // * Color is attenuated (multiplied) by reflections off of any object - // * TODO: Stream compact away all of the terminated paths. - // You may use either your implementation or `thrust::remove_if` or its - // cousins. - // * Note that you can't really use a 2D kernel launch any more - switch - // to 1D. - // * TODO: Shade the rays that intersected something or didn't bottom out. - // That is, color the ray by performing a color computation according - // to the shader, then generate a new ray to continue the ray path. - // We recommend just updating the ray's PathSegment in place. - // Note that this step may come before or after stream compaction, - // since some shaders you write may also cause a path to terminate. - // * Finally, add this iteration's results to the image. This has been done - // for you. - - // TODO: perform one iteration of path tracing - - generateRayFromCamera <<>>(cam, iter, traceDepth, dev_paths); - checkCUDAError("generate camera ray"); - - int depth = 0; - PathSegment* dev_path_end = dev_paths + pixelcount; - int num_paths = dev_path_end - dev_paths; - - // --- PathSegment Tracing Stage --- - // Shoot ray into scene, bounce between objects, push shading chunks - - bool iterationComplete = false; - while (!iterationComplete) { - - // clean shading chunks - cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); - - // tracing - dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; - computeIntersections <<>> ( - depth - , num_paths - , dev_paths - , dev_geoms - , hst_scene->geoms.size() - , dev_intersections - ); - checkCUDAError("trace one bounce"); - cudaDeviceSynchronize(); - depth++; - - - // TODO: - // --- Shading Stage --- - // Shade path segments based on intersections and generate new rays by - // evaluating the BSDF. - // Start off with just a big kernel that handles all the different - // materials you have in the scenefile. - // TODO: compare between directly shading the path segments and shading - // path segments that have been reshuffled to be contiguous in memory. - - shadeFakeMaterial<<>> ( - iter, - num_paths, - dev_intersections, - dev_paths, - dev_materials - ); - iterationComplete = true; // TODO: should be based off stream compaction results. - } - - // Assemble this iteration and apply it to the image - dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; - finalGather<<>>(num_paths, dev_image, dev_paths); - - /////////////////////////////////////////////////////////////////////////// - - // Send results to OpenGL buffer for rendering - sendImageToPBO<<>>(pbo, cam.resolution, iter, dev_image); - - // Retrieve image from GPU - cudaMemcpy(hst_scene->state.image.data(), dev_image, - pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); - - checkCUDAError("pathtrace"); + +void SetCacheState(bool a_state) +{ + cacheAvailable = a_state; +} + +struct ShadeableIntersectionComparator +{ + __host__ __device__ + inline bool operator() (const ShadeableIntersection& a, const ShadeableIntersection& b) + { + return a.materialId < b.materialId; + + } +}; + +void pathtrace(uchar4* pbo, int frame, int iter) { + const int traceDepth = hst_scene->state.traceDepth; + const Camera& cam = hst_scene->state.camera; + const int pixelcount = cam.resolution.x * cam.resolution.y; + + // 2D block for generating ray from camera + const dim3 blockSize2d(8, 8); + const dim3 blocksPerGrid2d( + (cam.resolution.x + blockSize2d.x - 1) / blockSize2d.x, + (cam.resolution.y + blockSize2d.y - 1) / blockSize2d.y); + + // 1D block for path tracing + const int blockSize1d = 128; + + /////////////////////////////////////////////////////////////////////////// + + // Recap: + // * Initialize array of path rays (using rays that come out of the camera) + // * You can pass the Camera object to that kernel. + // * Each path ray must carry at minimum a (ray, color) pair, + // * where color starts as the multiplicative identity, white = (1, 1, 1). + // * This has already been done for you. + // * For each depth: + // * Compute an intersection in the scene for each path ray. + // A very naive version of this has been implemented for you, but feel + // free to add more primitives and/or a better algorithm. + // Currently, intersection distance is recorded as a parametric distance, + // t, or a "distance along the ray." t = -1.0 indicates no intersection. + // * Color is attenuated (multiplied) by reflections off of any object + // * TODO: Stream compact away all of the terminated paths. + // You may use either your implementation or `thrust::remove_if` or its + // cousins. + // * Note that you can't really use a 2D kernel launch any more - switch + // to 1D. + // * TODO: Shade the rays that intersected something or didn't bottom out. + // That is, color the ray by performing a color computation according + // to the shader, then generate a new ray to continue the ray path. + // We recommend just updating the ray's PathSegment in place. + // Note that this step may come before or after stream compaction, + // since some shaders you write may also cause a path to terminate. + // * Finally, add this iteration's results to the image. This has been done + // for you. + + // TODO: perform one iteration of path tracing + + int depth = 0; + if (usingCache) + { + if (!cacheAvailable) + { + generateRayFromCamera << > > (cam, iter, traceDepth, dev_paths, usingCache, usingDOF); + checkCUDAError("generate camera ray"); + } + else + { + cudaMemcpy(dev_paths, dev_cache_paths, pixelcount * sizeof(PathSegment), cudaMemcpyDeviceToDevice); + cudaMemcpy(dev_intersections, dev_cache_intersections, pixelcount * sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice); + cudaDeviceSynchronize(); + depth = 1; + } + } + else + { + generateRayFromCamera << > > (cam, iter, traceDepth, dev_paths, usingCache, usingDOF); + checkCUDAError("generate camera ray"); + } + cudaDeviceSynchronize(); + PathSegment* dev_path_end = dev_paths + pixelcount; + int num_paths = dev_path_end - dev_paths; + + + // --- PathSegment Tracing Stage --- + // Shoot ray into scene, bounce between objects, push shading chunks + + bool iterationComplete = false; + while (!iterationComplete) { + + // clean shading chunks + cudaMemset(dev_intersections, 0, pixelcount * sizeof(ShadeableIntersection)); + + // tracing + dim3 numblocksPathSegmentTracing = (num_paths + blockSize1d - 1) / blockSize1d; + computeIntersections << > > ( + depth + , num_paths + , dev_paths + , dev_geoms + , hst_scene->geoms.size() + , dev_intersections + , useBVH); + checkCUDAError("trace one bounce"); + cudaDeviceSynchronize(); + depth++; + + + + + + if (!cacheAvailable && usingCache) + { + cudaMemcpy(dev_cache_paths, dev_paths, pixelcount * sizeof(PathSegment), cudaMemcpyDeviceToDevice); + cudaMemcpy(dev_cache_intersections, dev_intersections, pixelcount * sizeof(ShadeableIntersection), cudaMemcpyDeviceToDevice); + SetCacheState(true); + cudaDeviceSynchronize(); + } + +#pragma region SortingMaterial + + //thrust::device_ptr dev_thrust_intersections = thrust::device_ptr(dev_intersections); + //thrust::device_ptr dev_thrust_PathSegment = thrust::device_ptr(dev_paths); + //thrust::sort_by_key(thrust::device, dev_thrust_intersections, dev_thrust_intersections + num_paths, dev_thrust_PathSegment, ShadeableIntersectionComparator()); + + +#pragma endregion + + + // TODO: + // --- Shading Stage --- + // Shade path segments based on intersections and generate new rays by + // evaluating the BSDF. + // Start off with just a big kernel that handles all the different + // materials you have in the scenefile. + // TODO: compare between directly shading the path segments and shading + // path segments that have been reshuffled to be contiguous in memory. + + const Camera *camcpy = &cam; + shadeBSDFMaterial << > > ( + iter, + num_paths, + dev_intersections, + dev_paths, + dev_materials, depth, *camcpy + ); + + CompactionStencil << > > (num_paths, + dev_paths, dev_Stencil); + + PathSegment* itr = thrust::stable_partition(thrust::device, dev_paths, dev_paths + num_paths, dev_Stencil, hasTerminated()); + int n = itr - dev_paths; + num_paths = n; + + if (num_paths == 0) + { + iterationComplete = true; // TODO: should be based off stream compaction results. + } + //iterationComplete = true; // TODO: should be based off stream compaction results. + } + + // Assemble this iteration and apply it to the image + dim3 numBlocksPixels = (pixelcount + blockSize1d - 1) / blockSize1d; + finalGather << > > (pixelcount, dev_image, dev_paths); + + /////////////////////////////////////////////////////////////////////////// + + // Send results to OpenGL buffer for rendering + sendImageToPBO << > > (pbo, cam.resolution, iter, dev_image); + + // Retrieve image from GPU + cudaMemcpy(hst_scene->state.image.data(), dev_image, + pixelcount * sizeof(glm::vec3), cudaMemcpyDeviceToHost); + + checkCUDAError("pathtrace"); } diff --git a/src/pathtrace.h b/src/pathtrace.h index 12412277..72bbeeb9 100644 --- a/src/pathtrace.h +++ b/src/pathtrace.h @@ -6,3 +6,6 @@ void pathtraceInit(Scene *scene); void pathtraceFree(); void pathtrace(uchar4 *pbo, int frame, int iteration); + +void SetCacheState(bool a_state); + diff --git a/src/scene.cpp b/src/scene.cpp index 3fb6239a..37fcbe64 100644 --- a/src/scene.cpp +++ b/src/scene.cpp @@ -52,6 +52,11 @@ int Scene::loadGeom(string objectid) { cout << "Creating new cube..." << endl; newGeom.type = CUBE; } + else if (strcmp(line.c_str(), "obj") == 0) { + + cout << "Creating new obj..." << endl; + newGeom.type = OBJ; + } } //link material @@ -160,7 +165,7 @@ int Scene::loadMaterial(string materialid) { Material newMaterial; //load static properties - for (int i = 0; i < 7; i++) { + for (int i = 0; i < 10; i++) { string line; utilityCore::safeGetline(fp_in, line); vector tokens = utilityCore::tokenizeString(line); @@ -180,6 +185,14 @@ int Scene::loadMaterial(string materialid) { newMaterial.indexOfRefraction = atof(tokens[1].c_str()); } else if (strcmp(tokens[0].c_str(), "EMITTANCE") == 0) { newMaterial.emittance = atof(tokens[1].c_str()); + }else if (strcmp(tokens[0].c_str(), "usingProcTex") == 0) { + newMaterial.usingProcTex = atof(tokens[1].c_str()); + } + else if (strcmp(tokens[0].c_str(), "isSubSurface") == 0) { + newMaterial.isSubSurface = atof(tokens[1].c_str()); + } + else if (strcmp(tokens[0].c_str(), "ProcTexNum") == 0) { + newMaterial.ProcTexNum = atof(tokens[1].c_str()); } } materials.push_back(newMaterial); diff --git a/src/sceneStructs.h b/src/sceneStructs.h index da4dbf30..f62d1e3c 100644 --- a/src/sceneStructs.h +++ b/src/sceneStructs.h @@ -2,14 +2,17 @@ #include #include +#include "MeshLoading/polygon.h" #include #include "glm/glm.hpp" +#include #define BACKGROUND_COLOR (glm::vec3(0.0f)) enum GeomType { SPHERE, CUBE, + OBJ }; struct Ray { @@ -26,8 +29,16 @@ struct Geom { glm::mat4 transform; glm::mat4 inverseTransform; glm::mat4 invTranspose; + glm::vec4* Host_Triangle_points_normals; + glm::vec4* Device_Triangle_points_normals; + + float* Host_BVH; + float* Device_BVH; + int triangleCount; + // glm::vec3* points; }; + struct Material { glm::vec3 color; struct { @@ -38,6 +49,9 @@ struct Material { float hasRefractive; float indexOfRefraction; float emittance; + float usingProcTex; + float isSubSurface; + int ProcTexNum; }; struct Camera { @@ -49,6 +63,8 @@ struct Camera { glm::vec3 right; glm::vec2 fov; glm::vec2 pixelLength; + double aperture; + double focus_dist; }; struct RenderState { @@ -70,7 +86,8 @@ struct PathSegment { // 1) color contribution computation // 2) BSDF evaluation: generate a new ray struct ShadeableIntersection { - float t; - glm::vec3 surfaceNormal; - int materialId; + float t; + glm::vec3 surfaceNormal; + int materialId; + }; diff --git a/src/utilities.h b/src/utilities.h index abb4f27c..1d60accb 100644 --- a/src/utilities.h +++ b/src/utilities.h @@ -13,6 +13,7 @@ #define TWO_PI 6.2831853071795864769252867665590057683943f #define SQRT_OF_ONE_THIRD 0.5773502691896257645091487805019574556476f #define EPSILON 0.00001f +#define EPSILON2 0.001f namespace utilityCore { extern float clamp(float f, float min, float max); diff --git a/stream_compaction/CMakeLists.txt b/stream_compaction/CMakeLists.txt index 4538f04e..4bcc9e9e 100644 --- a/stream_compaction/CMakeLists.txt +++ b/stream_compaction/CMakeLists.txt @@ -1,6 +1,25 @@ -set(SOURCE_FILES +set(headers + "common.h" + "cpu.h" + "naive.h" + "efficient.h" + "thrust.h" + "RadixSort.h" ) -cuda_add_library(stream_compaction - ${SOURCE_FILES} +set(sources + "common.cu" + "cpu.cu" + "naive.cu" + "efficient.cu" + "thrust.cu" + "RadixSort.cu" ) + +list(SORT headers) +list(SORT sources) + +source_group(Headers FILES ${headers}) +source_group(Sources FILES ${sources}) + +cuda_add_library(stream_compaction ${sources} ${headers}) diff --git a/stream_compaction/RadixSort.cu b/stream_compaction/RadixSort.cu new file mode 100644 index 00000000..5c212f57 --- /dev/null +++ b/stream_compaction/RadixSort.cu @@ -0,0 +1,173 @@ +#include "RadixSort.h" +#include +#include +#include +#include +namespace StreamCompaction { + namespace RadixSort { + using StreamCompaction::Common::PerformanceTimer; + PerformanceTimer& timer() + { + static PerformanceTimer timer; + return timer; + } + +#define checkCUDAErrorWithLine(msg) checkCUDAError(msg, __LINE__) // We can use defines provided in this project + + int* dev_buf; + int* bufBit; + int* falseBuf; + int* trueBuf; + int* bufNotBits; + int* bufScatter; + int* bufAnswer; +#define blockSize 512 + + + void AllocateMemory(int n) + { + cudaMalloc((void**)&dev_buf, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_buf failed!"); + cudaMalloc((void**)&bufBit, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufloader failed!"); + cudaMalloc((void**)&falseBuf, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufB failed!"); + cudaMalloc((void**)&trueBuf, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufS failed!"); + cudaMalloc((void**)&bufNotBits, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufAnswers failed!"); + cudaMalloc((void**)&bufScatter, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufAnswers failed!"); + cudaMalloc((void**)&bufAnswer, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufAnswers failed!"); + cudaDeviceSynchronize(); + } + + void FreeMemory() { + cudaFree(dev_buf); + cudaFree(bufBit); + cudaFree(falseBuf); + cudaFree(trueBuf); + cudaFree(bufNotBits); + cudaFree(bufScatter); + cudaFree(bufAnswer); + } + + + __global__ void PopulateBits(int bitOrder, int* bufInputData, int* bufBit, int N) + { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index > N - 1) + { + return; + } + int mask = 1 << bitOrder; + int masked_num = bufInputData[index] & mask; + int thebit = masked_num >> bitOrder; + bufBit[index] = thebit; + } + + __global__ void PopulateNotBits(int *bitNotBits, const int* bufBits, int N) + { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index > N - 1) + { + return; + } + if (bufBits[index] == 0) + { + bitNotBits[index] = 1; + return; + } + bitNotBits[index] = 0; + } + + __global__ void CopyAnswerToInputBuf(int* BufAnswer, int* ScatterBuffer, int* InputBuf, int N) + { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index > N - 1) + { + return; + } + BufAnswer[ScatterBuffer[index]] = InputBuf[index]; + } + + + __global__ void ComputeTArray(int* outputBuf, int *falseBuf, int totalFalses, int N) + { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index > N - 1) + { + return; + } + outputBuf[index] = index - falseBuf[index] + totalFalses; + } + + __global__ void PerformScatter(int* outputBuf, int* inputBuf, int* bitBuf, int*falseBuf, int *trueBuf, int N) + { + int index = threadIdx.x + (blockIdx.x * blockDim.x); + if (index > N - 1) + { + return; + } + if (bitBuf[index]) + { + outputBuf[index] = trueBuf[index]; + return; + } + outputBuf[index] = falseBuf[index]; + + } + + + void PerformThrustSort(int n, int* odata, const int* idata) + { + thrust::host_vectorhstIn(idata, idata + n); + timer().startGpuTimer(); + // TODO use `thrust::exclusive_scan` + // example: for device_vectors dv_in and dv_out: + // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); + thrust::sort(hstIn.begin(), hstIn.end()); + + thrust::copy(hstIn.begin(), hstIn.end(), odata); + + timer().endGpuTimer(); + } + + + + void PerformGPUSort(int n, int* odata, const int* idata) + { + AllocateMemory(n); + cudaMemcpy(dev_buf, idata, sizeof(int) * n, cudaMemcpyHostToDevice); + + dim3 fullBlocksPerGrid((n + blockSize - 1) / blockSize); + timer().startGpuTimer(); + for (int i = 0; i < 6; i++) + { + PopulateBits << < fullBlocksPerGrid, blockSize >> > (i, dev_buf, bufBit, n); + cudaDeviceSynchronize(); + PopulateNotBits << < fullBlocksPerGrid, blockSize >> > (bufNotBits, bufBit, n); + cudaDeviceSynchronize(); + + int* inputNotBits= new int[n]; + cudaMemcpy(inputNotBits, bufNotBits, n * sizeof(int), cudaMemcpyDeviceToHost); + Efficient::scan(n, odata, inputNotBits); + cudaMemcpy(falseBuf, odata, n * sizeof(int), cudaMemcpyHostToDevice); + + int TotalFalses = inputNotBits[n - 1] + odata[n - 1]; + ComputeTArray << < fullBlocksPerGrid, blockSize >> > (trueBuf, falseBuf, TotalFalses, n); + cudaDeviceSynchronize(); + PerformScatter << < fullBlocksPerGrid, blockSize >> > (bufScatter, dev_buf, bufBit, falseBuf, trueBuf, n); + cudaDeviceSynchronize(); + CopyAnswerToInputBuf << < fullBlocksPerGrid, blockSize >> > (bufAnswer, bufScatter, dev_buf, n); + cudaDeviceSynchronize(); + std::swap(dev_buf, bufAnswer); + cudaDeviceSynchronize(); + } + timer().endGpuTimer(); + cudaMemcpy(odata, dev_buf, sizeof(int) * n, cudaMemcpyDeviceToHost); + } + + } +} \ No newline at end of file diff --git a/stream_compaction/RadixSort.h b/stream_compaction/RadixSort.h new file mode 100644 index 00000000..ae50e376 --- /dev/null +++ b/stream_compaction/RadixSort.h @@ -0,0 +1,13 @@ +#pragma once +#include "common.h" +#include "efficient.h" + +namespace StreamCompaction { + namespace RadixSort { + StreamCompaction::Common::PerformanceTimer& timer(); + + + void PerformThrustSort(int n, int* odata, const int* idata); + void PerformGPUSort(int n, int* odata, const int* idata); + } +} \ No newline at end of file diff --git a/stream_compaction/common.cu b/stream_compaction/common.cu new file mode 100644 index 00000000..13f2f09f --- /dev/null +++ b/stream_compaction/common.cu @@ -0,0 +1,93 @@ +#pragma once +#include "common.h" + +void checkCUDAErrorFn(const char *msg, const char *file, int line) { + cudaError_t err = cudaGetLastError(); + if (cudaSuccess == err) { + return; + } + + fprintf(stderr, "CUDA error"); + if (file) { + fprintf(stderr, " (%s:%d)", file, line); + } + fprintf(stderr, ": %s: %s\n", msg, cudaGetErrorString(err)); + exit(EXIT_FAILURE); +} + + +namespace StreamCompaction { + namespace Common { + + /** + * Maps an array to an array of 0s and 1s for stream compaction. Elements + * which map to 0 will be removed, and elements which map to 1 will be kept. + */ + __global__ void kernMapToBoolean(int n, int *bools, const int *idata) { + // TODO + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index > n - 1) + { + return; + } + if (idata[index] == 0) + { + bools[index] = 0; + return; + } + bools[index] = 1; + } + + __global__ void kernMapToBooleanPathTracer(int n, int* bools, const PathSegment* idata) { + // TODO + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index > n - 1) + { + return; + } + if (idata[index].remainingBounces == 0) + { + bools[index] = 0; + return; + } + bools[index] = 1; + } + + /** + * Performs scatter on an array. That is, for each element in idata, + * if bools[idx] == 1, it copies idata[idx] to odata[indices[idx]]. + */ + __global__ void kernScatter(int n, int *odata, + const int *idata, const int *bools, const int *indices) { + // TODO + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index > n - 1) + { + return; + } + + if (bools[index] == 0) + { + return; + } + odata[indices[index]] = idata[index]; + } + + __global__ void kernScatterPathTracer(int n, PathSegment* odata, + const PathSegment* idata, const int* bools, const int* indices) { + // TODO + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index > n - 1) + { + return; + } + + if (bools[index] == 0) + { + return; + } + odata[indices[index]] = idata[index]; + } + + } +} diff --git a/stream_compaction/common.h b/stream_compaction/common.h new file mode 100644 index 00000000..48ef9083 --- /dev/null +++ b/stream_compaction/common.h @@ -0,0 +1,136 @@ +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include "../src/sceneStructs.h" + +#define FILENAME (strrchr(__FILE__, '/') ? strrchr(__FILE__, '/') + 1 : __FILE__) +#define checkCUDAError(msg) checkCUDAErrorFn(msg, FILENAME, __LINE__) + +/** + * Check for CUDA errors; print and exit if there was a problem. + */ +void checkCUDAErrorFn(const char *msg, const char *file = NULL, int line = -1); + +inline int ilog2(int x) { + int lg = 0; + while (x >>= 1) { + ++lg; + } + return lg; +} + +inline int ilog2ceil(int x) { + return x == 1 ? 0 : ilog2(x - 1) + 1; +} + +namespace StreamCompaction { + namespace Common { + __global__ void kernMapToBoolean(int n, int *bools, const int *idata); + __global__ void kernMapToBooleanPathTracer(int n, int* bools, const PathSegment* idata); + __global__ void kernScatterPathTracer(int n, PathSegment* odata, + const PathSegment* idata, const int* bools, const int* indices); + + __global__ void kernScatter(int n, int *odata, + const int *idata, const int *bools, const int *indices); + + /** + * This class is used for timing the performance + * Uncopyable and unmovable + * + * Adapted from WindyDarian(https://github.com/WindyDarian) + */ + class PerformanceTimer + { + public: + PerformanceTimer() + { + cudaEventCreate(&event_start); + cudaEventCreate(&event_end); + } + + ~PerformanceTimer() + { + cudaEventDestroy(event_start); + cudaEventDestroy(event_end); + } + + void startCpuTimer() + { + if (cpu_timer_started) { throw std::runtime_error("CPU timer already started"); } + cpu_timer_started = true; + + time_start_cpu = std::chrono::high_resolution_clock::now(); + } + + void endCpuTimer() + { + time_end_cpu = std::chrono::high_resolution_clock::now(); + + if (!cpu_timer_started) { throw std::runtime_error("CPU timer not started"); } + + std::chrono::duration duro = time_end_cpu - time_start_cpu; + prev_elapsed_time_cpu_milliseconds = + static_cast(duro.count()); + + cpu_timer_started = false; + } + + void startGpuTimer() + { + if (gpu_timer_started) { throw std::runtime_error("GPU timer already started"); } + gpu_timer_started = true; + + cudaEventRecord(event_start); + } + + void endGpuTimer() + { + cudaEventRecord(event_end); + cudaEventSynchronize(event_end); + + if (!gpu_timer_started) { throw std::runtime_error("GPU timer not started"); } + + cudaEventElapsedTime(&prev_elapsed_time_gpu_milliseconds, event_start, event_end); + gpu_timer_started = false; + } + + float getCpuElapsedTimeForPreviousOperation() //noexcept //(damn I need VS 2015 + { + return prev_elapsed_time_cpu_milliseconds; + } + + float getGpuElapsedTimeForPreviousOperation() //noexcept + { + return prev_elapsed_time_gpu_milliseconds; + } + + // remove copy and move functions + PerformanceTimer(const PerformanceTimer&) = delete; + PerformanceTimer(PerformanceTimer&&) = delete; + PerformanceTimer& operator=(const PerformanceTimer&) = delete; + PerformanceTimer& operator=(PerformanceTimer&&) = delete; + + private: + cudaEvent_t event_start = nullptr; + cudaEvent_t event_end = nullptr; + + using time_point_t = std::chrono::high_resolution_clock::time_point; + time_point_t time_start_cpu; + time_point_t time_end_cpu; + + bool cpu_timer_started = false; + bool gpu_timer_started = false; + + float prev_elapsed_time_cpu_milliseconds = 0.f; + float prev_elapsed_time_gpu_milliseconds = 0.f; + }; + } +} diff --git a/stream_compaction/cpu.cu b/stream_compaction/cpu.cu new file mode 100644 index 00000000..1643c05b --- /dev/null +++ b/stream_compaction/cpu.cu @@ -0,0 +1,128 @@ +#include +#include "cpu.h" + +#include "common.h" + +namespace StreamCompaction { + namespace CPU { + using StreamCompaction::Common::PerformanceTimer; + PerformanceTimer& timer() + { + static PerformanceTimer timer; + return timer; + } + + /** + * CPU scan (prefix sum). + * For performance analysis, this is supposed to be a simple for loop. + * (Optional) For better understanding before starting moving to GPU, you can simulate your GPU scan in this function first. + */ + void scan(int n, int *odata, const int *idata) { + + timer().startCpuTimer(); + // TODO + + odata[0] = 0; + for (int i = 1; i < n; i++) + { + odata[i] = odata[i - 1] + idata[i-1]; + } + + timer().endCpuTimer(); + } + + /** + * CPU stream compaction without using the scan function. + * + * @returns the number of elements remaining after compaction. + */ + int compactWithoutScan(int n, int *odata, const int *idata) { + timer().startCpuTimer(); + // TODO + int outIndex=0; + for (int i = 0; i < n; i++) + { + if (idata[i] != 0) + { + odata[outIndex] = idata[i]; + outIndex++; + } + } + + timer().endCpuTimer(); + return outIndex; + } + + /** + * CPU stream compaction using scan and scatter, like the parallel version. + * + * @returns the number of elements remaining after compaction. + */ + int compactWithScan(int n, int *odata, const int *idata) { + + int *arr_b = new int[n]; + int *arr_c = new int[n]; // exclusive array + timer().startCpuTimer(); + + + int *ans = new int[n]; + + int finalIndex = 0; +#pragma region NonPower2 + + int *arr_z = new int[n]; + + int nearesttwo = 1 << ilog2ceil(n); + + + int difference = nearesttwo - n; + + for (int i = 0; i < difference; i++) + { + arr_z[i] = 0; + } + + + for (int i = 0; i < n; i++) + { + arr_z[i + difference] = idata[i]; + } + + n = n + difference; + + for (int i = 0; i < n; i++) + { + if (arr_z[i] == 0) + { + arr_b[i] = 0; + continue; + } + arr_b[i] = 1; + } + + arr_c[0] = 0; + for (int i = 1; i < n; i++) + { + arr_c[i] = arr_b[i - 1] + arr_c[i - 1]; + } + + for (int i = 0; i < n; i++) + { + if (arr_b[i] == 0) + { + continue; + } + int index = arr_c[i]; + odata[index] = idata[i]; + ans[index] = idata[i]; + finalIndex = index; + } + + + +#pragma endregion + timer().endCpuTimer(); + return finalIndex+1; + } + } +} diff --git a/stream_compaction/cpu.h b/stream_compaction/cpu.h new file mode 100644 index 00000000..873c0476 --- /dev/null +++ b/stream_compaction/cpu.h @@ -0,0 +1,15 @@ +#pragma once + +#include "common.h" + +namespace StreamCompaction { + namespace CPU { + StreamCompaction::Common::PerformanceTimer& timer(); + + void scan(int n, int *odata, const int *idata); + + int compactWithoutScan(int n, int *odata, const int *idata); + + int compactWithScan(int n, int *odata, const int *idata); + } +} diff --git a/stream_compaction/efficient.cu b/stream_compaction/efficient.cu new file mode 100644 index 00000000..f0970485 --- /dev/null +++ b/stream_compaction/efficient.cu @@ -0,0 +1,320 @@ +#pragma once +#include +#include +#include "efficient.h" + +namespace StreamCompaction { + namespace Efficient { + using StreamCompaction::Common::PerformanceTimer; + PerformanceTimer& timer() + { + static PerformanceTimer timer; + return timer; + } +#define checkCUDAErrorWithLine(msg) checkCUDAError(msg, __LINE__) // We can use defines provided in this project + + int* dev_buf; + int* dev_bufloader; + int* dev_bufB; + int* dev_bufS; + int* dev_bufAnswers; + + PathSegment* dev_PTbuf; + PathSegment* dev_PTbufAnswers; + +#define blockSize 512 + + __global__ void performUpSweep(int d, int* buf, int N) + { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + int pow2_d = 1 << d; + int pow2_dplus1 = 1 << (d + 1); + if (index + pow2_dplus1 - 1 > N - 1) + { + return; + } + + if ((index + pow2_dplus1 - 1) % pow2_dplus1 == (pow2_dplus1 - 1)) + { + buf[index + pow2_dplus1 - 1] += buf[index + pow2_d - 1]; + } + + if (index + pow2_dplus1 - 1 == N - 1) + { + buf[index + pow2_dplus1 - 1] = 0; + return; + } + + } + + __global__ void performDownSweep(int d, int* buf, int N) + { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + int pow2_d = 1< N - 1) + { + return; + } + + if ((index + pow2_dplus1 - 1) % pow2_dplus1 == (pow2_dplus1 - 1)) + { + + int t = buf[index + pow2_d - 1]; + buf[index + pow2_d - 1] = buf[index + pow2_dplus1 - 1]; + buf[index + pow2_dplus1 - 1] += t; + } + } + + void AllocateMemory(int n) + { + cudaMalloc((void**)&dev_buf, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_buf failed!"); + cudaMalloc((void**)&dev_bufloader, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufloader failed!"); + cudaMalloc((void**)&dev_bufB, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufB failed!"); + cudaMalloc((void**)&dev_bufS, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufS failed!"); + cudaMalloc((void**)&dev_bufAnswers, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufAnswers failed!"); + cudaDeviceSynchronize(); + } + + void AllocateMemoryPathTracer(int n) + { + cudaMalloc((void**)&dev_PTbuf, n * sizeof(PathSegment)); + checkCUDAErrorWithLine("cudaMalloc dev_buf failed!"); + cudaMalloc((void**)&dev_bufloader, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufloader failed!"); + cudaMalloc((void**)&dev_bufB, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufB failed!"); + cudaMalloc((void**)&dev_bufS, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufS failed!"); + cudaMalloc((void**)&dev_PTbufAnswers, n * sizeof(PathSegment)); + checkCUDAErrorWithLine("cudaMalloc dev_bufAnswers failed!"); + cudaDeviceSynchronize(); + } + + void FreeMemory() { + cudaFree(dev_buf); + cudaFree(dev_bufloader); + cudaFree(dev_bufB); + cudaFree(dev_bufS); + cudaFree(dev_bufAnswers); + cudaFree(dev_PTbuf); + cudaFree(dev_PTbufAnswers); + + } + + __global__ void RightShiftAddZeros(int* buf,int *buf_loader, int N, int difference) + { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index > N - 1) + { + return; + } + if (index < difference) + { + buf[index] = 0; + return; + } + buf[index] = buf_loader[index-difference]; + } + + __global__ void RightShiftDeleteZeroes(int* buf, int* buf_loader, int N, int difference) + { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index > N - 1) + { + return; + } + buf[index] = buf_loader[index + difference]; + } + + /** + * Performs prefix-sum (aka scan) on idata, storing the result into odata. + */ + void scan(int n, int* odata, const int* idata) { + timer().startGpuTimer(); + // TODO + + int nearesttwo = 1<> > (dev_buf, dev_bufloader, finalMemSize, difference); + + int d = ilog2ceil(finalMemSize); + + for (int i = 0; i <= d - 1; i++) + { + performUpSweep << < fullBlocksPerGrid, blockSize >> > (i, dev_buf, finalMemSize); + } + + + for (int i = d - 1; i >= 0; i--) + { + performDownSweep << < fullBlocksPerGrid, blockSize >> > (i, dev_buf, finalMemSize); + } + + RightShiftDeleteZeroes << < fullBlocksPerGrid, blockSize >> > (dev_bufloader, dev_buf, n, difference); + cudaDeviceSynchronize(); + cudaMemcpy(odata, dev_bufloader, sizeof(int) * n, cudaMemcpyDeviceToHost); + + timer().endGpuTimer(); + + FreeMemory(); + cudaDeviceSynchronize(); + } + + int* dev_buf2; + int* dev_bufloader2; + + void AllocateMemory2(int n) + { + cudaMalloc((void**)&dev_buf2, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_buf failed!"); + cudaMalloc((void**)&dev_bufloader2, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufloader failed!"); + + } + + void FreeMemory2() { + cudaFree(dev_buf2); + cudaFree(dev_bufloader2); + } + + + void scanWithoutTimer(int n, int* odata, const int* idata) { + // TODO + int nearesttwo = 1 << ilog2ceil(n); + + int difference = nearesttwo - n; + + int finalMemSize = nearesttwo; + + AllocateMemory2(finalMemSize); + dim3 fullBlocksPerGrid((finalMemSize + blockSize - 1) / blockSize); + cudaMemcpy(dev_bufloader2, idata, sizeof(int) * n, cudaMemcpyHostToDevice); + RightShiftAddZeros << < fullBlocksPerGrid, blockSize >> > (dev_buf2, dev_bufloader2, finalMemSize, difference); + + int d = ilog2ceil(finalMemSize); + + for (int i = 0; i <= d - 1; i++) + { + performUpSweep << < fullBlocksPerGrid, blockSize >> > (i, dev_buf2, finalMemSize); + } + + for (int i = d - 1; i >= 0; i--) + { + performDownSweep << < fullBlocksPerGrid, blockSize >> > (i, dev_buf2, finalMemSize); + } + + RightShiftDeleteZeroes << < fullBlocksPerGrid, blockSize >> > (dev_bufloader2, dev_buf2, n, difference); + cudaMemcpy(odata, dev_bufloader2, sizeof(int) * n, cudaMemcpyDeviceToHost); + FreeMemory2(); + } + + /** + * Performs stream compaction on idata, storing the result into odata. + * All zeroes are discarded. + * + * @param n The number of elements in idata. + * @param odata The array into which to store elements. + * @param idata The array of elements to compact. + * @returns The number of elements remaining after compaction. + */ + int compact(int n, int* odata, const int* idata) { + int finalMemSize = n; + AllocateMemory(finalMemSize); + + + timer().startGpuTimer(); + // TODO + cudaMemcpy(dev_buf, idata, sizeof(int) * finalMemSize, cudaMemcpyHostToDevice); + + dim3 fullBlocksPerGrid((finalMemSize + blockSize - 1) / blockSize); + + + Common::kernMapToBoolean << < fullBlocksPerGrid, blockSize >> > (finalMemSize, dev_bufB, dev_buf); + + //Copy bool value to new array to perform scan + int* arr_boolean = new int[finalMemSize]; + cudaMemcpy(arr_boolean, dev_bufB, sizeof(int) * finalMemSize, cudaMemcpyDeviceToHost); + + //Create new array to store answers from scan + int* arr_scanResult = new int[finalMemSize]; + scanWithoutTimer(finalMemSize, arr_scanResult, arr_boolean); + + //Copy the scan answers to Dev_BufS to further process + cudaMemcpy(dev_bufS, arr_scanResult, sizeof(int) * finalMemSize, cudaMemcpyHostToDevice); + + int numElements = arr_scanResult[finalMemSize - 1]; + + Common::kernScatter << < fullBlocksPerGrid, blockSize >> > (finalMemSize, dev_bufAnswers, dev_buf, + dev_bufB, dev_bufS); + + cudaMemcpy(odata, dev_bufAnswers, sizeof(int) * finalMemSize, cudaMemcpyDeviceToHost); + + timer().endGpuTimer(); + FreeMemory(); + + if (arr_boolean[finalMemSize - 1] == 1) + { + return numElements + 1; // Since indexing start from 0 + } + return numElements; //if last element boolean is 0 its scan result include 1 extra sum counting for 0 index + } + + + int compact(int n, PathSegment* dev_iPathSegment) { + int finalMemSize = n; + AllocateMemoryPathTracer(finalMemSize); + + + //timer().startGpuTimer(); + // TODO + cudaMemcpy(dev_PTbuf, dev_iPathSegment, sizeof(PathSegment) * finalMemSize, cudaMemcpyDeviceToDevice); + + dim3 fullBlocksPerGrid((finalMemSize + blockSize - 1) / blockSize); + + + Common::kernMapToBooleanPathTracer << < fullBlocksPerGrid, blockSize >> > (finalMemSize, dev_bufB, dev_PTbuf); + + //Copy bool value to new array to perform scan + int* arr_boolean = new int[finalMemSize]; + cudaMemcpy(arr_boolean, dev_bufB, sizeof(int) * finalMemSize, cudaMemcpyDeviceToHost); + + //Create new array to store answers from scan + int* arr_scanResult = new int[finalMemSize]; + scanWithoutTimer(finalMemSize, arr_scanResult, arr_boolean); + + //Copy the scan answers to Dev_BufS to further process + cudaMemcpy(dev_bufS, arr_scanResult, sizeof(int) * finalMemSize, cudaMemcpyHostToDevice); + + int numElements = arr_scanResult[finalMemSize - 1]; + + Common::kernScatterPathTracer << < fullBlocksPerGrid, blockSize >> > (finalMemSize, dev_PTbufAnswers, dev_PTbuf, + dev_bufB, dev_bufS); + + cudaMemcpy(dev_iPathSegment, dev_PTbufAnswers, sizeof(PathSegment) * finalMemSize, cudaMemcpyDeviceToDevice); + + //timer().endGpuTimer(); + FreeMemory(); + + if (arr_boolean[finalMemSize - 1] == 1) + { + return numElements + 1; // Since indexing start from 0 + } + return numElements; //if last element boolean is 0 its scan result include 1 extra sum counting for 0 index + } + } +} diff --git a/stream_compaction/efficient.h b/stream_compaction/efficient.h new file mode 100644 index 00000000..775eb92c --- /dev/null +++ b/stream_compaction/efficient.h @@ -0,0 +1,14 @@ +#pragma once + +#include "common.h" + +namespace StreamCompaction { + namespace Efficient { + StreamCompaction::Common::PerformanceTimer& timer(); + + void scan(int n, int *odata, const int *idata); + + //int compact(int n, int *odata, const int *idata); + int compact(int n, PathSegment *iPathSegment); + } +} diff --git a/stream_compaction/naive.cu b/stream_compaction/naive.cu new file mode 100644 index 00000000..3b5d4c02 --- /dev/null +++ b/stream_compaction/naive.cu @@ -0,0 +1,149 @@ +#include +#include +#include "common.h" +#include "naive.h" +namespace StreamCompaction { + namespace Naive { + +#define checkCUDAErrorWithLine(msg) checkCUDAError(msg, __LINE__) // We can use defines provided in this project + + + using StreamCompaction::Common::PerformanceTimer; + PerformanceTimer& timer() + { + static PerformanceTimer timer; + return timer; + } + // TODO: __global__ + int* dev_buf1; + int* dev_buf2; + int* dev_bufLoader; +#define blockSize 512 + + __global__ void performScan(int d, int* buf1, int* buf2, int N) + { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index > N - 1) + { + return; + } + + //int pow2_dminus1 = std::round(pow(2, d - 1)); + int pow2_dminus1 = 1 <<(d - 1); + if (index >= pow2_dminus1) + { + buf2[index] = buf1[index - pow2_dminus1] + buf1[index]; + } + else + { + buf2[index] = buf1[index]; + } + + } + + __global__ void ShiftRight(int* buf1, int* buf2, int N, int difference) + { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index > N - 1) + { + return; + } + if (index == 0) + { + buf2[index] = 0; + return; + } + buf2[index] = buf1[index - 1]; + + } + + void FreeMemory() { + cudaFree(dev_buf1); + cudaFree(dev_buf2); + cudaFree(dev_bufLoader); + } + + void AllocateMemory(int n) + { + cudaMalloc((void**)&dev_buf1, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_buf1 failed!"); + cudaMalloc((void**)&dev_buf2, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_buf2 failed!"); + cudaMalloc((void**)&dev_bufLoader, n * sizeof(int)); + checkCUDAErrorWithLine("cudaMalloc dev_bufLoader failed!"); + cudaDeviceSynchronize(); + } + + __global__ void RightShiftAddZeros(int* buf, int* buf_loader, int N, int difference) + { + int index = (blockIdx.x * blockDim.x) + threadIdx.x; + if (index > N - 1) + { + return; + } + if (index > (N-1) - difference) + { + buf[index] = 0; + return; + } + buf[index] = buf_loader[index]; + } + + + /** + * Performs prefix-sum (aka scan) on idata, storing the result into odata. + */ + void scan(int n, int* odata, const int* idata) { + timer().startGpuTimer(); + // TODO + + int power2 = 1; + int nearesttwo = 1 << ilog2ceil(n); + + int difference = nearesttwo - n; + + int finalMemSize = nearesttwo; + + + AllocateMemory(finalMemSize); + + + dim3 fullBlocksPerGrid((finalMemSize + blockSize - 1) / blockSize); + dim3 threadsperblockSize(blockSize); + + + cudaMemcpy(dev_bufLoader, idata, sizeof(int) * n, cudaMemcpyHostToDevice); + RightShiftAddZeros << < fullBlocksPerGrid, threadsperblockSize >> > (dev_buf1, dev_bufLoader, finalMemSize, difference); + + + int d = ilog2(finalMemSize); + for (int i = 1; i <= d; i++) + { + performScan << < fullBlocksPerGrid, threadsperblockSize >> > (i, dev_buf1, dev_buf2, finalMemSize); + //cudaDeviceSynchronize(); + std::swap(dev_buf1, dev_buf2); + } + + ShiftRight << < fullBlocksPerGrid, blockSize >> > (dev_buf1, dev_buf2, finalMemSize, difference); + cudaMemcpy(odata, dev_buf2, sizeof(int) * n, cudaMemcpyDeviceToHost); + + + /*printf(" \n Array After:");*/ + /*for (int i = 0; i < finalMemSize; i++) + { + printf("%3d ", arr_z[i]); + }*/ + + /* printf("]\n"); + for (int i = 0; i < n; i++) + { + printf("%3d ", odata[i]); + }*/ + + + timer().endGpuTimer(); + cudaDeviceSynchronize(); + FreeMemory(); + } + } +} diff --git a/stream_compaction/naive.h b/stream_compaction/naive.h new file mode 100644 index 00000000..37dcb064 --- /dev/null +++ b/stream_compaction/naive.h @@ -0,0 +1,11 @@ +#pragma once + +#include "common.h" + +namespace StreamCompaction { + namespace Naive { + StreamCompaction::Common::PerformanceTimer& timer(); + + void scan(int n, int *odata, const int *idata); + } +} diff --git a/stream_compaction/thrust.cu b/stream_compaction/thrust.cu new file mode 100644 index 00000000..fc2d00e2 --- /dev/null +++ b/stream_compaction/thrust.cu @@ -0,0 +1,36 @@ +#include +#include +#include +#include +#include +#include "common.h" +#include "thrust.h" + +namespace StreamCompaction { + namespace Thrust { + using StreamCompaction::Common::PerformanceTimer; + PerformanceTimer& timer() + { + static PerformanceTimer timer; + return timer; + } + /** + * Performs prefix-sum (aka scan) on idata, storing the result into odata. + */ + void scan(int n, int *odata, const int *idata) { + + thrust::host_vectorhstIn(idata, idata + n); + thrust::device_vectordv_in = hstIn; + thrust::device_vectordv_out(n); + timer().startGpuTimer(); + // TODO use `thrust::exclusive_scan` + // example: for device_vectors dv_in and dv_out: + // thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); + thrust::exclusive_scan(dv_in.begin(), dv_in.end(), dv_out.begin()); + + thrust::copy(dv_out.begin(), dv_out.end(), odata); + + timer().endGpuTimer(); + } + } +} diff --git a/stream_compaction/thrust.h b/stream_compaction/thrust.h new file mode 100644 index 00000000..fe98206b --- /dev/null +++ b/stream_compaction/thrust.h @@ -0,0 +1,11 @@ +#pragma once + +#include "common.h" + +namespace StreamCompaction { + namespace Thrust { + StreamCompaction::Common::PerformanceTimer& timer(); + + void scan(int n, int *odata, const int *idata); + } +}