diff --git a/README.md b/README.md index bec6ca4..81dc3a1 100644 --- a/README.md +++ b/README.md @@ -3,13 +3,42 @@ Vulkan Flocking: compute and shading in one pipeline! **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 6** -* (TODO) YOUR NAME HERE - Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab) +* Name: Zhan Xiong Chin +* Tested on: Windows 7 Professional, i7-4770 @ 3.40GHz 3.40GHz 16.0GB, Nvidia Quadro K600 1GB (SIG LAB) + +![](img/boids.gif) + +### Build instructions +[See here](https://github.com/CIS565-Fall-2016/Project0-CUDA-Getting-Started/blob/master/INSTRUCTION.md). Also needs [LunarG Vulkan SDK](https://vulkan.lunarg.com/). + +### Introduction + +This is a Vulkan version of a 2D boids simulation (see [here](https://github.com/czxcjx/Project1-CUDA-Flocking) for a 3D CUDA version). Each particle is influenced by 3 different rules: + +* cohesion - boids move towards the center of mass of their neighbors +* separation - boids move away from each of their neighbors +* alignment - boids adjust their velocity to the mean velocity of their neighbors + +# Performance analysis + +![](img/boids_chart.png) + +The algorithm implemented is a straightforward naive computation: it parallelizes over the particles, with each particle looping over every other particle and updating its velocity in accordance with the 3 rules. As expected, since the naive algorithm needs O(N^2) time to do the velocity updates, the time required per frame increases by about 4 times each time the number of particles doubles. + +Performance can be improved significantly by using space partitioning data structures (e.g. uniform grid), but these were not implemented in this version. For such a reference, see the 3D CUDA version linked above. + +# Answers to questions + +* Why do you think Vulkan expects explicit descriptors for things like generating pipelines and commands? + * This allows for greater control over how Vulkan executes code. 
+* Describe a situation besides flip-flop buffers in which you may need multiple descriptor sets to fit one descriptor layout. + * Debug views may be able to make use of multiple descriptor sets, with different buffers (e.g. position, normals, depth) being mapped to colors. +* What are some problems to keep in mind when using multiple Vulkan queues? + * There may be different limits on the number of allowed Vulkan queues for different systems, so code will have to take that into consideration. Furthermore, synchronization between the different queues may be an issue, as different parts of each queue may have dependencies on each other. +* What is one advantage of using compute commands that can share data with a rendering pipeline? + * We can perform computations (e.g. updating velocity, position) at the same time as rendering data to screen. - ### (TODO: Your README) - Include screenshots, analysis, etc. (Remember, this is public, so don't put - anything here that you don't want to share with the world.) ### Credits diff --git a/base/vulkanexamplebase.h b/base/vulkanexamplebase.h index a30387e..e088f15 100644 --- a/base/vulkanexamplebase.h +++ b/base/vulkanexamplebase.h @@ -132,8 +132,8 @@ class VulkanExampleBase const std::string getAssetPath(); public: bool prepared = false; - uint32_t width = 1280; - uint32_t height = 720; + uint32_t width = 800; + uint32_t height = 600; VkClearColorValue defaultClearColor;// = { { 0.025f, 0.025f, 0.025f, 1.0f } }; diff --git a/data/shaders/computeparticles/particle.comp b/data/shaders/computeparticles/particle.comp index b7dc2f7..4a63700 100644 --- a/data/shaders/computeparticles/particle.comp +++ b/data/shaders/computeparticles/particle.comp @@ -43,32 +43,60 @@ layout (binding = 2) uniform UBO void main() { - // LOOK: This is very similar to a CUDA kernel. - // Right now, the compute shader only advects the particles with their - // velocity and handles wrap-around. - // TODO: implement flocking behavior. 
+ // LOOK: This is very similar to a CUDA kernel. + // The compute shader applies the three flocking rules to each particle's + // velocity, then advects the particle and handles wrap-around. // Current SSBO index uint index = gl_GlobalInvocationID.x; // Don't try to write beyond particle count - if (index >= ubo.particleCount) + if (index >= ubo.particleCount) { return; + } // Read position and velocity - vec2 vPos = particlesA[index].pos.xy; + vec2 vPos = particlesA[index].pos.xy; vec2 vVel = particlesA[index].vel.xy; + + float nearbyCount = 0.0; + vec2 sumPosition = vec2(0.0, 0.0); + vec2 sumStayAway = vec2(0.0, 0.0); + vec2 sumVelocity = vec2(0.0, 0.0); + + for (int i = 0; i < ubo.particleCount; i++) { + if (i == index) { + continue; + } + float dist = distance(vPos, particlesA[i].pos.xy); + if (dist < ubo.rule1Distance) { + nearbyCount += 1.0; + sumPosition += particlesA[i].pos; + } + if (dist < ubo.rule2Distance) { + sumStayAway += vPos - particlesA[i].pos; + } + if (dist < ubo.rule3Distance) { + sumVelocity += particlesA[i].vel; + } + } + if (nearbyCount > 0.0) { + sumPosition = sumPosition / nearbyCount; + vVel += (sumPosition - vPos) * ubo.rule1Scale; + } + vVel += sumStayAway * ubo.rule2Scale; + vVel += sumVelocity * ubo.rule3Scale; - // clamp velocity for a more pleasing simulation. 
+ vVel = normalize(vVel) * clamp(length(vVel), 0.0, 0.1); - // kinematic update - vPos += vVel * ubo.deltaT; + // kinematic update + vPos += vVel * ubo.deltaT; // Wrap around boundary - if (vPos.x < -1.0) vPos.x = 1.0; - if (vPos.x > 1.0) vPos.x = -1.0; - if (vPos.y < -1.0) vPos.y = 1.0; - if (vPos.y > 1.0) vPos.y = -1.0; + if (vPos.x < -1.0) vPos.x = 1.0; + if (vPos.x > 1.0) vPos.x = -1.0; + if (vPos.y < -1.0) vPos.y = 1.0; + if (vPos.y > 1.0) vPos.y = -1.0; particlesB[index].pos.xy = vPos; diff --git a/data/shaders/computeparticles/particle.comp.spv b/data/shaders/computeparticles/particle.comp.spv index 059ab59..8721985 100644 Binary files a/data/shaders/computeparticles/particle.comp.spv and b/data/shaders/computeparticles/particle.comp.spv differ diff --git a/img/boids.gif b/img/boids.gif new file mode 100644 index 0000000..ee35471 Binary files /dev/null and b/img/boids.gif differ diff --git a/img/boids_chart.png b/img/boids_chart.png new file mode 100644 index 0000000..205c23d Binary files /dev/null and b/img/boids_chart.png differ diff --git a/vulkanBoids/vulkanBoids.cpp b/vulkanBoids/vulkanBoids.cpp index 9b2f122..e472fc1 100644 --- a/vulkanBoids/vulkanBoids.cpp +++ b/vulkanBoids/vulkanBoids.cpp @@ -27,7 +27,7 @@ #include "vulkanexamplebase.h" #define VERTEX_BUFFER_BIND_ID 0 -#define ENABLE_VALIDATION true // LOOK: toggle Vulkan validation layers. These make debugging much easier! +#define ENABLE_VALIDATION false // LOOK: toggle Vulkan validation layers. These make debugging much easier! #define PARTICLE_COUNT 4 * 1024 // LOOK: change particle count here // LOOK: constants for the boids algorithm. These will be passed to the GPU compute part of the assignment @@ -157,7 +157,8 @@ class VulkanExample : public VulkanExampleBase for (auto& particle : particleBuffer) { particle.pos = glm::vec2(rDistribution(rGenerator), rDistribution(rGenerator)); - // TODO: add randomized velocities with a slight scale here, something like 0.1f. 
+ // DONE: add randomized velocities with a slight scale here, something like 0.1f. + particle.vel = glm::vec2(rDistribution(rGenerator), rDistribution(rGenerator)) * 0.1f; } VkDeviceSize storageBufferSize = particleBuffer.size() * sizeof(Particle); @@ -244,7 +245,7 @@ class VulkanExample : public VulkanExampleBase VERTEX_BUFFER_BIND_ID, 1, VK_FORMAT_R32G32_SFLOAT, - offsetof(Particle, pos)); // TODO: change this so that we can color the particles based on velocity. + offsetof(Particle, vel)); // DONE: change this so that we can color the particles based on velocity. // vertices.inputState encapsulates everything we need for these particular buffers to // interface with the graphics pipeline. @@ -540,13 +541,30 @@ class VulkanExample : public VulkanExampleBase compute.descriptorSets[0], VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2, - &compute.uniformBuffer.descriptor) + &compute.uniformBuffer.descriptor), - // TODO: write the second descriptorSet, using the top for reference. + // DONE: write the second descriptorSet, using the top for reference. // We want the descriptorSets to be used for flip-flopping: // on one frame, we use one descriptorSet with the compute pass, // on the next frame, we use the other. // What has to be different about how the second descriptorSet is written here? + vkTools::initializers::writeDescriptorSet( + compute.descriptorSets[1], + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + 0, + &compute.storageBufferB.descriptor), + + vkTools::initializers::writeDescriptorSet( + compute.descriptorSets[1], + VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, + 1, + &compute.storageBufferA.descriptor), + + vkTools::initializers::writeDescriptorSet( + compute.descriptorSets[1], + VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, + 2, + &compute.uniformBuffer.descriptor) }; vkUpdateDescriptorSets(device, static_cast(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, NULL); @@ -583,13 +601,15 @@ class VulkanExample : public VulkanExampleBase // are done executing. 
VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, compute.fence)); - // TODO: handle flip-flop logic. We want the next iteration to + // DONE: handle flip-flop logic. We want the next iteration to // run the compute pipeline with flipped SSBOs, so we have to // swap the descriptorSets, which each allow access to the SSBOs // in one configuration. // We also want to flip what SSBO we draw with in the next // pass through the graphics pipeline. // Feel free to use std::swap here. You should need it twice. + std::swap(compute.descriptorSets[0], compute.descriptorSets[1]); + std::swap(compute.storageBufferA, compute.storageBufferB); } // Record command buffers for drawing using the graphics pipeline