CIS565-Fall-2016 · xnieamo · Nov 15, 2016 · Nov 16, 2016 · Nov 16, 2016 · Nov 16, 2016
diff --git a/README.md b/README.md
@@ -3,13 +3,32 @@ Vulkan Flocking: compute and shading in one pipeline!
 
 **University of Pennsylvania, CIS 565: GPU Programming and Architecture, Project 6**
 
-* (TODO) YOUR NAME HERE
-  Windows 22, i7-2222 @ 2.22GHz 22GB, GTX 222 222MB (Moore 2222 Lab)
+* Xiaomao Ding
+* Tested on: Windows 8.1, i7-4700MQ @ 2.40GHz 8.00GB, GT 750M 2047MB (Personal Computer)
 
-  ### (TODO: Your README)
+### Introduction
+This project implements a 2D version of the [Reynolds Boid Algorithm](http://www.red3d.com/cwr/boids/). This algorithm simulates flocks of birds or schools of fish moving in groups. The primary purpose of this project is to explore the Vulkan pipeline and see how it differs from OpenGL. This code only implements a naive version of the algorithm. The project in [this repository](https://github.com/xnieamo/Project1-CUDA-Flocking) implements a 3D version of the algorithm in CUDA and provides a series of performance analyses as well.
 
-  Include screenshots, analysis, etc. (Remember, this is public, so don't put
-  anything here that you don't want to share with the world.)
+<p align="center">
+  <img src="https://github.com/xnieamo/Project6-Vulkan-Flocking/blob/master/img/Boids.gif?raw=true">
+</p>
+
+### Questions
+- Why do you think Vulkan expects explicit descriptors for things like generating pipelines and commands?
+
+Vulkan puts data describing the pipelines and commands in GPU memory. Explicit descriptors allow the GPU to optimize memory usage for the commands during program execution.
+
+- Describe a situation besides flip-flop buffers in which you may need multiple descriptor sets to fit one descriptor layout.
+
+If we had a scene with many different textures, we might use multiple descriptor sets that share one layout, with each set reading from a different texture.
+
+- What are some problems to keep in mind when using multiple Vulkan queues?
+
+Since the different queues can be backed by different hardware, we cannot guarantee that the tasks assigned to different queues will finish at the same time. Therefore, to avoid the need for synchronization, the queues should be data independent. Additionally, the same buffer could be used across different queues. This may lead to race conditions if two queues write to the buffer at the same time, or if one queue has not yet finished the calculations that another queue depends on.
+
+- What is one advantage of using compute commands that can share data with a rendering pipeline?
+
+Because compute commands can run in parallel with the graphics pipeline, sharing data allows calculations to be performed simultaneously while rendering instead of sequentially. This should provide a performance boost.
 
 ### Credits
 

diff --git a/data/shaders/computeparticles/particle.comp b/data/shaders/computeparticles/particle.comp
@@ -15,21 +15,21 @@ struct Particle
 // Binding 0 : Particle storage buffer (read)
 layout(std140, binding = 0) buffer ParticlesA
 {
-   Particle particlesA[ ];
+	Particle particlesA[];
 };
 
 // Binding 1 : Particle storage buffer (write)
 layout(std140, binding = 1) buffer ParticlesB
 {
-   Particle particlesB[ ];
+	Particle particlesB[];
 };
 
-layout (local_size_x = 16, local_size_y = 16) in;
+layout(local_size_x = 16, local_size_y = 16) in;
 
 // LOOK: rule weights and distances, as well as particle count, based off uniforms.
 // The deltaT here has to be updated every frame to account for changes in
 // frame rate.
-layout (binding = 2) uniform UBO
+layout(binding = 2) uniform UBO
 {
 	float deltaT;
 	float rule1Distance;
@@ -43,35 +43,70 @@ layout (binding = 2) uniform UBO
 
 void main()
 {
-		// LOOK: This is very similar to a CUDA kernel.
-		// Right now, the compute shader only advects the particles with their
-		// velocity and handles wrap-around.
-		// TODO: implement flocking behavior.
+	// LOOK: This is very similar to a CUDA kernel.
+	// The compute shader applies the three boid flocking rules, then advects
+	// the particles with their velocity and handles wrap-around.
+	// Naive approach: each invocation loops over every other particle.
 
-    // Current SSBO index
-    uint index = gl_GlobalInvocationID.x;
+	// Current SSBO index
+	uint index = gl_GlobalInvocationID.x;
 	// Don't try to write beyond particle count
-    if (index >= ubo.particleCount)
+	if (index >= ubo.particleCount)
 		return;
 
-    // Read position and velocity
-		vec2 vPos = particlesA[index].pos.xy;
-    vec2 vVel = particlesA[index].vel.xy;
+	// Read position and velocity
+	vec2 vPos = particlesA[index].pos.xy;
+	vec2 vVel = particlesA[index].vel.xy;
 
-		// clamp velocity for a more pleasing simulation.
-		vVel = normalize(vVel) * clamp(length(vVel), 0.0, 0.1);
+	// Initialize boid values
+	vec2 center = vec2(0.f, 0.f);
+	vec2 separate = vec2(0.f, 0.f);
+	vec2 cohesion = vec2(0.f, 0.f);
+	float neighborCount = 0.f;
 
-		// kinematic update
-		vPos += vVel * ubo.deltaT;
+	// Boid flocking
+	for (int i = 0; i < ubo.particleCount; i++) {
+		if (index == i) continue;
+		vec2 uPos = particlesA[i].pos.xy;
+		vec2 uVel = particlesA[i].vel.xy;
 
-    // Wrap around boundary
-		if (vPos.x < -1.0) vPos.x = 1.0;
-		if (vPos.x > 1.0) vPos.x = -1.0;
-		if (vPos.y < -1.0) vPos.y = 1.0;
-		if (vPos.y > 1.0) vPos.y = -1.0;
+		float distance = length(uPos - vPos);
+		if (distance < ubo.rule1Distance) {
+			center += uPos;
+			neighborCount += 1.0;
+		}
 
-    particlesB[index].pos.xy = vPos;
+		if (distance < ubo.rule2Distance) {
+			separate -= (uPos - vPos);
+		}
 
-    // Write back
-    particlesB[index].vel.xy = vVel;
+		if (distance < ubo.rule3Distance) {
+			cohesion += uVel;
+		}
+
+	}
+
+	if (neighborCount > 0.01f) {
+		center /= neighborCount;
+		vVel += (center - vPos) * ubo.rule1Scale;
+		vVel += cohesion * ubo.rule3Scale;
+	}
+	vVel += separate * ubo.rule2Scale;
+
+	// clamp velocity for a more pleasing simulation.
+	vVel = normalize(vVel) * clamp(length(vVel), 0.0, 0.1);
+
+	// kinematic update
+	vPos += vVel * ubo.deltaT;
+
+	// Wrap around boundary
+	if (vPos.x < -1.0) vPos.x = 1.0;
+	if (vPos.x > 1.0) vPos.x = -1.0;
+	if (vPos.y < -1.0) vPos.y = 1.0;
+	if (vPos.y > 1.0) vPos.y = -1.0;
+
+	particlesB[index].pos.xy = vPos;
+
+	// Write back
+	particlesB[index].vel.xy = vVel;
 }
diff --git a/data/shaders/computeparticles/particle.comp.spv b/data/shaders/computeparticles/particle.comp.spv
diff --git a/img/Boids.gif b/img/Boids.gif
diff --git a/vulkanBoids/vulkanBoids.cpp b/vulkanBoids/vulkanBoids.cpp
@@ -27,13 +27,13 @@
 #include "vulkanexamplebase.h"
 
 #define VERTEX_BUFFER_BIND_ID 0
-#define ENABLE_VALIDATION true // LOOK: toggle Vulkan validation layers. These make debugging much easier!
+#define ENABLE_VALIDATION true  // LOOK: toggle Vulkan validation layers. These make debugging much easier!
 #define PARTICLE_COUNT 4 * 1024 // LOOK: change particle count here
 
 // LOOK: constants for the boids algorithm. These will be passed to the GPU compute part of the assignment
 // using a Uniform Buffer. These parameters should yield a stable and pleasing simulation for an
 // implementation based off the code here: http://studio.sketchpad.cc/sp/pad/view/ro.9cbgCRcgbPOI6/rev.23
-#define RULE1DISTANCE 0.1f // cohesion
+#define RULE1DISTANCE 0.1f  // cohesion
 #define RULE2DISTANCE 0.05f // separation
 #define RULE3DISTANCE 0.05f // alignment
 #define RULE1SCALE 0.02f
@@ -82,7 +82,7 @@ class VulkanExample : public VulkanExampleBase
 
 		VkDescriptorSetLayout descriptorSetLayout;	// Compute shader binding layout - how to interface with the pipeline
 		VkDescriptorSet descriptorSets[2];			// Compute shader bindings - encapsulate buffers for interfacing with the pipeline
-													// in acoordance with the descriptorSetLayout
+		// in accordance with the descriptorSetLayout
 		VkPipelineLayout pipelineLayout;			// Layout of the compute pipeline
 		VkPipeline pipeline;						// Compute pipeline for updating particle positions
 
@@ -158,6 +158,7 @@ class VulkanExample : public VulkanExampleBase
 		{
 			particle.pos = glm::vec2(rDistribution(rGenerator), rDistribution(rGenerator));
 			// TODO: add randomized velocities with a slight scale here, something like 0.1f.
+			particle.vel = glm::vec2(rDistribution(rGenerator), rDistribution(rGenerator)) * 0.1f;
 		}
 
 		VkDeviceSize storageBufferSize = particleBuffer.size() * sizeof(Particle);
@@ -244,7 +245,7 @@ class VulkanExample : public VulkanExampleBase
 			VERTEX_BUFFER_BIND_ID,
 			1,
 			VK_FORMAT_R32G32_SFLOAT,
-			offsetof(Particle, pos)); // TODO: change this so that we can color the particles based on velocity.
+			offsetof(Particle, vel)); // use the velocity attribute so that we can color the particles based on velocity.
 
 		// vertices.inputState encapsulates everything we need for these particular buffers to
 		// interface with the graphics pipeline.
@@ -357,7 +358,7 @@ class VulkanExample : public VulkanExampleBase
 
 		// LOOK: set the pipeline up to interface with our buffers using the
 		// inputState from prepareStorageBuffers()
-		pipelineCreateInfo.pVertexInputState = &vertices.inputState; // indicate to pipeline how to use vertex buffer.
+		pipelineCreateInfo.pVertexInputState = &vertices.inputState;  // indicate to pipeline how to use vertex buffer.
 		pipelineCreateInfo.pInputAssemblyState = &inputAssemblyState; // speculation: is this b/c on some GPUs, vertex buffer input is still semi-fixed-function?
 		pipelineCreateInfo.pRasterizationState = &rasterizationState;
 		pipelineCreateInfo.pColorBlendState = &colorBlendState;
@@ -540,13 +541,33 @@ class VulkanExample : public VulkanExampleBase
 			compute.descriptorSets[0],
 			VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
 			2,
-			&compute.uniformBuffer.descriptor)
+			&compute.uniformBuffer.descriptor),
 
 			// TODO: write the second descriptorSet, using the top for reference.
 			// We want the descriptorSets to be used for flip-flopping:
 			// on one frame, we use one descriptorSet with the compute pass,
 			// on the next frame, we use the other.
 			// What has to be different about how the second descriptorSet is written here?
+			// Binding 0 : Particle position storage buffer
+			vkTools::initializers::writeDescriptorSet(
+			compute.descriptorSets[1], // LOOK: which descriptor set to write to?
+			VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+			0, // LOOK: which binding in the descriptor set Layout?
+			&compute.storageBufferB.descriptor), // LOOK: which SSBO?
+
+			// Binding 1 : Particle position storage buffer
+			vkTools::initializers::writeDescriptorSet(
+			compute.descriptorSets[1],
+			VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
+			1,
+			&compute.storageBufferA.descriptor),
+
+			// Binding 2 : Uniform buffer
+			vkTools::initializers::writeDescriptorSet(
+			compute.descriptorSets[1],
+			VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
+			2,
+			&compute.uniformBuffer.descriptor)
 		};
 
 		vkUpdateDescriptorSets(device, static_cast<uint32_t>(computeWriteDescriptorSets.size()), computeWriteDescriptorSets.data(), 0, NULL);
@@ -590,6 +611,7 @@ class VulkanExample : public VulkanExampleBase
 		// We also want to flip what SSBO we draw with in the next
 		// pass through the graphics pipeline.
 		// Feel free to use std::swap here. You should need it twice.
+		std::swap(compute.descriptorSets[0], compute.descriptorSets[1]);
 	}
 
 	// Record command buffers for drawing using the graphics pipeline
@@ -671,7 +693,7 @@ class VulkanExample : public VulkanExampleBase
 		bufferBarrier.size = compute.storageBufferA.descriptor.range;
 		bufferBarrier.srcAccessMask = VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT;						// Vertex shader invocations have finished reading from the buffer
 		bufferBarrier.dstAccessMask = VK_ACCESS_SHADER_WRITE_BIT;								// Compute shader wants to write to the buffer
-		
+
 		// Compute and graphics queue may have different queue families (see VulkanDevice::createLogicalDevice)
 		// For the barrier to work across different queues, we need to set their family indices
 		bufferBarrier.srcQueueFamilyIndex = vulkanDevice->queueFamilyIndices.graphics;			// Required as compute and graphics queue may have different families