Results 1 to 10 of 10

Thread: Converting SaschaWillems raytrace demo not to calc all(pixel by pixel)?

  1. #1

    Question Converting SaschaWillems raytrace demo not to calc all(pixel by pixel)?

    Hi, Sascha Willems' raytracing demo computes the image by going over the entire image, generating one ray per pixel:
    Code :
    ivec2 dim = imageSize(resultImage);
    	vec2 uv = vec2(gl_GlobalInvocationID.xy) / dim;
     
    	vec3 rayO = ubo.camera.pos;
    	vec3 rayD = normalize(vec3((-1.0 + 2.0 * uv) * vec2(ubo.aspectRatio, 1.0), -1.0));

    I don't want to use the raytracer demo that way. I want to rewrite the shader so that it does not compute the image by going over every pixel.
    How do I modify the C++ code so that this no longer happens?

    I want the shader's main function to look something like this:
    Code :
    do
    	{
    		vec3 finalColor = ...
    		imageStore(resultImage, ivec2(somePixel.xy), vec4(finalColor, 0.0));
    	}
    	until (stop);

    Here's the full c++ code:
    Code :
    /*
    * Vulkan Example - Compute shader ray tracing
    *
    * Copyright (C) 2016 by Sascha Willems - www.saschawillems.de
    *
    * This code is licensed under the MIT license (MIT) (http://opensource.org/licenses/MIT)
    */
     
    #include <stdio.h>
    #include <stdlib.h>
    #include <string.h>
    #include <assert.h>
    #include <vector>
     
    #define GLM_FORCE_RADIANS
    #define GLM_FORCE_DEPTH_ZERO_TO_ONE
    #include <glm/glm.hpp>
    #include <glm/gtc/matrix_transform.hpp>
     
    #include <vulkan/vulkan.h>
    #include "vulkanexamplebase.h"
    #include "VulkanTexture.hpp"
     
    #define VERTEX_BUFFER_BIND_ID 0
    #define ENABLE_VALIDATION false
     
    #if defined(__ANDROID__)
    #define TEX_DIM 1024
    #else
    #define TEX_DIM 2048
    #endif
     
    class VulkanExample : public VulkanExampleBase
    {
    public:
    	vks::Texture textureComputeTarget;
     
    	// Resources for the graphics part of the example
    	// The graphics pipeline only displays the image written by the compute shader (see buildCommandBuffers)
    	struct {
    		VkDescriptorSetLayout descriptorSetLayout;	// Raytraced image display shader binding layout
    		VkDescriptorSet descriptorSetPreCompute;	// Raytraced image display shader bindings before compute shader image manipulation (NOTE(review): not allocated or bound anywhere in this class)
    		VkDescriptorSet descriptorSet;				// Raytraced image display shader bindings after compute shader image manipulation
    		VkPipeline pipeline;						// Raytraced image display pipeline
    		VkPipelineLayout pipelineLayout;			// Layout of the graphics pipeline
    	} graphics;
     
    	// Resources for the compute part of the example
    	// Everything needed to record and submit the ray tracing dispatch lives in this struct
    	struct {
    		struct {
    			vks::Buffer spheres;						// (Shader) storage buffer object with scene spheres
    			vks::Buffer planes;						// (Shader) storage buffer object with scene planes
    		} storageBuffers;
    		vks::Buffer uniformBuffer;					// Uniform buffer object containing scene data
    		VkQueue queue;								// Separate queue for compute commands (queue family may differ from the one used for graphics)
    		VkCommandPool commandPool;					// Use a separate command pool (queue family may differ from the one used for graphics)
    		VkCommandBuffer commandBuffer;				// Command buffer storing the dispatch commands and barriers
    		VkFence fence;								// Synchronization fence to avoid rewriting compute CB if still in use
    		VkDescriptorSetLayout descriptorSetLayout;	// Compute shader binding layout
    		VkDescriptorSet descriptorSet;				// Compute shader bindings
    		VkPipelineLayout pipelineLayout;			// Layout of the compute pipeline
    		VkPipeline pipeline;						// Compute raytracing pipeline
    		// Host side copy of the shader's UBO block (binding 1); it is copied byte for byte into uniformBuffer
    		// NOTE(review): the shader's UBO block also declares a trailing "mat4 rotMat" that has no counterpart here,
    		// and under std140 rules the offsets of camera.lookat and camera.fov presumably differ from this tightly
    		// packed struct. Confirm the layout before reading anything other than lightPos, aspectRatio, fogColor
    		// and camera.pos in the shader.
    		struct UBOCompute {							// Compute shader uniform block object
    			glm::vec3 lightPos;
    			float aspectRatio;						// Aspect ratio of the viewport
    			glm::vec4 fogColor = glm::vec4(0.0f);
    			struct {
    				glm::vec3 pos = glm::vec3(0.0f, 0.0f, 4.0f);
    				glm::vec3 lookat = glm::vec3(0.0f, 0.5f, 0.0f);
    				float fov = 10.0f;
    			} camera;
    		} ubo;
    	} compute;
     
    	// SSBO sphere declaration
    	// Host side mirror of the shader's Sphere struct. The shader buffer uses std140 layout, so every vec3 is
    	// directly followed by a scalar to fill a 16 byte slot and the struct is padded after the id.
    	// (Assumes glm::vec3 is three tightly packed floats - TODO confirm for the GLM configuration in use.)
    	struct Sphere {
    		glm::vec3 pos;
    		float radius;
    		glm::vec3 diffuse;
    		float specular;
    		uint32_t id;								// Id used to identify sphere for raytracing
    		glm::ivec3 _pad;							// Padding only, never read by the host code
    	};
     
    	// SSBO plane declaration
    	// Host side mirror of the shader's Plane struct; laid out like Sphere (vec3 + scalar pairs, padded after the id)
    	struct Plane {
    		glm::vec3 normal;
    		float distance;
    		glm::vec3 diffuse;
    		float specular;
    		uint32_t id;								// Id taken from the same counter (currentId) as the sphere ids
    		glm::ivec3 _pad;							// Padding only, never read by the host code
    	};
     
    	VulkanExample() : VulkanExampleBase(ENABLE_VALIDATION)
    	{
    		title = "Vulkan Example - Compute shader ray tracing";
    		enableTextOverlay = true;
    		compute.ubo.aspectRatio = (float)width / (float)height;
    		timerSpeed *= 0.25f;
     
    		camera.type = Camera::CameraType::lookat;
    		camera.setPerspective(60.0f, (float)width / (float)height, 0.1f, 512.0f);
    		camera.setRotation(glm::vec3(0.0f, 0.0f, 0.0f));
    		camera.setTranslation(glm::vec3(0.0f, 0.0f, -4.0f));
    		camera.rotationSpeed = 0.0f;
    		camera.movementSpeed = 2.5f;
    	}
     
    	// Releases every Vulkan object created by this class (graphics objects first, then compute objects,
    	// then the shared compute target texture)
    	~VulkanExample()
    	{
    		// Graphics
    		vkDestroyPipeline(device, graphics.pipeline, nullptr);
    		vkDestroyPipelineLayout(device, graphics.pipelineLayout, nullptr);
    		vkDestroyDescriptorSetLayout(device, graphics.descriptorSetLayout, nullptr);
     
    		// Compute
    		vkDestroyPipeline(device, compute.pipeline, nullptr);
    		vkDestroyPipelineLayout(device, compute.pipelineLayout, nullptr);
    		vkDestroyDescriptorSetLayout(device, compute.descriptorSetLayout, nullptr);
    		vkDestroyFence(device, compute.fence, nullptr);
    		// compute.commandBuffer is not freed explicitly; only the pool it was allocated from is destroyed
    		vkDestroyCommandPool(device, compute.commandPool, nullptr);
    		compute.uniformBuffer.destroy();
    		compute.storageBuffers.spheres.destroy();
    		compute.storageBuffers.planes.destroy();
     
    		// Image written by the compute shader and sampled by the fragment shader
    		textureComputeTarget.destroy();
    	}
     
    	// Prepare a texture target that is used to store compute shader calculations
    	// Creates a device local 2D image usable both as storage image (compute) and sampled image (fragment),
    	// transitions it to VK_IMAGE_LAYOUT_GENERAL and fills in the sampler, view and descriptor of tex.
    	//   tex    - texture object that receives the created image, memory, sampler, view and descriptor
    	//   width  - image width in texels (shadows the class member of the same name inside this function)
    	//   height - image height in texels (shadows the class member of the same name inside this function)
    	//   format - image format; must support storage image usage with optimal tiling
    	void prepareTextureTarget(vks::Texture *tex, uint32_t width, uint32_t height, VkFormat format)
    	{
    		// Get device properties for the requested texture format
    		VkFormatProperties formatProperties;
    		vkGetPhysicalDeviceFormatProperties(physicalDevice, format, &formatProperties);
    		// Check if requested image format supports image storage operations
    		// NOTE(review): assert is compiled out when NDEBUG is defined, so this check only runs in debug builds
    		assert(formatProperties.optimalTilingFeatures & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT);
     
    		// Prepare blit target texture
    		tex->width = width;
    		tex->height = height;
     
    		VkImageCreateInfo imageCreateInfo = vks::initializers::imageCreateInfo();
    		imageCreateInfo.imageType = VK_IMAGE_TYPE_2D;
    		imageCreateInfo.format = format;
    		imageCreateInfo.extent = { width, height, 1 };
    		imageCreateInfo.mipLevels = 1;
    		imageCreateInfo.arrayLayers = 1;
    		imageCreateInfo.samples = VK_SAMPLE_COUNT_1_BIT;
    		imageCreateInfo.tiling = VK_IMAGE_TILING_OPTIMAL;
    		imageCreateInfo.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    		// Image will be sampled in the fragment shader and used as storage target in the compute shader
    		imageCreateInfo.usage = VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_STORAGE_BIT;
    		imageCreateInfo.flags = 0;
     
    		VkMemoryAllocateInfo memAllocInfo = vks::initializers::memoryAllocateInfo();
    		VkMemoryRequirements memReqs;
     
    		// Create the image, then allocate and bind device local memory for it
    		VK_CHECK_RESULT(vkCreateImage(device, &imageCreateInfo, nullptr, &tex->image));
    		vkGetImageMemoryRequirements(device, tex->image, &memReqs);
    		memAllocInfo.allocationSize = memReqs.size;
    		memAllocInfo.memoryTypeIndex = vulkanDevice->getMemoryType(memReqs.memoryTypeBits, VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT);
    		VK_CHECK_RESULT(vkAllocateMemory(device, &memAllocInfo, nullptr, &tex->deviceMemory));
    		VK_CHECK_RESULT(vkBindImageMemory(device, tex->image, tex->deviceMemory, 0));
     
    		// One-off command buffer that moves the image from UNDEFINED to the GENERAL layout
    		VkCommandBuffer layoutCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
     
    		tex->imageLayout = VK_IMAGE_LAYOUT_GENERAL;
    		vks::tools::setImageLayout(
    			layoutCmd, 
    			tex->image,
    			VK_IMAGE_ASPECT_COLOR_BIT, 
    			VK_IMAGE_LAYOUT_UNDEFINED,
    			tex->imageLayout);
     
    		VulkanExampleBase::flushCommandBuffer(layoutCmd, queue, true);
     
    		// Create sampler
    		VkSamplerCreateInfo sampler = vks::initializers::samplerCreateInfo();
    		sampler.magFilter = VK_FILTER_LINEAR;
    		sampler.minFilter = VK_FILTER_LINEAR;
    		sampler.mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR;
    		sampler.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
    		sampler.addressModeV = sampler.addressModeU;
    		sampler.addressModeW = sampler.addressModeU;
    		sampler.mipLodBias = 0.0f;
    		sampler.maxAnisotropy = 1.0f;
    		sampler.compareOp = VK_COMPARE_OP_NEVER;
    		sampler.minLod = 0.0f;
    		sampler.maxLod = 0.0f;
    		sampler.borderColor = VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE;
    		VK_CHECK_RESULT(vkCreateSampler(device, &sampler, nullptr, &tex->sampler));
     
    		// Create image view
    		VkImageViewCreateInfo view = vks::initializers::imageViewCreateInfo();
    		view.viewType = VK_IMAGE_VIEW_TYPE_2D;
    		view.format = format;
    		view.components = { VK_COMPONENT_SWIZZLE_R, VK_COMPONENT_SWIZZLE_G, VK_COMPONENT_SWIZZLE_B, VK_COMPONENT_SWIZZLE_A };
    		view.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
    		view.image = tex->image;
    		VK_CHECK_RESULT(vkCreateImageView(device, &view, nullptr, &tex->view));
     
    		// Initialize a descriptor for later use
    		// (the same descriptor is written into both the graphics and the compute descriptor set)
    		tex->descriptor.imageLayout = tex->imageLayout;
    		tex->descriptor.imageView = tex->view;
    		tex->descriptor.sampler = tex->sampler;
    		tex->device = vulkanDevice;
    	}
     
    	void buildCommandBuffers()
    	{
    		// Destroy command buffers if already present
    		if (!checkCommandBuffers())
    		{
    			destroyCommandBuffers();
    			createCommandBuffers();
    		}
     
    		VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo();
     
    		VkClearValue clearValues[2];
    		clearValues[0].color = defaultClearColor;
    		clearValues[0].color = { {0.0f, 0.0f, 0.2f, 0.0f} };
    		clearValues[1].depthStencil = { 1.0f, 0 };
     
    		VkRenderPassBeginInfo renderPassBeginInfo = vks::initializers::renderPassBeginInfo();
    		renderPassBeginInfo.renderPass = renderPass;
    		renderPassBeginInfo.renderArea.offset.x = 0;
    		renderPassBeginInfo.renderArea.offset.y = 0;
    		renderPassBeginInfo.renderArea.extent.width = width;
    		renderPassBeginInfo.renderArea.extent.height = height;
    		renderPassBeginInfo.clearValueCount = 2;
    		renderPassBeginInfo.pClearValues = clearValues;
     
    		for (int32_t i = 0; i < drawCmdBuffers.size(); ++i)
    		{
    			// Set target frame buffer
    			renderPassBeginInfo.framebuffer = frameBuffers[i];
     
    			VK_CHECK_RESULT(vkBeginCommandBuffer(drawCmdBuffers[i], &cmdBufInfo));
     
    			// Image memory barrier to make sure that compute shader writes are finished before sampling from the texture
    			VkImageMemoryBarrier imageMemoryBarrier = {};
    			imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    			imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
    			imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
    			imageMemoryBarrier.image = textureComputeTarget.image;
    			imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
    			imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
    			imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
    			vkCmdPipelineBarrier(
    				drawCmdBuffers[i],
    				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
    				VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
    				VK_FLAGS_NONE,
    				0, nullptr,
    				0, nullptr,
    				1, &imageMemoryBarrier);
     
    			vkCmdBeginRenderPass(drawCmdBuffers[i], &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
     
    			VkViewport viewport = vks::initializers::viewport((float)width, (float)height, 0.0f, 1.0f);
    			vkCmdSetViewport(drawCmdBuffers[i], 0, 1, &viewport);
     
    			VkRect2D scissor = vks::initializers::rect2D(width, height, 0, 0);
    			vkCmdSetScissor(drawCmdBuffers[i], 0, 1, &scissor);
     
    			// Display ray traced image generated by compute shader as a full screen quad
    			// Quad vertices are generated in the vertex shader
    			vkCmdBindDescriptorSets(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphics.pipelineLayout, 0, 1, &graphics.descriptorSet, 0, NULL);
    			vkCmdBindPipeline(drawCmdBuffers[i], VK_PIPELINE_BIND_POINT_GRAPHICS, graphics.pipeline);
    			vkCmdDraw(drawCmdBuffers[i], 3, 1, 0, 0);
     
    			vkCmdEndRenderPass(drawCmdBuffers[i]);
     
    			VK_CHECK_RESULT(vkEndCommandBuffer(drawCmdBuffers[i]));
    		}
     
    	}
     
    	void buildComputeCommandBuffer()
    	{
    		VkCommandBufferBeginInfo cmdBufInfo = vks::initializers::commandBufferBeginInfo();
     
    		VK_CHECK_RESULT(vkBeginCommandBuffer(compute.commandBuffer, &cmdBufInfo));
     
    		vkCmdBindPipeline(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipeline);
    		vkCmdBindDescriptorSets(compute.commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, compute.pipelineLayout, 0, 1, &compute.descriptorSet, 0, 0);
     
    		vkCmdDispatch(compute.commandBuffer, textureComputeTarget.width / 16, textureComputeTarget.height / 16, 1);
     
    		vkEndCommandBuffer(compute.commandBuffer);
    	}
     
    	uint32_t currentId = 0;	// Id used to identify objects by the ray tracing shader
     
    	// Creates a sphere primitive for the ray traced scene and assigns it the next free object id.
    	//   pos      - center of the sphere
    	//   radius   - sphere radius
    	//   diffuse  - diffuse color
    	//   specular - specular value passed through to the shader
    	Sphere newSphere(glm::vec3 pos, float radius, glm::vec3 diffuse, float specular)
    	{
    		// Value-initialize so the padding member does not hold indeterminate data when the
    		// struct is copied and uploaded to the GPU
    		Sphere sphere{};
    		sphere.id = currentId++;
    		sphere.pos = pos;
    		sphere.radius = radius;
    		sphere.diffuse = diffuse;
    		sphere.specular = specular;
    		return sphere;
    	}
     
    	// Creates a plane primitive for the ray traced scene and assigns it the next free object id.
    	//   normal   - plane normal
    	//   distance - distance value of the plane as consumed by the shader
    	//   diffuse  - diffuse color
    	//   specular - specular value passed through to the shader
    	Plane newPlane(glm::vec3 normal, float distance, glm::vec3 diffuse, float specular)
    	{
    		// Value-initialize so the padding member does not hold indeterminate data when the
    		// struct is copied and uploaded to the GPU
    		Plane plane{};
    		plane.id = currentId++;
    		plane.normal = normal;
    		plane.distance = distance;
    		plane.diffuse = diffuse;
    		plane.specular = specular;
    		return plane;
    	}
     
    	// Setup and fill the compute shader storage buffers containing primitives for the raytraced scene
    	void prepareStorageBuffers()
    	{
    		// Spheres
    		std::vector<Sphere> spheres;
    		spheres.push_back(newSphere(glm::vec3(1.75f, -0.5f, 0.0f), 1.0f, glm::vec3(0.0f, 1.0f, 0.0f), 32.0f));
    		spheres.push_back(newSphere(glm::vec3(0.0f, 1.0f, -0.5f), 1.0f, glm::vec3(0.65f, 0.77f, 0.97f), 32.0f));
    		spheres.push_back(newSphere(glm::vec3(-1.75f, -0.75f, -0.5f), 1.25f, glm::vec3(0.9f, 0.76f, 0.46f), 32.0f));
    		VkDeviceSize storageBufferSize = spheres.size() * sizeof(Sphere);
     
    		// Stage
    		vks::Buffer stagingBuffer;
     
    		vulkanDevice->createBuffer(
    			VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
    			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
    			&stagingBuffer,
    			storageBufferSize,
    			spheres.data());
     
    		vulkanDevice->createBuffer(
    			// The SSBO will be used as a storage buffer for the compute pipeline and as a vertex buffer in the graphics pipeline
    			VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
    			VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
    			&compute.storageBuffers.spheres,
    			storageBufferSize);
     
    		// Copy to staging buffer
    		VkCommandBuffer copyCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
    		VkBufferCopy copyRegion = {};
    		copyRegion.size = storageBufferSize;
    		vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, compute.storageBuffers.spheres.buffer, 1, &copyRegion);
    		VulkanExampleBase::flushCommandBuffer(copyCmd, queue, true);
     
    		stagingBuffer.destroy();
     
    		// Planes
    		std::vector<Plane> planes;
    		const float roomDim = 4.0f;
    		planes.push_back(newPlane(glm::vec3(0.0f, 1.0f, 0.0f), roomDim, glm::vec3(1.0f), 32.0f));
    		planes.push_back(newPlane(glm::vec3(0.0f, -1.0f, 0.0f), roomDim, glm::vec3(1.0f), 32.0f));
    		planes.push_back(newPlane(glm::vec3(0.0f, 0.0f, 1.0f), roomDim, glm::vec3(1.0f), 32.0f));
    		planes.push_back(newPlane(glm::vec3(0.0f, 0.0f, -1.0f), roomDim, glm::vec3(0.0f), 32.0f));
    		planes.push_back(newPlane(glm::vec3(-1.0f, 0.0f, 0.0f), roomDim, glm::vec3(1.0f, 0.0f, 0.0f), 32.0f));
    		planes.push_back(newPlane(glm::vec3(1.0f, 0.0f, 0.0f), roomDim, glm::vec3(0.0f, 1.0f, 0.0f), 32.0f));
    		storageBufferSize = planes.size() * sizeof(Plane);
     
    		// Stage
    		vulkanDevice->createBuffer(
    			VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
    			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
    			&stagingBuffer,
    			storageBufferSize,
    			planes.data());
     
    		vulkanDevice->createBuffer(
    			// The SSBO will be used as a storage buffer for the compute pipeline and as a vertex buffer in the graphics pipeline
    			VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_STORAGE_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_DST_BIT,
    			VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
    			&compute.storageBuffers.planes,
    			storageBufferSize);
     
    		// Copy to staging buffer
    		copyCmd = VulkanExampleBase::createCommandBuffer(VK_COMMAND_BUFFER_LEVEL_PRIMARY, true);
    		copyRegion.size = storageBufferSize;
    		vkCmdCopyBuffer(copyCmd, stagingBuffer.buffer, compute.storageBuffers.planes.buffer, 1, &copyRegion);
    		VulkanExampleBase::flushCommandBuffer(copyCmd, queue, true);
     
    		stagingBuffer.destroy();
    	}
     
    	void setupDescriptorPool()
    	{
    		std::vector<VkDescriptorPoolSize> poolSizes =
    		{
    			vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER, 2),			// Compute UBO
    			vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER, 4),	// Graphics image samplers
    			vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1),				// Storage image for ray traced image output
    			vks::initializers::descriptorPoolSize(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2),			// Storage buffer for the scene primitives
    		};
     
    		VkDescriptorPoolCreateInfo descriptorPoolInfo =
    			vks::initializers::descriptorPoolCreateInfo(
    				poolSizes.size(),
    				poolSizes.data(),
    				3);
     
    		VK_CHECK_RESULT(vkCreateDescriptorPool(device, &descriptorPoolInfo, nullptr, &descriptorPool));
    	}
     
    	void setupDescriptorSetLayout()
    	{
    		std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings =
    		{
    			// Binding 0 : Fragment shader image sampler
    			vks::initializers::descriptorSetLayoutBinding(
    				VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
    				VK_SHADER_STAGE_FRAGMENT_BIT,
    				0)
    		};
     
    		VkDescriptorSetLayoutCreateInfo descriptorLayout =
    			vks::initializers::descriptorSetLayoutCreateInfo(
    				setLayoutBindings.data(),
    				setLayoutBindings.size());
     
    		VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr, &graphics.descriptorSetLayout));
     
    		VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo =
    			vks::initializers::pipelineLayoutCreateInfo(
    				&graphics.descriptorSetLayout,
    				1);
     
    		VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pPipelineLayoutCreateInfo, nullptr, &graphics.pipelineLayout));
    	}
     
    	void setupDescriptorSet()
    	{
    		VkDescriptorSetAllocateInfo allocInfo =
    			vks::initializers::descriptorSetAllocateInfo(
    				descriptorPool,
    				&graphics.descriptorSetLayout,
    				1);
     
    		VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &graphics.descriptorSet));
     
    		std::vector<VkWriteDescriptorSet> writeDescriptorSets =
    		{
    			// Binding 0 : Fragment shader texture sampler
    			vks::initializers::writeDescriptorSet(
    				graphics.descriptorSet,
    				VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
    				0,
    				&textureComputeTarget.descriptor)
    		};
     
    		vkUpdateDescriptorSets(device, writeDescriptorSets.size(), writeDescriptorSets.data(), 0, NULL);
    	}
     
    	// Creates the graphics pipeline that displays the ray traced image.
    	// The pipeline uses no vertex input at all, and viewport and scissor are dynamic states
    	// that are set while recording the draw command buffers.
    	void preparePipelines()
    	{
    		VkPipelineInputAssemblyStateCreateInfo inputAssemblyState =
    			vks::initializers::pipelineInputAssemblyStateCreateInfo(
    				VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
    				0,
    				VK_FALSE);
     
    		VkPipelineRasterizationStateCreateInfo rasterizationState =
    			vks::initializers::pipelineRasterizationStateCreateInfo(
    				VK_POLYGON_MODE_FILL,
    				VK_CULL_MODE_FRONT_BIT,
    				VK_FRONT_FACE_COUNTER_CLOCKWISE,
    				0);
     
    		// Blending is disabled; 0xf is the color write mask
    		VkPipelineColorBlendAttachmentState blendAttachmentState =
    			vks::initializers::pipelineColorBlendAttachmentState(
    				0xf,
    				VK_FALSE);
     
    		VkPipelineColorBlendStateCreateInfo colorBlendState =
    			vks::initializers::pipelineColorBlendStateCreateInfo(
    				1,
    				&blendAttachmentState);
     
    		// Both leading arguments are VK_FALSE (presumably depth test and depth write - confirm against the initializer)
    		VkPipelineDepthStencilStateCreateInfo depthStencilState =
    			vks::initializers::pipelineDepthStencilStateCreateInfo(
    				VK_FALSE,
    				VK_FALSE,
    				VK_COMPARE_OP_LESS_OR_EQUAL);
     
    		VkPipelineViewportStateCreateInfo viewportState =
    			vks::initializers::pipelineViewportStateCreateInfo(1, 1, 0);
     
    		VkPipelineMultisampleStateCreateInfo multisampleState =
    			vks::initializers::pipelineMultisampleStateCreateInfo(
    				VK_SAMPLE_COUNT_1_BIT,
    				0);
     
    		// Viewport and scissor are provided via vkCmdSetViewport / vkCmdSetScissor in buildCommandBuffers
    		std::vector<VkDynamicState> dynamicStateEnables = {
    			VK_DYNAMIC_STATE_VIEWPORT,
    			VK_DYNAMIC_STATE_SCISSOR
    		};
    		VkPipelineDynamicStateCreateInfo dynamicState =
    			vks::initializers::pipelineDynamicStateCreateInfo(
    				dynamicStateEnables.data(),
    				dynamicStateEnables.size(),
    				0);
     
    		// Display pipeline
    		// NOTE(review): std::array is used here but <array> is not among the includes at the top of the file;
    		// it presumably arrives through one of the project headers - confirm
    		std::array<VkPipelineShaderStageCreateInfo,2> shaderStages;
     
    		shaderStages[0] = loadShader(getAssetPath() + "shaders/raytracing/texture.vert.spv", VK_SHADER_STAGE_VERTEX_BIT);
    		shaderStages[1] = loadShader(getAssetPath() + "shaders/raytracing/texture.frag.spv", VK_SHADER_STAGE_FRAGMENT_BIT);
     
    		VkGraphicsPipelineCreateInfo pipelineCreateInfo =
    			vks::initializers::pipelineCreateInfo(
    				graphics.pipelineLayout,
    				renderPass,
    				0);
     
    		// No vertex bindings or attributes: the draw call in buildCommandBuffers binds no vertex buffer
    		VkPipelineVertexInputStateCreateInfo emptyInputState{};
    		emptyInputState.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
    		emptyInputState.vertexAttributeDescriptionCount = 0;
    		emptyInputState.pVertexAttributeDescriptions = nullptr;
    		emptyInputState.vertexBindingDescriptionCount = 0;
    		emptyInputState.pVertexBindingDescriptions = nullptr;
    		pipelineCreateInfo.pVertexInputState = &emptyInputState;
     
    		pipelineCreateInfo.pInputAssemblyState = &inputAssemblyState;
    		pipelineCreateInfo.pRasterizationState = &rasterizationState;
    		pipelineCreateInfo.pColorBlendState = &colorBlendState;
    		pipelineCreateInfo.pMultisampleState = &multisampleState;
    		pipelineCreateInfo.pViewportState = &viewportState;
    		pipelineCreateInfo.pDepthStencilState = &depthStencilState;
    		pipelineCreateInfo.pDynamicState = &dynamicState;
    		pipelineCreateInfo.stageCount = shaderStages.size();
    		pipelineCreateInfo.pStages = shaderStages.data();
    		// renderPass was already passed to the initializer above; this assignment sets it again
    		pipelineCreateInfo.renderPass = renderPass;
     
    		VK_CHECK_RESULT(vkCreateGraphicsPipelines(device, pipelineCache, 1, &pipelineCreateInfo, nullptr, &graphics.pipeline));
    	}
     
    	// Prepare the compute pipeline that generates the ray traced image
    	void prepareCompute()
    	{
    		// Create a compute capable device queue
    		// The VulkanDevice::createLogicalDevice functions finds a compute capable queue and prefers queue families that only support compute
    		// Depending on the implementation this may result in different queue family indices for graphics and computes,
    		// requiring proper synchronization (see the memory barriers in buildComputeCommandBuffer)
    		VkDeviceQueueCreateInfo queueCreateInfo = {};
    		queueCreateInfo.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
    		queueCreateInfo.pNext = NULL;
    		queueCreateInfo.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
    		queueCreateInfo.queueCount = 1;
    		vkGetDeviceQueue(device, vulkanDevice->queueFamilyIndices.compute, 0, &compute.queue);
     
    		std::vector<VkDescriptorSetLayoutBinding> setLayoutBindings = {
    			// Binding 0: Storage image (raytraced output)
    			vks::initializers::descriptorSetLayoutBinding(
    				VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
    				VK_SHADER_STAGE_COMPUTE_BIT,
    				0),
    			// Binding 1: Uniform buffer block
    			vks::initializers::descriptorSetLayoutBinding(
    				VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
    				VK_SHADER_STAGE_COMPUTE_BIT,
    				1),
    			// Binding 1: Shader storage buffer for the spheres
    			vks::initializers::descriptorSetLayoutBinding(
    				VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    				VK_SHADER_STAGE_COMPUTE_BIT,
    				2),
    			// Binding 1: Shader storage buffer for the planes
    			vks::initializers::descriptorSetLayoutBinding(
    				VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    				VK_SHADER_STAGE_COMPUTE_BIT,
    				3)
    		};
     
    		VkDescriptorSetLayoutCreateInfo descriptorLayout =
    			vks::initializers::descriptorSetLayoutCreateInfo(
    				setLayoutBindings.data(),
    				setLayoutBindings.size());
     
    		VK_CHECK_RESULT(vkCreateDescriptorSetLayout(device, &descriptorLayout, nullptr,	&compute.descriptorSetLayout));
     
    		VkPipelineLayoutCreateInfo pPipelineLayoutCreateInfo =
    			vks::initializers::pipelineLayoutCreateInfo(
    				&compute.descriptorSetLayout,
    				1);
     
    		VK_CHECK_RESULT(vkCreatePipelineLayout(device, &pPipelineLayoutCreateInfo, nullptr, &compute.pipelineLayout));
     
    		VkDescriptorSetAllocateInfo allocInfo =
    			vks::initializers::descriptorSetAllocateInfo(
    				descriptorPool,
    				&compute.descriptorSetLayout,
    				1);
     
    		VK_CHECK_RESULT(vkAllocateDescriptorSets(device, &allocInfo, &compute.descriptorSet));
     
    		std::vector<VkWriteDescriptorSet> computeWriteDescriptorSets =
    		{
    			// Binding 0: Output storage image
    			vks::initializers::writeDescriptorSet(
    				compute.descriptorSet,
    				VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
    				0,
    				&textureComputeTarget.descriptor),
    			// Binding 1: Uniform buffer block
    			vks::initializers::writeDescriptorSet(
    				compute.descriptorSet,
    				VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER,
    				1,
    				&compute.uniformBuffer.descriptor),
    			// Binding 2: Shader storage buffer for the spheres
    			vks::initializers::writeDescriptorSet(
    				compute.descriptorSet,
    				VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    				2,
    				&compute.storageBuffers.spheres.descriptor),
    			// Binding 2: Shader storage buffer for the planes
    			vks::initializers::writeDescriptorSet(
    				compute.descriptorSet,
    				VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,
    				3,
    				&compute.storageBuffers.planes.descriptor)
    		};
     
    		vkUpdateDescriptorSets(device, computeWriteDescriptorSets.size(), computeWriteDescriptorSets.data(), 0, NULL);
     
    		// Create compute shader pipelines
    		VkComputePipelineCreateInfo computePipelineCreateInfo =
    			vks::initializers::computePipelineCreateInfo(
    				compute.pipelineLayout,
    				0);
     
    		computePipelineCreateInfo.stage = loadShader(getAssetPath() + "shaders/raytracing/raytracing.comp.spv", VK_SHADER_STAGE_COMPUTE_BIT);
    		VK_CHECK_RESULT(vkCreateComputePipelines(device, pipelineCache, 1, &computePipelineCreateInfo, nullptr, &compute.pipeline));
     
    		// Separate command pool as queue family for compute may be different than graphics
    		VkCommandPoolCreateInfo cmdPoolInfo = {};
    		cmdPoolInfo.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
    		cmdPoolInfo.queueFamilyIndex = vulkanDevice->queueFamilyIndices.compute;
    		cmdPoolInfo.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
    		VK_CHECK_RESULT(vkCreateCommandPool(device, &cmdPoolInfo, nullptr, &compute.commandPool));
     
    		// Create a command buffer for compute operations
    		VkCommandBufferAllocateInfo cmdBufAllocateInfo =
    			vks::initializers::commandBufferAllocateInfo(
    				compute.commandPool,
    				VK_COMMAND_BUFFER_LEVEL_PRIMARY,
    				1);
     
    		VK_CHECK_RESULT(vkAllocateCommandBuffers(device, &cmdBufAllocateInfo, &compute.commandBuffer));
     
    		// Fence for compute CB sync
    		VkFenceCreateInfo fenceCreateInfo = vks::initializers::fenceCreateInfo(VK_FENCE_CREATE_SIGNALED_BIT);
    		VK_CHECK_RESULT(vkCreateFence(device, &fenceCreateInfo, nullptr, &compute.fence));
     
    		// Build a single command buffer containing the compute dispatch commands
    		buildComputeCommandBuffer();
    	}
     
    	// Prepare and initialize uniform buffer containing shader uniforms
    	void prepareUniformBuffers()
    	{
    		// Compute shader parameter uniform buffer block
    		vulkanDevice->createBuffer(
    			VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT,
    			VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
    			&compute.uniformBuffer,
    			sizeof(compute.ubo));
     
    		updateUniformBuffers();
    	}
     
    	// Recomputes the animated light position and the camera position and copies the UBO into the uniform buffer
    	void updateUniformBuffers()
    	{
    		// The light moves on a closed path driven by the timer (one full 360 degree turn per timer unit)
    		const auto angleSin = sin(glm::radians(timer * 360.0f));
    		const auto angleCos = cos(glm::radians(timer * 360.0f));
    		compute.ubo.lightPos.x = 0.0f + angleSin * angleCos * 2.0f;
    		compute.ubo.lightPos.y = 0.0f + angleSin * 2.0f;
    		compute.ubo.lightPos.z = 0.0f + angleCos * 2.0f;
     
    		// The shader receives the negated camera position
    		compute.ubo.camera.pos = camera.position * -1.0f;
     
    		// Map, copy the whole UBO struct, unmap
    		VK_CHECK_RESULT(compute.uniformBuffer.map());
    		memcpy(compute.uniformBuffer.mapped, &compute.ubo, sizeof(compute.ubo));
    		compute.uniformBuffer.unmap();
    	}
     
    	void draw()
    	{
    		VulkanExampleBase::prepareFrame();
     
    		// Command buffer to be sumitted to the queue
    		submitInfo.commandBufferCount = 1;
    		submitInfo.pCommandBuffers = &drawCmdBuffers[currentBuffer];
    		VK_CHECK_RESULT(vkQueueSubmit(queue, 1, &submitInfo, VK_NULL_HANDLE));
     
    		VulkanExampleBase::submitFrame();
     
    		// Submit compute commands
    		// Use a fence to ensure that compute command buffer has finished executing before using it again
    		vkWaitForFences(device, 1, &compute.fence, VK_TRUE, UINT64_MAX);
    		vkResetFences(device, 1, &compute.fence);
     
    		VkSubmitInfo computeSubmitInfo = vks::initializers::submitInfo();
    		computeSubmitInfo.commandBufferCount = 1;
    		computeSubmitInfo.pCommandBuffers = &compute.commandBuffer;
     
    		VK_CHECK_RESULT(vkQueueSubmit(compute.queue, 1, &computeSubmitInfo, compute.fence));
    	}
     
    	// Builds all resources of the example. The call order matters: buffers and the target texture
    	// must exist before the descriptor sets that reference them are written, and the descriptor pool
    	// must exist before any descriptor set is allocated.
    	void prepare()
    	{
    		VulkanExampleBase::prepare();
    		prepareStorageBuffers();
    		prepareUniformBuffers();
    		prepareTextureTarget(&textureComputeTarget, TEX_DIM, TEX_DIM, VK_FORMAT_R8G8B8A8_UNORM);
    		setupDescriptorSetLayout();
    		preparePipelines();
    		setupDescriptorPool();
    		setupDescriptorSet();
    		prepareCompute();
    		buildCommandBuffers(); 
    		// From here on render() is allowed to draw
    		prepared = true;
    	}
     
    	// Per frame entry point: draws a frame and, unless paused, advances the animated uniforms.
    	// Does nothing until prepare() has completed.
    	virtual void render()
    	{
    		if (prepared)
    		{
    			draw();
    			if (!paused)
    				updateUniformBuffers();
    		}
    	}
     
    	// Refreshes the aspect ratio from the current width and height and re-uploads the uniform buffer
    	virtual void viewChanged()
    	{
    		const float viewportAspect = (float)width / (float)height;
    		compute.ubo.aspectRatio = viewportAspect;
    		updateUniformBuffers();
    	}
    };
     
    VULKAN_EXAMPLE_MAIN()

    And the shader:
    Code :
    // Shader is loosely based on the ray tracing coding session by Inigo Quilez (www.iquilezles.org)
     
    #version 450
     
    #extension GL_ARB_separate_shader_objects : enable
    #extension GL_ARB_shading_language_420pack : enable
     
    layout (local_size_x = 16, local_size_y = 16) in;
    layout (binding = 0, rgba8) uniform writeonly image2D resultImage;
     
    // Tuning constants for the ray tracer.
    // Minimum hit distance accepted by intersect() and calcShadow()
    #define EPSILON 0.0001
    // Initial (maximum) ray length renderScene() starts with before intersection testing
    #define MAXLEN 1000.0
    // Factor calcShadow() returns when the shadow ray is blocked (1.0 = unshadowed)
    #define SHADOW 0.5
    // Number of reflection iterations performed in main()
    #define RAYBOUNCES 2
    // Enables the reflection loop in main()
    #define REFLECTIONS true
    // Blend weight of the first reflection bounce
    #define REFLECTIONSTRENGTH 0.4
    // Multiplier applied to the blend weight after each bounce
    #define REFLECTIONFALLOFF 0.5
     
    // Camera parameters, embedded in the UBO block.
    // Only `pos` is read by this shader (primary ray origin in main(), view vector
    // in lightSpecular()); `lookat` and `fov` are not referenced here.
    struct Camera 
    {
    	vec3 pos;   
    	vec3 lookat;
    	float fov; 
    };
     
    // Per-frame scene parameters.
    //   lightPos    - light position; used for the light vector and shadow-ray length
    //   aspectRatio - scales the x component of the primary ray direction in main()
    //   fogColor    - rgb is the color fog() blends towards
    //   camera      - see Camera
    //   rotMat      - not referenced by this shader
    // NOTE(review): member order/padding presumably has to match the host-side
    // struct that is memcpy'd into the uniform buffer — confirm against the C++ code.
    layout (binding = 1) uniform UBO 
    {
    	vec3 lightPos;
    	float aspectRatio;
    	vec4 fogColor;
    	Camera camera;
    	mat4 rotMat;
    } ubo;
     
    // One sphere of the scene.
    //   pos, radius - center and radius used by sphereIntersect()/sphereNormal()
    //   diffuse     - color multiplied by the diffuse lighting term
    //   specular    - exponent passed to pow() in lightSpecular()
    //   id          - identifier returned by intersect() and matched in renderScene()
    struct Sphere 
    {
    	vec3 pos;
    	float radius;
    	vec3 diffuse;
    	float specular;
    	int id;
    };
     
    // One infinite plane of the scene.
    //   normal, distance - planeIntersect() solves dot(normal, p) + distance = 0
    //   diffuse          - color multiplied by the diffuse lighting term
    //   specular         - exponent passed to pow() in lightSpecular()
    //   id               - identifier returned by intersect() and matched in renderScene()
    struct Plane
    {
    	vec3 normal;
    	float distance;
    	vec3 diffuse;
    	float specular;
    	int id;
    };
     
    // Storage buffer holding the scene's spheres as a runtime-sized array;
    // the shader iterates it using spheres.length().
    layout (std140, binding = 2) buffer Spheres
    {
    	Sphere spheres[ ];
    };
     
    // Storage buffer holding the scene's planes as a runtime-sized array;
    // the shader iterates it using planes.length().
    layout (std140, binding = 3) buffer Planes
    {
    	Plane planes[ ];
    };
     
    // Reflects a ray direction about a surface normal, in place.
    //   rayD   - incoming direction; overwritten with the reflected direction
    //   normal - surface normal (the reflection formula assumes unit length)
    void reflectRay(inout vec3 rayD, in vec3 normal)
    {
    	// Built-in reflect(I, N) evaluates I - 2.0 * dot(N, I) * N, which is the
    	// same formula the hand-written version spelled out.
    	rayD = reflect(rayD, normal);
    }
     
    // Lighting =========================================================
     
    // Lambert-style diffuse term: dot(normal, lightDir) limited to [0.1, 1.0],
    // so surfaces facing away from the light still receive a 0.1 floor.
    float lightDiffuse(vec3 normal, vec3 lightDir) 
    {
    	float nDotL = dot(normal, lightDir);
    	return min(max(nDotL, 0.1), 1.0);
    }
     
    // Half-vector specular term.
    //   normal         - surface normal at the shaded point
    //   lightDir       - direction towards the light
    //   specularFactor - exponent applied to the clamped normal/half-vector dot product
    // NOTE(review): the view vector is the normalized camera position itself, not
    // the direction from the shaded point to the camera; the function has no access
    // to the hit position, so this is an approximation — confirm it is intended.
    float lightSpecular(vec3 normal, vec3 lightDir, float specularFactor)
    {
    	vec3 toViewer = normalize(ubo.camera.pos);
    	float nDotH = dot(normal, normalize(lightDir + toViewer));
    	return pow(clamp(nDotH, 0.0, 1.0), specularFactor);
    }
     
    // Sphere ===========================================================
     
    // Ray/sphere intersection.
    // Solves t^2 + b*t + c = 0 (the leading coefficient is taken as 1, i.e. rayD is
    // assumed to be normalized) and returns the smaller root, or -1.0 when the
    // discriminant is negative and the ray misses the sphere. The returned root may
    // itself be negative; callers filter with EPSILON.
    float sphereIntersect(in vec3 rayO, in vec3 rayD, in Sphere sphere)
    {
    	vec3 centerToOrigin = rayO - sphere.pos;
    	float b = 2.0 * dot(centerToOrigin, rayD);
    	float c = dot(centerToOrigin, centerToOrigin) - sphere.radius*sphere.radius;
    	float discriminant = b*b - 4.0*c;

    	return (discriminant < 0.0) ? -1.0 : (-b - sqrt(discriminant)) / 2.0;
    }
     
    // Surface normal of `sphere` at `pos`: the offset from the center divided by
    // the radius (unit length when pos lies on the sphere's surface).
    vec3 sphereNormal(in vec3 pos, in Sphere sphere)
    {
    	vec3 fromCenter = pos - sphere.pos;
    	return fromCenter / sphere.radius;
    }
     
    // Plane ===========================================================
     
    // Ray/plane intersection for the plane dot(normal, p) + distance = 0.
    // Returns the ray parameter t of the hit, or 0.0 when the ray is parallel to
    // the plane or the hit lies behind the ray origin.
    float planeIntersect(vec3 rayO, vec3 rayD, Plane plane)
    {
    	float denom = dot(rayD, plane.normal);

    	if (denom != 0.0)
    	{
    		float t = -(plane.distance + dot(rayO, plane.normal)) / denom;
    		if (!(t < 0.0))
    			return t;
    	}

    	// Parallel ray or hit behind the origin
    	return 0.0;
    }
     
     
    // Finds the nearest object hit by the ray within the current limit `resT`.
    //   resT - in: maximum accepted distance; out: distance of the nearest hit
    //          (unchanged when nothing is hit)
    // Returns the id of the nearest sphere or plane, or -1 if the ray hits nothing.
    int intersect(in vec3 rayO, in vec3 rayD, inout float resT)
    {
    	int nearestId = -1;

    	// Spheres first
    	for (int s = 0; s < spheres.length(); s++)
    	{
    		float dist = sphereIntersect(rayO, rayD, spheres[s]);
    		if (dist > EPSILON)
    		{
    			if (dist < resT)
    			{
    				nearestId = spheres[s].id;
    				resT = dist;
    			}
    		}
    	}

    	// Then planes; a plane only wins if it is closer than the best sphere hit
    	for (int p = 0; p < planes.length(); p++)
    	{
    		float dist = planeIntersect(rayO, rayD, planes[p]);
    		if (dist > EPSILON)
    		{
    			if (dist < resT)
    			{
    				nearestId = planes[p].id;
    				resT = dist;
    			}
    		}
    	}

    	return nearestId;
    }
     
    // Shadow test along a ray from the shaded point towards the light.
    //   objectId - id of the object being shaded; it is skipped as an occluder
    //   t        - in: distance to the light; out: distance to the first blocking
    //              sphere found (unchanged when unobstructed)
    // Returns SHADOW when a sphere blocks the ray, otherwise 1.0. Only spheres are
    // tested as occluders; planes are not.
    float calcShadow(in vec3 rayO, in vec3 rayD, in int objectId, inout float t)
    {
    	for (int s = 0; s < spheres.length(); s++)
    	{
    		if (spheres[s].id != objectId)
    		{
    			float dist = sphereIntersect(rayO, rayD, spheres[s]);
    			if ((dist > EPSILON) && (dist < t))
    			{
    				t = dist;
    				return SHADOW;
    			}
    		}
    	}

    	return 1.0;
    }
     
    // Blends `color` towards the UBO fog color based on distance.
    //   t     - distance; its magnitude is used, reaching full fog at 20.0 units
    //   color - unfogged color
    // Returns the fogged color.
    vec3 fog(in float t, in vec3 color)
    {
    	// abs(t) replaces sqrt(t*t): same magnitude without the square root
    	float fogAmount = clamp(abs(t) / 20.0, 0.0, 1.0);
    	return mix(color, ubo.fogColor.rgb, fogAmount);
    }
     
    // Traces one ray segment and shades the nearest hit.
    //   rayO, rayD - ray origin and direction; after a fully shaded hit they are
    //                replaced by the hit position and the reflected direction so
    //                the caller can trace the next bounce
    //   id         - if -1 on entry, the function returns right after lighting
    //                (no shadow, fog or ray update); otherwise it is overwritten
    //                with the id of the object that was hit
    // Returns the shaded color, or black (vec3(0.0)) when the ray hits nothing,
    // in which case rayO, rayD and id are left untouched.
    vec3 renderScene(inout vec3 rayO, inout vec3 rayD, inout int id)
    {
    	vec3 color = vec3(0.0);
    	float t = MAXLEN;
     
    	// Get intersected object ID
    	int objectID = intersect(rayO, rayD, t);
     
    	if (objectID == -1)
    	{
    		return color;
    	}
     
    	vec3 pos = rayO + t * rayD;
    	vec3 lightVec = normalize(ubo.lightPos - pos);				
    	vec3 normal;
     
    	// Planes: look up the hit object by id and light it
    	for (int i = 0; i < planes.length(); i++)
    	{
    		if (objectID == planes[i].id)
    		{
    			normal = planes[i].normal;
    			float diffuse = lightDiffuse(normal, lightVec);
    			float specular = lightSpecular(normal, lightVec, planes[i].specular);
    			color = diffuse * planes[i].diffuse + specular;	
    		}
    	}
     
    	// Spheres: same lookup; `normal` is only assigned if one of the two loops matches
    	for (int i = 0; i < spheres.length(); i++)
    	{
    		if (objectID == spheres[i].id)
    		{
    			normal = sphereNormal(pos, spheres[i]);	
    			float diffuse = lightDiffuse(normal, lightVec);
    			float specular = lightSpecular(normal, lightVec, spheres[i].specular);
    			color = diffuse * spheres[i].diffuse + specular;	
    		}
    	}
     
    	// Caller asked for lighting only
    	if (id == -1)
    		return color;
     
    	id = objectID;
     
    	// Shadows
    	// t is reused here: it now holds the distance to the light, and calcShadow()
    	// may shorten it to the distance of the blocking sphere
    	t = length(ubo.lightPos - pos);
    	color *= calcShadow(pos, lightVec, id, t);
     
    	// Fog
    	// NOTE(review): t is no longer the ray's hit distance at this point, so the
    	// fog amount follows the light/occluder distance — confirm this is intended.
    	color = fog(t, color);	
     
    	// Reflect ray for next render pass
    	reflectRay(rayD, normal);
    	rayO = pos;	
     
    	return color;
    }
     
    // Compute entry point: one invocation shades one pixel of resultImage.
    // Builds a primary ray through the pixel from the camera position, shades it,
    // then blends in RAYBOUNCES reflection passes with a decaying weight.
    void main()
    {
    	// Normalized pixel coordinate in [0, 1)
    	ivec2 dim = imageSize(resultImage);
    	vec2 uv = vec2(gl_GlobalInvocationID.xy) / dim;

    	// Map to [-1, 1), stretch x by the aspect ratio and look down -z
    	vec2 screenPos = (-1.0 + 2.0 * uv) * vec2(ubo.aspectRatio, 1.0);
    	vec3 rayD = normalize(vec3(screenPos, -1.0));
    	vec3 rayO = ubo.camera.pos;

    	// Basic color path; renderScene() leaves rayO/rayD set up for the next bounce
    	int id = 0;
    	vec3 finalColor = renderScene(rayO, rayD, id);

    	// Reflection
    	if (REFLECTIONS)
    	{
    		float reflectionStrength = REFLECTIONSTRENGTH;
    		for (int bounce = 0; bounce < RAYBOUNCES; bounce++)
    		{
    			vec3 reflectionColor = renderScene(rayO, rayD, id);
    			vec3 blended = mix(reflectionColor, finalColor, 1.0 - reflectionStrength);
    			finalColor = (1.0 - reflectionStrength) * finalColor + reflectionStrength * blended;
    			// Each further bounce contributes less
    			reflectionStrength *= REFLECTIONFALLOFF;
    		}
    	}

    	imageStore(resultImage, ivec2(gl_GlobalInvocationID.xy), vec4(finalColor, 0.0));
    }

    Pointing me in the right direction, Thanks.
    Paul.
    Last edited by paul.g.griffiths; 10-10-2017 at 07:16 AM.

  2. #2
    All I want is to draw the image the basic oldskool way, any way I want (not the entire image pixel by pixel), and then display it.
    Last edited by paul.g.griffiths; 10-10-2017 at 08:06 AM.

  3. #3
    If I replace:
    vkCmdDispatch(compute.commandBuffer, textureComputeTarget.width / 16, textureComputeTarget.height / 16, 1);
    with:
    vkCmdDispatch(compute.commandBuffer, 1,1, 1);
    Then it only draws a little square in the bottom-left corner, so I'm guessing the compute shader is only being invoked once?
    Is this the correct way? I have yet to modify the shader because I have yet to learn how to compile it.

  4. #4
    I'm only getting 5fps coloring in entire image with this shader code:
    Code :
    // Shader is loosely based on the ray tracing coding session by Inigo Quilez (www.iquilezles.org)
     
    #version 450
     
    #extension GL_ARB_separate_shader_objects : enable
    #extension GL_ARB_shading_language_420pack : enable
     
    layout (local_size_x = 1, local_size_y = 1) in;
    layout (binding = 0, rgba8) uniform writeonly image2D resultImage;
     
    // Ray tracer tuning constants; none of them is referenced by main() in this test shader.
    #define EPSILON 0.0001
    #define MAXLEN 1000.0
    #define SHADOW 0.5
    #define RAYBOUNCES 2
    #define REFLECTIONS true
    #define REFLECTIONSTRENGTH 0.4
    #define REFLECTIONFALLOFF 0.5
     
    // Camera parameters embedded in the UBO block; not read by main() in this test shader.
    struct Camera 
    {
    	vec3 pos;   
    	vec3 lookat;
    	float fov; 
    };
     
    // Scene parameters uniform block at binding 1; not read by main() in this test shader.
    // NOTE(review): presumably kept so the shader still matches the descriptor
    // layout set up by the C++ side — confirm before removing.
    layout (binding = 1) uniform UBO 
    {
    	vec3 lightPos;
    	float aspectRatio;
    	vec4 fogColor;
    	Camera camera;
    	mat4 rotMat;
    } ubo;
     
    // Element type of the Spheres storage buffer; not used by main() in this test shader.
    struct Sphere 
    {
    	vec3 pos;
    	float radius;
    	vec3 diffuse;
    	float specular;
    	int id;
    };
     
    // Element type of the Planes storage buffer; not used by main() in this test shader.
    struct Plane
    {
    	vec3 normal;
    	float distance;
    	vec3 diffuse;
    	float specular;
    	int id;
    };
     
    // Runtime-sized sphere array at binding 2; not accessed by main() in this test shader.
    layout (std140, binding = 2) buffer Spheres
    {
    	Sphere spheres[ ];
    };
     
    // Runtime-sized plane array at binding 3; not accessed by main() in this test shader.
    layout (std140, binding = 3) buffer Planes
    {
    	Plane planes[ ];
    };
     
    // Fills resultImage with a gradient: red grows with x, green with y.
    //
    // Each invocation starts at its own global ID and steps by the total number of
    // invocations in the dispatch, so together they cover every pixel exactly once.
    // With a 1x1x1 dispatch and a local size of 1 the stride is 1 and this single
    // invocation writes the whole image serially, exactly as before; dispatching
    // more work groups (or a larger local size) spreads the same work across
    // invocations without changing the result.
    void main()
    {
    	// Loop-invariant values, queried once instead of per pixel
    	ivec2 dim = imageSize(resultImage);
    	vec2 invDim = vec2(1.0 / dim.x, 1.0 / dim.y);

    	ivec2 first = ivec2(gl_GlobalInvocationID.xy);
    	ivec2 stride = ivec2(gl_NumWorkGroups.xy * gl_WorkGroupSize.xy);

    	for (int x = first.x; x < dim.x; x += stride.x)
    	{
    		for (int y = first.y; y < dim.y; y += stride.y)
    		{
    			imageStore(resultImage, ivec2(x, y), vec4(invDim.x * x, invDim.y * y, 0.0, 0.0));
    		}
    	}
    }

    I have set local_size_x and local_size_y to 1.

    And in C++, as in the post above:
    Code :
    vkCmdDispatch(compute.commandBuffer,  1,1,1);

    This is terrible, any ideas?

  5. #5
    Senior Member
    Join Date
    Mar 2016
    Posts
    227
    Quote Originally Posted by paul.g.griffiths View Post
    All I want is a basic draw the image oldskool way anyway I want(not entire image pixel by pixel) and then display it.
    What way is the "oldskool" way?

    I'm only getting 5fps coloring in entire image with this shader code
    That tends to happen if you have nested fors. What are you trying to achieve?
    For that case it would be faster to do:
    Code :
    vkCmdDispatch( compute.commandBuffer, resultImage.width, resultImage.height, 1 );

    and

    Code :
    void main(){
    	const vec4 whateverColor = ...;
     
    	imageStore( resultImage, ivec2(gl_WorkGroupID.xy), whateverColor );
     
    }

  6. #6
    Quote Originally Posted by krOoze View Post
    What way is the "oldskool" way?



    That tends to happen if you have nested fors. What are you trying to achieve?
    For that case it would be faster to do:
    Code :
    vkCmdDispatch( compute.commandBuffer, resultImage.width, resultImage.height, 1 );

    and

    Code :
    void main(){
    	const vec4 whateverColor = ...;
     
    	imageStore( resultImage, ivec2(gl_WorkGroupID.xy), whateverColor );
     
    }
    The "oldskool" way is like win32 setPixel() which can be called as many times as you like to create an image.
    I want to use imageStore multiple times within the same compute shader invocation.
    I don't want vulkan to automatically go over all the pixels in the image.
    I want to write a raycaster another way.
    The nested for's is just temporary to see how fast it can set pixels in the image.
    Thanks.
    Last edited by paul.g.griffiths; 10-10-2017 at 01:17 PM.

  7. #7
    Senior Member
    Join Date
    Mar 2016
    Posts
    227
    Quote Originally Posted by paul.g.griffiths View Post
    The "oldskool" way is like win32 setPixel() which can be called as many times as you like to create an image.
    That happens to be pretty much how the imageStore function works.

    Quote Originally Posted by paul.g.griffiths View Post
    I want to use imageStore multiple times within the same compute shader invocation.
    Well, you can.

    Quote Originally Posted by paul.g.griffiths View Post
    I don't want vulkan to automatically go over all the pixels in the image.
    It does not do that when using a compute shader. All you are doing is saying how many threads you want. And you do want some amount >1, otherwise the performance will be bad.

    Quote Originally Posted by paul.g.griffiths View Post
    The nested for's is just temporary to see how fast it can set pixels in the image.
    Done serially as you did, you found out the result. It would be a few times slower than even on CPU (which is built for serial work).

  8. #8
    Quote Originally Posted by paul.g.griffiths View Post
    The "oldskool" way is like win32 setPixel() which can be called as many times as you like to create an image.
    I want to use imageStore multiple times within the same compute shader invocation.
    I don't want vulkan to automatically go over all the pixels in the image.
    Well, do you want to do it the "oldskool way" or do you want performance? The reason we don't do it that way is because it's slow.

    A single invocation writing each pixel is not going to achieve performance. The ability to execute a thread for each pixel is why GPUs are fast. Throw that away, and you'd be better off using the CPU.

  9. #9
    Quote Originally Posted by Alfonse Reinheart View Post
    Well, do you want to do it the "oldskool way" or do you want performance? The reason we don't do it that way is because it's slow.

    A single invocation writing each pixel is not going to achieve performance. The ability to execute a thread for each pixel is why GPUs are fast. Throw that away, and you'd be better off using the CPU.
    A single invocation? It can read a pixel fast, as you well know, so it should be able to write just as quickly.
    My card only has 2 threads?

    The problem I'm guessing is the code for the image memory barrier: It continues once the image has changed by a single pixel.
    I need it to continue once finished the compute shader.
    Any ideas?
    Code :
    // Image memory barrier to make sure that compute shader writes are finished before sampling from the texture
    			VkImageMemoryBarrier imageMemoryBarrier = {};
    			imageMemoryBarrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
    			imageMemoryBarrier.oldLayout = VK_IMAGE_LAYOUT_GENERAL;
    			imageMemoryBarrier.newLayout = VK_IMAGE_LAYOUT_GENERAL;
    			imageMemoryBarrier.image = textureComputeTarget.image;
    			imageMemoryBarrier.subresourceRange = { VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1 };
    			imageMemoryBarrier.srcAccessMask = VK_ACCESS_SHADER_WRITE_BIT;
    			imageMemoryBarrier.dstAccessMask = VK_ACCESS_SHADER_READ_BIT;
    			vkCmdPipelineBarrier(
    				drawCmdBuffers[i],
    				VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
    				VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
    				VK_FLAGS_NONE,
    				0, nullptr,
    				0, nullptr,
    				1, &imageMemoryBarrier);
    Last edited by paul.g.griffiths; 10-10-2017 at 09:39 PM.

  10. #10
    I said 2 threads, but forgot about the 7000+ CUDA cores.
    I'll have to try it another way.
    1 compute shader to calculate the image area to be filled.
    Many cores to fill the part image in the corner.
    Then draw the part image to final render.
    Thanks.
    Last edited by paul.g.griffiths; 10-10-2017 at 10:07 PM.

Posting Permissions

  • You may not post new threads
  • You may not post replies
  • You may not post attachments
  • You may not edit your posts
  •  
Proudly hosted by Digital Ocean