Hello,
I am receiving a segfault from inside of vkCreateComputePipelines. From my research online, it seems like this is commonly caused by an error during shader compilation. My shader validates using spirv-val, but from what I’ve read that doesn’t necessarily mean it’s still not a “bad” shader. I will attach the api dump log from the program I am running as well as the source code of the shader.
This is a hand written spirv shader. I am writing this as a reference for a compilation target (don’t worry, not writing a whole large application in raw spirv). The shader program is supposed to work off of a buffer of bignums, with a bignum leaf count provided as a specialization constant (although right now the program provides none, so it defaults), a worksize provided as a push constant (how many bignums for each shader to process), and only two descriptors for an input buffer and an output buffer.
Thank you in advance for any help you can provide. I’m not sure where to go next on debugging this.
Thread 0, Frame 0:
vkCreateInstance(pCreateInfo, pAllocator, pInstance) returns VkResult VK_SUCCESS (0):
pCreateInfo: const VkInstanceCreateInfo* = 0x7ffe1c0c0270:
sType: VkStructureType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO (1)
pNext: const void* = 0x7ffe1c0c0240
flags: VkInstanceCreateFlags = 0
pApplicationInfo: const VkApplicationInfo* = 0x7ffe1c0c03c8:
sType: VkStructureType = VK_STRUCTURE_TYPE_APPLICATION_INFO (0)
pNext: const void* = NULL
pApplicationName: const char* = "Test"
applicationVersion: uint32_t = 0
pEngineName: const char* = NULL
engineVersion: uint32_t = 0
apiVersion: uint32_t = 4194306
enabledLayerCount: uint32_t = 2
ppEnabledLayerNames: const char* const* = 0x55ce3e4cb4d0
ppEnabledLayerNames[0]: const char* const = "VK_LAYER_LUNARG_api_dump"
ppEnabledLayerNames[1]: const char* const = "VK_LAYER_LUNARG_standard_validation"
enabledExtensionCount: uint32_t = 1
ppEnabledExtensionNames: const char* const* = 0x55ce3e4cb4e0
ppEnabledExtensionNames[0]: const char* const = "VK_EXT_debug_report"
pAllocator: const VkAllocationCallbacks* = NULL
pInstance: VkInstance* = 0x55ce3ebbbec0
Thread 0, Frame 0:
vkCreateDebugReportCallbackEXT(instance, pCreateInfo, pAllocator, pCallback) returns VkResult VK_SUCCESS (0):
instance: VkInstance = 0x55ce3ebbbec0
pCreateInfo: const VkDebugReportCallbackCreateInfoEXT* = 0x7ffe1c0c0438:
sType: VkStructureType = UNKNOWN (1000011000)
pNext: const void* = NULL
flags: VkDebugReportFlagsEXT = 10 (VK_DEBUG_REPORT_WARNING_BIT_EXT | VK_DEBUG_REPORT_ERROR_BIT_EXT)
pfnCallback: PFN_vkDebugReportCallbackEXT = 1
pUserData: void* = NULL
pAllocator: const VkAllocationCallbacks* = NULL
pCallback: VkDebugReportCallbackEXT* = 0x55ce3ef062e0
Thread 0, Frame 0:
vkEnumeratePhysicalDevices(instance, pPhysicalDeviceCount, pPhysicalDevices) returns VkResult VK_SUCCESS (0):
instance: VkInstance = 0x55ce3ebbbec0
pPhysicalDeviceCount: uint32_t* = 1
pPhysicalDevices: VkPhysicalDevice* = NULL
Thread 0, Frame 0:
vkEnumeratePhysicalDevices(instance, pPhysicalDeviceCount, pPhysicalDevices) returns VkResult VK_SUCCESS (0):
instance: VkInstance = 0x55ce3ebbbec0
pPhysicalDeviceCount: uint32_t* = 1
pPhysicalDevices: VkPhysicalDevice* = 0x7ffe1c0c02f0
pPhysicalDevices[0]: VkPhysicalDevice = 0x55ce3ef01710
Thread 0, Frame 0:
vkEnumeratePhysicalDevices(instance, pPhysicalDeviceCount, pPhysicalDevices) returns VkResult VK_SUCCESS (0):
instance: VkInstance = 0x55ce3ebbbec0
pPhysicalDeviceCount: uint32_t* = 1
pPhysicalDevices: VkPhysicalDevice* = NULL
Thread 0, Frame 0:
vkEnumeratePhysicalDevices(instance, pPhysicalDeviceCount, pPhysicalDevices) returns VkResult VK_SUCCESS (0):
instance: VkInstance = 0x55ce3ebbbec0
pPhysicalDeviceCount: uint32_t* = 1
pPhysicalDevices: VkPhysicalDevice* = 0x7ffe1c0c02f0
pPhysicalDevices[0]: VkPhysicalDevice = 0x55ce3ef01710
Thread 0, Frame 0:
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, pQueueFamilyPropertyCount, pQueueFamilyProperties) returns void:
physicalDevice: VkPhysicalDevice = 0x55ce3ef01710
pQueueFamilyPropertyCount: uint32_t* = 2
pQueueFamilyProperties: VkQueueFamilyProperties* = NULL
Thread 0, Frame 0:
vkGetPhysicalDeviceQueueFamilyProperties(physicalDevice, pQueueFamilyPropertyCount, pQueueFamilyProperties) returns void:
physicalDevice: VkPhysicalDevice = 0x55ce3ef01710
pQueueFamilyPropertyCount: uint32_t* = 2
pQueueFamilyProperties: VkQueueFamilyProperties* = 0x7f8ceb518000
pQueueFamilyProperties[0]: VkQueueFamilyProperties = 0x7f8ceb518000:
queueFlags: VkQueueFlags = 15 (VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT)
queueCount: uint32_t = 16
timestampValidBits: uint32_t = 64
minImageTransferGranularity: VkExtent3D = 0x7f8ceb51800c:
width: uint32_t = 1
height: uint32_t = 1
depth: uint32_t = 1
pQueueFamilyProperties[1]: VkQueueFamilyProperties = 0x7f8ceb518018:
queueFlags: VkQueueFlags = 4 (VK_QUEUE_TRANSFER_BIT)
queueCount: uint32_t = 1
timestampValidBits: uint32_t = 64
minImageTransferGranularity: VkExtent3D = 0x7f8ceb518024:
width: uint32_t = 1
height: uint32_t = 1
depth: uint32_t = 1
Thread 0, Frame 0:
vkCreateDevice(physicalDevice, pCreateInfo, pAllocator, pDevice) returns VkResult VK_SUCCESS (0):
physicalDevice: VkPhysicalDevice = 0x55ce3ef01710
pCreateInfo: const VkDeviceCreateInfo* = 0x7ffe1c0c02a0:
sType: VkStructureType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO (3)
pNext: const void* = 0x7ffe1c0c0270
flags: VkDeviceCreateFlags = 0
queueCreateInfoCount: uint32_t = 1
pQueueCreateInfos: const VkDeviceQueueCreateInfo* = 0x7f8ceb518040
pQueueCreateInfos[0]: const VkDeviceQueueCreateInfo = 0x7f8ceb518040:
sType: VkStructureType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO (2)
pNext: const void* = NULL
flags: VkDeviceQueueCreateFlags = 0
queueFamilyIndex: uint32_t = 0
queueCount: uint32_t = 1
pQueuePriorities: const float* = 0x7ffe1c0c03e0
pQueuePriorities[0]: const float = 1
enabledLayerCount: uint32_t = 0
ppEnabledLayerNames: const char* const* = NULL
enabledExtensionCount: uint32_t = 0
ppEnabledExtensionNames: const char* const* = NULL
pEnabledFeatures: const VkPhysicalDeviceFeatures* = NULL
pAllocator: const VkAllocationCallbacks* = NULL
pDevice: VkDevice* = 0x55ce3ef0fac0
Thread 0, Frame 0:
vkGetDeviceQueue(device, queueFamilyIndex, queueIndex, pQueue) returns void:
device: VkDevice = 0x55ce3ef0fac0
queueFamilyIndex: uint32_t = 0
queueIndex: uint32_t = 0
pQueue: VkQueue* = 0x55ce3ebb0d40
Thread 0, Frame 0:
vkCreateShaderModule(device, pCreateInfo, pAllocator, pShaderModule) returns VkResult VK_SUCCESS (0):
device: VkDevice = 0x55ce3ef0fac0
pCreateInfo: const VkShaderModuleCreateInfo* = 0x7ffe1c0bfe18:
sType: VkStructureType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO (16)
pNext: const void* = NULL
flags: VkShaderModuleCreateFlags = 0
codeSize: size_t = 1840
pCode: const uint32_t* = SHADER DATA
pAllocator: const VkAllocationCallbacks* = NULL
pShaderModule: VkShaderModule* = 0x1
Thread 0, Frame 0:
vkCreateDescriptorSetLayout(device, pCreateInfo, pAllocator, pSetLayout) returns VkResult VK_SUCCESS (0):
device: VkDevice = 0x55ce3ef0fac0
pCreateInfo: const VkDescriptorSetLayoutCreateInfo* = 0x7ffe1c0bfa30:
sType: VkStructureType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO (32)
pNext: const void* = NULL
flags: VkDescriptorSetLayoutCreateFlags = 0
bindingCount: uint32_t = 2
pBindings: const VkDescriptorSetLayoutBinding* = 0x7f8ceb518200
pBindings[0]: const VkDescriptorSetLayoutBinding = 0x7f8ceb518200:
binding: uint32_t = 0
descriptorType: VkDescriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER (7)
descriptorCount: uint32_t = 1
stageFlags: VkShaderStageFlags = 32 (VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_ALL)
pImmutableSamplers: const VkSampler* = UNUSED
pBindings[1]: const VkDescriptorSetLayoutBinding = 0x7f8ceb518218:
binding: uint32_t = 1
descriptorType: VkDescriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER (7)
descriptorCount: uint32_t = 1
stageFlags: VkShaderStageFlags = 32 (VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_ALL)
pImmutableSamplers: const VkSampler* = UNUSED
pAllocator: const VkAllocationCallbacks* = NULL
pSetLayout: VkDescriptorSetLayout* = 0x2
Thread 0, Frame 0:
vkCreatePipelineLayout(device, pCreateInfo, pAllocator, pPipelineLayout) returns VkResult VK_SUCCESS (0):
device: VkDevice = 0x55ce3ef0fac0
pCreateInfo: const VkPipelineLayoutCreateInfo* = 0x7ffe1c0bf9f0:
sType: VkStructureType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO (30)
pNext: const void* = NULL
flags: VkPipelineLayoutCreateFlags = 0
setLayoutCount: uint32_t = 1
pSetLayouts: const VkDescriptorSetLayout* = 0x7f8ceb5170b0
pSetLayouts[0]: const VkDescriptorSetLayout = 0x2
pushConstantRangeCount: uint32_t = 1
pPushConstantRanges: const VkPushConstantRange* = 0x7f8ceb517050
pPushConstantRanges[0]: const VkPushConstantRange = 0x7f8ceb517050:
stageFlags: VkShaderStageFlags = 32 (VK_SHADER_STAGE_COMPUTE_BIT | VK_SHADER_STAGE_ALL)
offset: uint32_t = 0
size: uint32_t = 4
pAllocator: const VkAllocationCallbacks* = NULL
pPipelineLayout: VkPipelineLayout* = 0x3
OpCapability Shader
OpMemoryModel Logical Simple
OpEntryPoint GLCompute %f_main "main" %v_g_inv
OpExecutionMode %f_main LocalSize 1 1 1
; -- DECORATIONS
OpDecorate %v_g_inv BuiltIn GlobalInvocationId
OpDecorate %t_buf BufferBlock
OpMemberDecorate %t_buf 0 Offset 0
OpDecorate %v_in_buf DescriptorSet 0
OpDecorate %v_in_buf Binding 0
OpDecorate %v_out_buf DescriptorSet 0
OpDecorate %v_out_buf Binding 1
OpDecorate %v_pc DescriptorSet 0
OpDecorate %t_pc Block
OpMemberDecorate %t_pc 0 Offset 0
OpMemberDecorate %t_carry_struct 0 Offset 0
OpMemberDecorate %t_carry_struct 0 Offset 4
; -- TYPES
%t_void = OpTypeVoid
%t_bool = OpTypeBool
%t_i32 = OpTypeInt 32 1
%t_u32 = OpTypeInt 32 0
%t_u32_arr = OpTypeRuntimeArray %t_u32
%t_vec3 = OpTypeVector %t_i32 3
%t_pc = OpTypeStruct %t_u32
%t_buf = OpTypeStruct %t_u32_arr
%t_carry_struct = OpTypeStruct %t_u32 %t_u32
%t_i32_f_p = OpTypePointer Function %t_i32
%t_u32_f_p = OpTypePointer Function %t_u32
%t_pc_p = OpTypePointer PushConstant %t_pc
%t_i32_pc_p = OpTypePointer PushConstant %t_i32
%t_u32_pc_p = OpTypePointer PushConstant %t_u32
%t_vec3_in_p = OpTypePointer Input %t_vec3
%t_i32_in_p = OpTypePointer Input %t_i32
%t_buf_un_p = OpTypePointer Uniform %t_buf
%t_u32_un_p = OpTypePointer Uniform %t_u32
%t_carry_struct_f_p = OpTypePointer Function %t_carry_struct
%t_f_main = OpTypeFunction %t_void
; -- CONSTANTS
; !!!!! TMP !!!!!
%sc_bignum_sz = OpSpecConstant %t_i32 4
%c_u32_zero = OpConstant %t_u32 0
%c_u32_one = OpConstant %t_u32 1
%c_i32_zero = OpConstant %t_i32 0
%c_i32_one = OpConstant %t_i32 1
%c_i32_two = OpConstant %t_i32 2
; -- VARIABLES
%v_in_buf = OpVariable %t_buf_un_p Uniform
%v_out_buf = OpVariable %t_buf_un_p Uniform
%v_g_inv = OpVariable %t_vec3_in_p Input
%v_pc = OpVariable %t_pc_p PushConstant
; -- FUNCTIONS
%f_main = OpFunction %t_void None %t_f_main
%l_start = OpLabel
%v_i = OpVariable %t_u32_f_p Function
%v_j = OpVariable %t_i32_f_p Function
%v_in_base = OpVariable %t_i32_f_p Function
%v_prev_carry = OpVariable %t_u32_f_p Function
%v_carry_res = OpVariable %t_carry_struct_f_p Function
; load execution information
%g_inv_p = OpInBoundsAccessChain %t_i32_in_p %v_g_inv %c_u32_zero
%g_inv = OpLoad %t_i32 %g_inv_p
%work_sz_p = OpInBoundsAccessChain %t_u32_pc_p %v_pc %c_u32_zero
%work_sz = OpLoad %t_u32 %work_sz_p
; initialize outer loop
%bignum_sz_pred = OpISub %t_i32 %sc_bignum_sz %c_i32_one
%bignum_sz_x2 = OpIMul %t_i32 %sc_bignum_sz %c_i32_two
%in_base_1 = OpIMul %t_i32 %bignum_sz_x2 %g_inv
OpStore %v_in_base %in_base_1
OpStore %v_i %c_u32_zero
OpBranch %l_l1_head
%l_l1_head = OpLabel
OpLoopMerge %l_l1_merge %l_l1_cont None
OpBranch %l_l1_merge
%l_l1_merge = OpLabel
%i_1 = OpLoad %t_u32 %v_i
%l1_cont = OpULessThan %t_bool %i_1 %work_sz
OpBranchConditional %l1_cont %l_l1_body %l_end
%l_l1_body = OpLabel
%in_base_2 = OpLoad %t_i32 %v_in_base
OpStore %v_j %bignum_sz_pred
OpStore %v_prev_carry %c_u32_zero
OpBranch %l_l2_head
%l_l2_head = OpLabel
OpLoopMerge %l_l2_merge %l_l2_cont None
OpBranch %l_l2_merge
%l_l2_merge = OpLabel
%j_1 = OpLoad %t_i32 %v_j
%l2_cont = OpSGreaterThanEqual %t_bool %j_1 %c_i32_zero
OpBranchConditional %l2_cont %l_l2_body %l_l1_cont
%l_l2_body = OpLabel
%prev_carry_1 = OpLoad %t_u32 %v_prev_carry
%x_index = OpIAdd %t_i32 %in_base_2 %j_1
%y_index = OpIAdd %t_i32 %x_index %sc_bignum_sz
%x_p = OpAccessChain %t_u32_un_p %v_in_buf %c_i32_zero %x_index
%x = OpLoad %t_u32 %x_p
%y_p = OpAccessChain %t_u32_un_p %v_in_buf %c_i32_zero %x_index
%y = OpLoad %t_u32 %y_p
%carry_res_1 = OpIAddCarry %t_carry_struct %x %y
OpStore %v_carry_res %carry_res_1
%sum_1_p = OpInBoundsAccessChain %t_u32_f_p %v_carry_res %c_u32_zero
%sum_1 = OpLoad %t_u32 %sum_1_p
%curr_carry_p = OpInBoundsAccessChain %t_u32_f_p %v_carry_res %c_u32_one
%curr_carry = OpLoad %t_u32 %curr_carry_p
%carry_res_2 = OpIAddCarry %t_carry_struct %sum_1 %prev_carry_1
OpStore %v_carry_res %carry_res_2
%sum_2_p = OpInBoundsAccessChain %t_u32_f_p %v_carry_res %c_u32_zero
%sum_2 = OpLoad %t_u32 %sum_2_p
%prev_carry_2_p = OpInBoundsAccessChain %t_u32_f_p %v_carry_res %c_u32_one
%prev_carry_2 = OpLoad %t_u32 %prev_carry_2_p
OpStore %v_prev_carry %prev_carry_2
%half_in_base = OpSDiv %t_i32 %in_base_2 %c_i32_two
%r_index = OpIAdd %t_i32 %half_in_base %j_1
%r_p = OpAccessChain %t_u32_un_p %v_out_buf %c_u32_zero %r_index
OpStore %r_p %sum_2
OpBranch %l_l2_cont
%l_l2_cont = OpLabel
%j_2 = OpISub %t_i32 %j_1 %c_i32_one
OpStore %v_j %j_2
OpBranch %l_l2_head
%l_l1_cont = OpLabel
%in_base_3 = OpIAdd %t_i32 %in_base_2 %bignum_sz_x2
OpStore %v_in_base %in_base_3
%i_2 = OpIAdd %t_u32 %i_1 %c_u32_one
OpStore %v_i %i_2
OpBranch %l_l1_head
%l_end = OpLabel
OpReturn
OpFunctionEnd