From e8aa881d40f8241d4ecbdf538bc98f91b7490c42 Mon Sep 17 00:00:00 2001 From: Elias Naur Date: Fri, 11 Mar 2022 09:47:07 +0100 Subject: [PATCH] gpu/internal/vulkan: [Vulkan] replace Device/QueueWaitIdle with fences vkDeviceWaitIdle and vkQueueWaitIdle are expensive; a vkFence is cheaper and the usual way to ensure a previous frame has completed before starting another. References: https://todo.sr.ht/~eliasnaur/gio/375 Signed-off-by: Elias Naur --- app/vulkan.go | 14 ++++++++++++- gpu/internal/driver/api.go | 2 ++ gpu/internal/vulkan/vulkan.go | 37 +++++++++++++++++++++-------------- internal/vk/vulkan.go | 5 ++++- 4 files changed, 41 insertions(+), 17 deletions(-) diff --git a/app/vulkan.go b/app/vulkan.go index d0030460..db7d1492 100644 --- a/app/vulkan.go +++ b/app/vulkan.go @@ -22,6 +22,7 @@ type vkContext struct { queue vk.Queue acquireSem vk.Semaphore presentSem vk.Semaphore + fence vk.Fence swchain vk.Swapchain imgs []vk.Image @@ -51,6 +52,13 @@ func newVulkanContext(inst vk.Instance, surf vk.Surface) (*vkContext, error) { vk.DestroyDevice(dev) return nil, err } + fence, err := vk.CreateFence(dev, vk.FENCE_CREATE_SIGNALED_BIT) + if err != nil { + vk.DestroySemaphore(dev, presentSem) + vk.DestroySemaphore(dev, acquireSem) + vk.DestroyDevice(dev) + return nil, err + } c := &vkContext{ physDev: physDev, inst: inst, @@ -59,12 +67,14 @@ func newVulkanContext(inst vk.Instance, surf vk.Surface) (*vkContext, error) { queue: vk.GetDeviceQueue(dev, qFam, 0), acquireSem: acquireSem, presentSem: presentSem, + fence: fence, } return c, nil } func (c *vkContext) RenderTarget() (gpu.RenderTarget, error) { - vk.DeviceWaitIdle(c.dev) + vk.WaitForFences(c.dev, c.fence) + vk.ResetFences(c.dev, c.fence) imgIdx, err := vk.AcquireNextImage(c.dev, c.swchain, c.acquireSem, 0) if err := mapSurfaceErr(err); err != nil { @@ -74,6 +84,7 @@ func (c *vkContext) RenderTarget() (gpu.RenderTarget, error) { return gpu.VulkanRenderTarget{ WaitSem: uint64(c.acquireSem), SignalSem: 
uint64(c.presentSem), + Fence: uint64(c.fence), Framebuffer: uint64(c.fbos[imgIdx]), Image: uint64(c.imgs[imgIdx]), }, nil @@ -122,6 +133,7 @@ func (c *vkContext) release() { vk.DeviceWaitIdle(c.dev) c.destroySwapchain() + vk.DestroyFence(c.dev, c.fence) vk.DestroySemaphore(c.dev, c.acquireSem) vk.DestroySemaphore(c.dev, c.presentSem) vk.DestroyDevice(c.dev) diff --git a/gpu/internal/driver/api.go b/gpu/internal/driver/api.go index 9a762a67..e0628133 100644 --- a/gpu/internal/driver/api.go +++ b/gpu/internal/driver/api.go @@ -36,6 +36,8 @@ type VulkanRenderTarget struct { WaitSem uint64 // SignalSem is a VkSemaphore that signal access to Framebuffer is complete. SignalSem uint64 + // Fence is a VkFence that is set when all commands to Framebuffer have completed. + Fence uint64 // Image is the VkImage to render into. Image uint64 // Framebuffer is a VkFramebuffer for Image. diff --git a/gpu/internal/vulkan/vulkan.go b/gpu/internal/vulkan/vulkan.go index 1af8e095..e6b127a5 100644 --- a/gpu/internal/vulkan/vulkan.go +++ b/gpu/internal/vulkan/vulkan.go @@ -35,6 +35,7 @@ type Backend struct { } defers []func(d vk.Device) frameSig vk.Semaphore + frameFence vk.Fence waitSems []vk.Semaphore waitStages []vk.PipelineStageFlags sigSems []vk.Semaphore @@ -159,7 +160,7 @@ func newVulkanDevice(api driver.Vulkan) (driver.Device, error) { if props&reqs == reqs { b.caps |= driver.FeatureSRGB } - fence, err := vk.CreateFence(b.dev) + fence, err := vk.CreateFence(b.dev, 0) if err != nil { return nil, mapErr(err) } @@ -168,7 +169,6 @@ func newVulkanDevice(api driver.Vulkan) (driver.Device, error) { } func (b *Backend) BeginFrame(target driver.RenderTarget, clear bool, viewport image.Point) driver.Texture { - vk.QueueWaitIdle(b.queue) b.staging.size = 0 b.cmdPool.used = 0 b.runDefers() @@ -184,6 +184,7 @@ func (b *Backend) BeginFrame(target driver.RenderTarget, clear bool, viewport im layout = vk.IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL } b.frameSig = vk.Semaphore(t.SignalSem) + 
b.frameFence = vk.Fence(t.Fence) tex := &Texture{ img: vk.Image(t.Image), fbo: vk.Framebuffer(t.Framebuffer), @@ -234,7 +235,16 @@ func (b *Backend) EndFrame() { b.sigSems = append(b.sigSems, b.frameSig) b.frameSig = 0 } - b.submitCmdBuf(false) + fence := b.frameFence + if fence == 0 { + // We're internally synchronized. + fence = b.fence + } + b.submitCmdBuf(fence) + if b.frameFence == 0 { + vk.WaitForFences(b.dev, fence) + vk.ResetFences(b.dev, fence) + } } func (b *Backend) Caps() driver.Caps { @@ -858,7 +868,9 @@ func (b *Buffer) Download(data []byte) error { vk.PIPELINE_STAGE_HOST_BIT, vk.ACCESS_HOST_READ_BIT, ) - b.backend.submitCmdBuf(true) + b.backend.submitCmdBuf(b.backend.fence) + vk.WaitForFences(b.backend.dev, b.backend.fence) + vk.ResetFences(b.backend.dev, b.backend.fence) copy(data, mem) return nil } @@ -940,7 +952,9 @@ func (t *Texture) ReadPixels(src image.Rectangle, pixels []byte, stride int) err vk.PIPELINE_STAGE_HOST_BIT, vk.ACCESS_HOST_READ_BIT, ) - t.backend.submitCmdBuf(true) + t.backend.submitCmdBuf(t.backend.fence) + vk.WaitForFences(t.backend.dev, t.backend.fence) + vk.ResetFences(t.backend.dev, t.backend.fence) var srcOff, dstOff int for y := 0; y < sz.Y; y++ { dstRow := pixels[srcOff : srcOff+stageStride] @@ -1043,29 +1057,22 @@ func (b *Backend) lookupPass(fmt vk.Format, loadAct vk.AttachmentLoadOp, initLay return pass } -func (b *Backend) submitCmdBuf(sync bool) { +func (b *Backend) submitCmdBuf(fence vk.Fence) { buf := b.cmdPool.current - if buf == nil { + if buf == nil && fence == 0 { return } + buf = b.ensureCmdBuf() b.cmdPool.current = nil if err := vk.EndCommandBuffer(buf); err != nil { panic(err) } - var fence vk.Fence - if sync { - fence = b.fence - } if err := vk.QueueSubmit(b.queue, buf, b.waitSems, b.waitStages, b.sigSems, fence); err != nil { panic(err) } b.waitSems = b.waitSems[:0] b.sigSems = b.sigSems[:0] b.waitStages = b.waitStages[:0] - if sync { - vk.WaitForFences(b.dev, b.fence) - vk.ResetFences(b.dev, b.fence) - } } 
func (b *Backend) stagingBuffer(size int) (*Buffer, []byte, int) { diff --git a/internal/vk/vulkan.go b/internal/vk/vulkan.go index 1bdffd88..4616d50d 100644 --- a/internal/vk/vulkan.go +++ b/internal/vk/vulkan.go @@ -519,6 +519,8 @@ const ( ERROR_DEVICE_LOST = Error(C.VK_ERROR_DEVICE_LOST) SUBOPTIMAL_KHR = Error(C.VK_SUBOPTIMAL_KHR) + FENCE_CREATE_SIGNALED_BIT = 0x00000001 + BLEND_FACTOR_ZERO BlendFactor = C.VK_BLEND_FACTOR_ZERO BLEND_FACTOR_ONE BlendFactor = C.VK_BLEND_FACTOR_ONE BLEND_FACTOR_ONE_MINUS_SRC_ALPHA BlendFactor = C.VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA @@ -1771,9 +1773,10 @@ func CreateComputePipeline(d Device, mod ShaderModule, layout PipelineLayout) (P return pipe, nil } -func CreateFence(d Device) (Fence, error) { +func CreateFence(d Device, flags int) (Fence, error) { inf := C.VkFenceCreateInfo{ sType: C.VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, + flags: C.VkFenceCreateFlags(flags), } var f C.VkFence if err := vkErr(C.vkCreateFence(funcs.vkCreateFence, d, &inf, nil, &f)); err != nil {