From afaa31eca817308a0f95f75547356e97eea76234 Mon Sep 17 00:00:00 2001 From: Elias Naur Date: Tue, 10 Aug 2021 16:18:47 +0200 Subject: [PATCH] gpu: introduce pipeline abstraction Modern APIs such as Metal and Vulkan want clients to compile expensive state changes into pipeline objects. Change our GPU driver abstraction to match, thereby paving the way for future drivers. Signed-off-by: Elias Naur --- gpu/compute.go | 91 +++++----- gpu/gpu.go | 152 +++++++++------- gpu/headless/driver_test.go | 62 +++++-- gpu/internal/d3d11/d3d11_windows.go | 218 +++++++++++------------ gpu/internal/driver/driver.go | 42 +++-- gpu/internal/opengl/opengl.go | 260 ++++++++++++++++------------ gpu/path.go | 105 ++++++----- internal/d3d11/d3d11_windows.go | 10 ++ internal/gl/util.go | 8 +- 9 files changed, 548 insertions(+), 400 deletions(-) diff --git a/gpu/compute.go b/gpu/compute.go index 33f9c7c8..4e700943 100644 --- a/gpu/compute.go +++ b/gpu/compute.go @@ -61,8 +61,7 @@ type compute struct { memory sizedBuffer } output struct { - blitProg driver.Program - layout driver.InputLayout + blitPipeline driver.Pipeline buffer sizedBuffer @@ -89,8 +88,7 @@ type compute struct { // offsets maps texture ops to the offsets to put in their FillImage commands. offsets map[textureKey]image.Point - prog driver.Program - layout driver.InputLayout + pipeline driver.Pipeline packer packer @@ -414,21 +412,32 @@ func newCompute(ctx driver.Device) (*compute, error) { // Large enough for reasonable fill sizes, yet still spannable by the compute programs. 
g.output.packer.maxDim = 4096 - blitProg, err := ctx.NewProgram(gio.Shader_copy_vert, gio.Shader_copy_frag) + copyVert, copyFrag, err := newShaders(ctx, gio.Shader_copy_vert, gio.Shader_copy_frag) if err != nil { g.Release() return nil, err } - g.output.blitProg = blitProg - progLayout, err := ctx.NewInputLayout(gio.Shader_copy_vert, []shader.InputDesc{ - {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, - {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, + defer copyVert.Release() + defer copyFrag.Release() + pipe, err := ctx.NewPipeline(driver.PipelineDesc{ + VertexShader: copyVert, + FragmentShader: copyFrag, + VertexLayout: []shader.InputDesc{ + {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, + {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, + }, + PixelFormat: driver.TextureFormatOutput, + BlendDesc: driver.BlendDesc{ + Enable: true, + SrcFactor: driver.BlendFactorOne, + DstFactor: driver.BlendFactorOneMinusSrcAlpha, + }, }) if err != nil { g.Release() return nil, err } - g.output.layout = progLayout + g.output.blitPipeline = pipe g.output.uniforms = new(copyUniforms) buf, err := ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.output.uniforms))) @@ -437,23 +446,28 @@ func newCompute(ctx driver.Device) (*compute, error) { return nil, err } g.output.uniBuf = buf - g.output.blitProg.SetVertexUniforms(buf) - materialProg, err := ctx.NewProgram(gio.Shader_material_vert, gio.Shader_material_frag) + materialVert, materialFrag, err := newShaders(ctx, gio.Shader_material_vert, gio.Shader_material_frag) if err != nil { g.Release() return nil, err } - g.materials.prog = materialProg - progLayout, err = ctx.NewInputLayout(gio.Shader_material_vert, []shader.InputDesc{ - {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, - {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, + defer materialVert.Release() + defer materialFrag.Release() + pipe, err = ctx.NewPipeline(driver.PipelineDesc{ + VertexShader: materialVert, + FragmentShader: 
materialFrag, + VertexLayout: []shader.InputDesc{ + {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, + {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, + }, + PixelFormat: driver.TextureFormatRGBA8, }) if err != nil { g.Release() return nil, err } - g.materials.layout = progLayout + g.materials.pipeline = pipe g.materials.vert.uniforms = new(materialVertUniforms) buf, err = ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.materials.vert.uniforms))) @@ -462,7 +476,6 @@ func newCompute(ctx driver.Device) (*compute, error) { return nil, err } g.materials.vert.buf = buf - g.materials.prog.SetVertexUniforms(buf) var emulateSRGB materialFragUniforms if !g.srgb { emulateSRGB.emulateSRGB = 1.0 @@ -474,7 +487,6 @@ func newCompute(ctx driver.Device) (*compute, error) { } buf.Upload(byteslice.Struct(&emulateSRGB)) g.materials.frag.buf = buf - g.materials.prog.SetFragmentUniforms(buf) for _, shader := range shaders { if !g.useCPU { @@ -529,6 +541,18 @@ func newCompute(ctx driver.Device) (*compute, error) { return g, nil } +func newShaders(ctx driver.Device, vsrc, fsrc shader.Sources) (vert driver.VertexShader, frag driver.FragmentShader, err error) { + vert, err = ctx.NewVertexShader(vsrc) + if err != nil { + return + } + frag, err = ctx.NewFragmentShader(fsrc) + if err != nil { + vert.Release() + } + return +} + func (g *compute) Collect(viewport image.Point, ops *op.Ops) { g.viewport = viewport g.collector.reset() @@ -758,12 +782,9 @@ func (g *compute) blitLayers(viewport image.Point) { return } layers := g.collector.frame.layers - g.ctx.BlendFunc(driver.BlendFactorOne, driver.BlendFactorOneMinusSrcAlpha) - g.ctx.SetBlend(true) - defer g.ctx.SetBlend(false) g.ctx.Viewport(0, 0, viewport.X, viewport.Y) - g.ctx.BindProgram(g.output.blitProg) - g.ctx.BindInputLayout(g.output.layout) + g.ctx.BindPipeline(g.output.blitPipeline) + g.ctx.BindVertexUniforms(g.output.uniBuf) for len(layers) > 0 { g.output.layerVertices = g.output.layerVertices[:0] atlas := 
layers[0].place.atlas @@ -898,6 +919,8 @@ restart: g.materials.vert.uniforms.scale = [2]float32{2 / float32(texSize), -2 / float32(texSize)} g.materials.vert.uniforms.pos = [2]float32{-1, +1} g.materials.vert.buf.Upload(byteslice.Struct(g.materials.vert.uniforms)) + g.ctx.BindVertexUniforms(g.materials.vert.buf) + g.ctx.BindFragmentUniforms(g.materials.frag.buf) vertexData := byteslice.Slice(m.quads) n := pow2Ceil(len(vertexData)) m.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, n) @@ -908,9 +931,8 @@ restart: if reclaimed { g.ctx.Clear(0, 0, 0, 0) } - g.ctx.BindProgram(m.prog) + g.ctx.BindPipeline(m.pipeline) g.ctx.BindVertexBuffer(m.buffer.buffer, int(unsafe.Sizeof(m.quads[0])), 0) - g.ctx.BindInputLayout(m.layout) g.ctx.DrawArrays(driver.DrawModeTriangles, 0, len(m.quads)) return nil } @@ -1096,10 +1118,8 @@ func (g *compute) render(dst driver.Texture, cpuDst cpu.ImageDescriptor, tileDim scenePadding := partitionSize - len(enc.scene)%partitionSize enc.scene = append(enc.scene, make([]scene.Command, scenePadding)...) - realloced := false scene := byteslice.Slice(enc.scene) if s := len(scene); s > g.buffers.scene.size { - realloced = true paddedCap := s * 11 / 10 if err := g.buffers.scene.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorage, paddedCap); err != nil { return err @@ -1136,7 +1156,6 @@ func (g *compute) render(dst driver.Texture, cpuDst cpu.ImageDescriptor, tileDim // clearSize is the atomic partition counter plus flag and 2 states per partition. 
clearSize := 4 + numPartitions*stateStride if clearSize > g.buffers.state.size { - realloced = true paddedCap := clearSize * 11 / 10 if err := g.buffers.state.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorage, paddedCap); err != nil { return err @@ -1149,7 +1168,6 @@ func (g *compute) render(dst driver.Texture, cpuDst cpu.ImageDescriptor, tileDim minSize := int(unsafe.Sizeof(memoryHeader{})) + int(alloc) if minSize > g.buffers.memory.size { - realloced = true // Add space for dynamic GPU allocations. const sizeBump = 4 * 1024 * 1024 minSize += sizeBump @@ -1175,10 +1193,7 @@ func (g *compute) render(dst driver.Texture, cpuDst cpu.ImageDescriptor, tileDim g.buffers.memory.upload(byteslice.Struct(g.memHeader)) g.buffers.state.upload(g.zeros(clearSize)) - if realloced { - realloced = false - g.bindBuffers() - } + g.bindBuffers() g.memoryBarrier() g.dispatch(g.programs.elements, numPartitions, 1, 1) g.memoryBarrier() @@ -1214,7 +1229,6 @@ func (g *compute) render(dst driver.Texture, cpuDst cpu.ImageDescriptor, tileDim return nil case memMallocFailed: // Resize memory and try again. 
- realloced = true sz := g.buffers.memory.size * 15 / 10 if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorage, sz); err != nil { return err @@ -1327,7 +1341,7 @@ func (g *compute) Release() { &g.programs.binning, &g.programs.coarse, &g.programs.kernel4, - g.output.blitProg, + g.output.blitPipeline, &g.output.buffer, g.output.uniBuf, &g.buffers.scene, @@ -1335,8 +1349,7 @@ func (g *compute) Release() { &g.buffers.memory, &g.buffers.config, g.images.tex, - g.materials.layout, - g.materials.prog, + g.materials.pipeline, g.materials.fbo, g.materials.tex, &g.materials.buffer, @@ -1429,7 +1442,7 @@ func (b *sizedBuffer) upload(data []byte) { func (g *compute) bindStorageBuffers(prog computeProgram, buffers ...sizedBuffer) { for i, buf := range buffers { if !g.useCPU { - prog.prog.SetStorageBuffer(i, buf.buffer) + g.ctx.BindStorageBuffer(i, buf.buffer) } else { *prog.buffers[i] = buf.cpuBuf } diff --git a/gpu/gpu.go b/gpu/gpu.go index 4bdf5347..185162b1 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -281,8 +281,7 @@ type texture struct { type blitter struct { ctx driver.Device viewport image.Point - prog [3]*program - layout driver.InputLayout + pipelines [3]*pipeline colUniforms *blitColUniforms texUniforms *blitTexUniforms linearGradientUniforms *blitLinearGradientUniforms @@ -321,8 +320,8 @@ type uniformBuffer struct { ptr []byte } -type program struct { - prog driver.Program +type pipeline struct { + pipeline driver.Pipeline vertUniforms *uniformBuffer fragUniforms *uniformBuffer } @@ -453,7 +452,6 @@ func (g *gpu) Frame(target RenderTarget) error { } g.ctx.Viewport(0, 0, viewport.X, viewport.Y) g.stencilTimer.begin() - g.ctx.SetBlend(true) g.renderer.packStencils(&g.drawOps.pathOps) g.renderer.stencilClips(g.drawOps.pathCache, g.drawOps.pathOps) g.renderer.packIntersections(g.drawOps.imageOps) @@ -463,7 +461,6 @@ func (g *gpu) Frame(target RenderTarget) error { g.ctx.BindFramebuffer(defFBO) g.ctx.Viewport(0, 0, viewport.X, 
viewport.Y) g.renderer.drawOps(g.cache, g.drawOps.imageOps) - g.ctx.SetBlend(false) g.renderer.pather.stenciler.invalidateFBO() g.coverTimer.end() g.ctx.BindFramebuffer(defFBO) @@ -558,90 +555,128 @@ func newBlitter(ctx driver.Device) *blitter { b.colUniforms = new(blitColUniforms) b.texUniforms = new(blitTexUniforms) b.linearGradientUniforms = new(blitLinearGradientUniforms) - prog, layout, err := createColorPrograms(ctx, gio.Shader_blit_vert, gio.Shader_blit_frag, + pipelines, err := createColorPrograms(ctx, gio.Shader_blit_vert, gio.Shader_blit_frag, [3]interface{}{&b.colUniforms.vert, &b.linearGradientUniforms.vert, &b.texUniforms.vert}, [3]interface{}{&b.colUniforms.frag, &b.linearGradientUniforms.frag, nil}, ) if err != nil { panic(err) } - b.prog = prog - b.layout = layout + b.pipelines = pipelines return b } func (b *blitter) release() { b.quadVerts.Release() - for _, p := range b.prog { + for _, p := range b.pipelines { p.Release() } - b.layout.Release() } -func createColorPrograms(b driver.Device, vsSrc shader.Sources, fsSrc [3]shader.Sources, vertUniforms, fragUniforms [3]interface{}) ([3]*program, driver.InputLayout, error) { - var progs [3]*program +func createColorPrograms(b driver.Device, vsSrc shader.Sources, fsSrc [3]shader.Sources, vertUniforms, fragUniforms [3]interface{}) ([3]*pipeline, error) { + var pipelines [3]*pipeline + blend := driver.BlendDesc{ + Enable: true, + SrcFactor: driver.BlendFactorOne, + DstFactor: driver.BlendFactorOneMinusSrcAlpha, + } + layout := []shader.InputDesc{ + {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, + {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, + } + vsh, err := b.NewVertexShader(vsSrc) + if err != nil { + return pipelines, err + } + defer vsh.Release() { - prog, err := b.NewProgram(vsSrc, fsSrc[materialTexture]) + fsh, err := b.NewFragmentShader(fsSrc[materialTexture]) if err != nil { - return progs, nil, err + return pipelines, err + } + defer fsh.Release() + pipe, err := 
b.NewPipeline(driver.PipelineDesc{ + VertexShader: vsh, + FragmentShader: fsh, + BlendDesc: blend, + VertexLayout: layout, + PixelFormat: driver.TextureFormatOutput, + }) + if err != nil { + return pipelines, err } var vertBuffer, fragBuffer *uniformBuffer if u := vertUniforms[materialTexture]; u != nil { vertBuffer = newUniformBuffer(b, u) - prog.SetVertexUniforms(vertBuffer.buf) } if u := fragUniforms[materialTexture]; u != nil { fragBuffer = newUniformBuffer(b, u) - prog.SetFragmentUniforms(fragBuffer.buf) } - progs[materialTexture] = newProgram(prog, vertBuffer, fragBuffer) + pipelines[materialTexture] = &pipeline{pipe, vertBuffer, fragBuffer} } { var vertBuffer, fragBuffer *uniformBuffer - prog, err := b.NewProgram(vsSrc, fsSrc[materialColor]) + fsh, err := b.NewFragmentShader(fsSrc[materialColor]) if err != nil { - progs[materialTexture].Release() - return progs, nil, err + pipelines[materialTexture].Release() + return pipelines, err + } + defer fsh.Release() + pipe, err := b.NewPipeline(driver.PipelineDesc{ + VertexShader: vsh, + FragmentShader: fsh, + BlendDesc: blend, + VertexLayout: layout, + PixelFormat: driver.TextureFormatOutput, + }) + if err != nil { + pipelines[materialTexture].Release() + return pipelines, err } if u := vertUniforms[materialColor]; u != nil { vertBuffer = newUniformBuffer(b, u) - prog.SetVertexUniforms(vertBuffer.buf) } if u := fragUniforms[materialColor]; u != nil { fragBuffer = newUniformBuffer(b, u) - prog.SetFragmentUniforms(fragBuffer.buf) } - progs[materialColor] = newProgram(prog, vertBuffer, fragBuffer) + pipelines[materialColor] = &pipeline{pipe, vertBuffer, fragBuffer} } { var vertBuffer, fragBuffer *uniformBuffer - prog, err := b.NewProgram(vsSrc, fsSrc[materialLinearGradient]) + fsh, err := b.NewFragmentShader(fsSrc[materialLinearGradient]) if err != nil { - progs[materialTexture].Release() - progs[materialColor].Release() - return progs, nil, err + pipelines[materialTexture].Release() + 
pipelines[materialColor].Release() + return pipelines, err + } + defer fsh.Release() + pipe, err := b.NewPipeline(driver.PipelineDesc{ + VertexShader: vsh, + FragmentShader: fsh, + BlendDesc: blend, + VertexLayout: layout, + PixelFormat: driver.TextureFormatOutput, + }) + if err != nil { + pipelines[materialTexture].Release() + pipelines[materialColor].Release() + return pipelines, err } if u := vertUniforms[materialLinearGradient]; u != nil { vertBuffer = newUniformBuffer(b, u) - prog.SetVertexUniforms(vertBuffer.buf) } if u := fragUniforms[materialLinearGradient]; u != nil { fragBuffer = newUniformBuffer(b, u) - prog.SetFragmentUniforms(fragBuffer.buf) } - progs[materialLinearGradient] = newProgram(prog, vertBuffer, fragBuffer) + pipelines[materialLinearGradient] = &pipeline{pipe, vertBuffer, fragBuffer} } - layout, err := b.NewInputLayout(vsSrc, []shader.InputDesc{ - {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, - {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, - }) if err != nil { - progs[materialTexture].Release() - progs[materialColor].Release() - progs[materialLinearGradient].Release() - return progs, nil, err + for _, p := range pipelines { + p.Release() + } + return pipelines, err } - return progs, layout, nil + return pipelines, nil } func (r *renderer) stencilClips(pathCache *opCache, ops []*pathOp) { @@ -669,7 +704,6 @@ func (r *renderer) intersect(ops []imageOp) { fbo := -1 r.pather.stenciler.beginIntersect(r.intersections.sizes) r.ctx.BindVertexBuffer(r.blitter.quadVerts, 4*4, 0) - r.ctx.BindInputLayout(r.pather.stenciler.iprog.layout) for _, img := range ops { if img.clipType != clipTypeIntersection { continue @@ -705,9 +739,9 @@ func (r *renderer) intersectPath(p *pathOp, clip image.Rectangle) { r.ctx.BindTexture(0, fbo.tex) coverScale, coverOff := texSpaceTransform(layout.FRect(uv), fbo.size) subScale, subOff := texSpaceTransform(layout.FRect(sub), p.clip.Size()) - r.pather.stenciler.iprog.uniforms.vert.uvTransform = 
[4]float32{coverScale.X, coverScale.Y, coverOff.X, coverOff.Y} - r.pather.stenciler.iprog.uniforms.vert.subUVTransform = [4]float32{subScale.X, subScale.Y, subOff.X, subOff.Y} - r.pather.stenciler.iprog.prog.UploadUniforms() + r.pather.stenciler.ipipeline.uniforms.vert.uvTransform = [4]float32{coverScale.X, coverScale.Y, coverOff.X, coverOff.Y} + r.pather.stenciler.ipipeline.uniforms.vert.subUVTransform = [4]float32{subScale.X, subScale.Y, subOff.X, subOff.Y} + r.pather.stenciler.ipipeline.pipeline.UploadUniforms(r.ctx) r.ctx.DrawArrays(driver.DrawModeTriangleStrip, 0, 4) } @@ -1059,9 +1093,7 @@ func (d *drawState) materialFor(rect f32.Rectangle, off f32.Point, partTrans f32 } func (r *renderer) drawOps(cache *resourceCache, ops []imageOp) { - r.ctx.BlendFunc(driver.BlendFactorOne, driver.BlendFactorOneMinusSrcAlpha) r.ctx.BindVertexBuffer(r.blitter.quadVerts, 4*4, 0) - r.ctx.BindInputLayout(r.pather.coverer.layout) var coverTex driver.Texture for _, img := range ops { m := img.material @@ -1096,8 +1128,8 @@ func (r *renderer) drawOps(cache *resourceCache, ops []imageOp) { } func (b *blitter) blit(mat materialType, col f32color.RGBA, col1, col2 f32color.RGBA, scale, off f32.Point, uvTrans f32.Affine2D) { - p := b.prog[mat] - b.ctx.BindProgram(p.prog) + p := b.pipelines[mat] + b.ctx.BindPipeline(p.pipeline) var uniforms *blitUniforms switch mat { case materialColor: @@ -1118,7 +1150,7 @@ func (b *blitter) blit(mat materialType, col f32color.RGBA, col1, col2 f32color. 
uniforms = &b.linearGradientUniforms.vert.blitUniforms } uniforms.transform = [4]float32{scale.X, scale.Y, off.X, off.Y} - p.UploadUniforms() + p.UploadUniforms(b.ctx) b.ctx.DrawArrays(driver.DrawModeTriangleStrip, 0, 4) } @@ -1146,36 +1178,26 @@ func (u *uniformBuffer) Release() { u.buf = nil } -func newProgram(prog driver.Program, vertUniforms, fragUniforms *uniformBuffer) *program { - if vertUniforms != nil { - prog.SetVertexUniforms(vertUniforms.buf) - } - if fragUniforms != nil { - prog.SetFragmentUniforms(fragUniforms.buf) - } - return &program{prog: prog, vertUniforms: vertUniforms, fragUniforms: fragUniforms} -} - -func (p *program) UploadUniforms() { +func (p *pipeline) UploadUniforms(ctx driver.Device) { if p.vertUniforms != nil { + ctx.BindVertexUniforms(p.vertUniforms.buf) p.vertUniforms.Upload() } if p.fragUniforms != nil { + ctx.BindFragmentUniforms(p.fragUniforms.buf) p.fragUniforms.Upload() } } -func (p *program) Release() { - p.prog.Release() - p.prog = nil +func (p *pipeline) Release() { + p.pipeline.Release() if p.vertUniforms != nil { p.vertUniforms.Release() - p.vertUniforms = nil } if p.fragUniforms != nil { p.fragUniforms.Release() - p.fragUniforms = nil } + *p = pipeline{} } // texSpaceTransform return the scale and offset that transforms the given subimage diff --git a/gpu/headless/driver_test.go b/gpu/headless/driver_test.go index a81eef05..5ec5a197 100644 --- a/gpu/headless/driver_test.go +++ b/gpu/headless/driver_test.go @@ -38,12 +38,22 @@ func TestSimpleShader(t *testing.T) { b := newDriver(t) sz := image.Point{X: 800, Y: 600} fbo := setupFBO(t, b, sz) - p, err := b.NewProgram(gio.Shader_simple_vert, gio.Shader_simple_frag) + vsh, fsh, err := newShaders(b, gio.Shader_simple_vert, gio.Shader_simple_frag) + if err != nil { + t.Fatal(err) + } + defer vsh.Release() + defer fsh.Release() + p, err := b.NewPipeline(driver.PipelineDesc{ + VertexShader: vsh, + FragmentShader: fsh, + PixelFormat: driver.TextureFormatSRGBA, + }) if err != nil { 
t.Fatal(err) } defer p.Release() - b.BindProgram(p) + b.BindPipeline(p) b.DrawArrays(driver.DrawModeTriangles, 0, 3) img := screenshot(t, b, fbo, sz) if got := img.RGBAAt(0, 0); got != clearColExpect { @@ -61,12 +71,30 @@ func TestInputShader(t *testing.T) { b := newDriver(t) sz := image.Point{X: 800, Y: 600} fbo := setupFBO(t, b, sz) - p, err := b.NewProgram(gio.Shader_input_vert, gio.Shader_simple_frag) + vsh, fsh, err := newShaders(b, gio.Shader_input_vert, gio.Shader_simple_frag) if err != nil { t.Fatal(err) } - defer p.Release() - b.BindProgram(p) + defer vsh.Release() + defer fsh.Release() + layout := []shader.InputDesc{ + { + Type: shader.DataTypeFloat, + Size: 4, + Offset: 0, + }, + } + pipe, err := b.NewPipeline(driver.PipelineDesc{ + VertexShader: vsh, + FragmentShader: fsh, + VertexLayout: layout, + PixelFormat: driver.TextureFormatSRGBA, + }) + if err != nil { + t.Fatal(err) + } + defer pipe.Release() + b.BindPipeline(pipe) buf, err := b.NewImmutableBuffer(driver.BufferBindingVertices, byteslice.Slice([]float32{ 0, .5, .5, 1, @@ -79,18 +107,6 @@ func TestInputShader(t *testing.T) { } defer buf.Release() b.BindVertexBuffer(buf, 4*4, 0) - layout, err := b.NewInputLayout(gio.Shader_input_vert, []shader.InputDesc{ - { - Type: shader.DataTypeFloat, - Size: 4, - Offset: 0, - }, - }) - if err != nil { - t.Fatal(err) - } - defer layout.Release() - b.BindInputLayout(layout) b.DrawArrays(driver.DrawModeTriangles, 0, 3) img := screenshot(t, b, fbo, sz) if got := img.RGBAAt(0, 0); got != clearColExpect { @@ -103,6 +119,18 @@ func TestInputShader(t *testing.T) { } } +func newShaders(ctx driver.Device, vsrc, fsrc shader.Sources) (vert driver.VertexShader, frag driver.FragmentShader, err error) { + vert, err = ctx.NewVertexShader(vsrc) + if err != nil { + return + } + frag, err = ctx.NewFragmentShader(fsrc) + if err != nil { + vert.Release() + } + return +} + func TestFramebuffers(t *testing.T) { b := newDriver(t) sz := image.Point{X: 800, Y: 600} diff --git 
a/gpu/internal/d3d11/d3d11_windows.go b/gpu/internal/d3d11/d3d11_windows.go index 4ee0e952..7c38116b 100644 --- a/gpu/internal/d3d11/d3d11_windows.go +++ b/gpu/internal/d3d11/d3d11_windows.go @@ -24,10 +24,8 @@ type Backend struct { // Temporary storage to avoid garbage. clearColor [4]float32 viewport d3d11.VIEWPORT - blendState blendState - // Current program. - prog *Program + pipeline *Pipeline caps driver.Caps @@ -35,15 +33,13 @@ type Backend struct { fbo *Framebuffer floatFormat uint32 - - // cached state objects. - blendStates map[blendState]*d3d11.BlendState } -type blendState struct { - enable bool - sfactor driver.BlendFactor - dfactor driver.BlendFactor +type Pipeline struct { + vert *d3d11.VertexShader + frag *d3d11.PixelShader + layout *d3d11.InputLayout + blend *d3d11.BlendState } type Texture struct { @@ -57,17 +53,15 @@ type Texture struct { height int } -type Program struct { +type VertexShader struct { backend *Backend + shader *d3d11.VertexShader + src shader.Sources +} - vert struct { - shader *d3d11.VertexShader - uniforms *Buffer - } - frag struct { - shader *d3d11.PixelShader - uniforms *Buffer - } +type FragmentShader struct { + backend *Backend + shader *d3d11.PixelShader } type Framebuffer struct { @@ -86,10 +80,6 @@ type Buffer struct { immutable bool } -type InputLayout struct { - layout *d3d11.InputLayout -} - func init() { driver.NewDirect3D11Device = newDirect3D11Device } @@ -122,7 +112,6 @@ func newDirect3D11Device(api driver.Direct3D11) (driver.Device, error) { MaxTextureSize: 2048, // 9.1 maximum Features: driver.FeatureSRGB, }, - blendStates: make(map[blendState]*d3d11.BlendState), } featLvl := dev.GetFeatureLevel() if featLvl < d3d11.FEATURE_LEVEL_9_1 { @@ -191,9 +180,6 @@ func (b *Backend) IsTimeContinuous() bool { } func (b *Backend) Release() { - for _, state := range b.blendStates { - d3d11.IUnknownRelease(unsafe.Pointer(state), state.Vtbl.Release) - } d3d11.IUnknownRelease(unsafe.Pointer(b.ctx), b.ctx.Vtbl.Release) *b = 
Backend{} } @@ -288,7 +274,7 @@ func (b *Backend) NewFramebuffer(tex driver.Texture) (driver.Framebuffer, error) return fbo, nil } -func (b *Backend) NewInputLayout(vertexShader shader.Sources, layout []shader.InputDesc) (driver.InputLayout, error) { +func (b *Backend) newInputLayout(vertexShader shader.Sources, layout []shader.InputDesc) (*d3d11.InputLayout, error) { if len(vertexShader.Inputs) != len(layout) { return nil, fmt.Errorf("NewInputLayout: got %d inputs, expected %d", len(layout), len(vertexShader.Inputs)) } @@ -333,11 +319,7 @@ func (b *Backend) NewInputLayout(vertexShader shader.Sources, layout []shader.In AlignedByteOffset: uint32(l.Offset), } } - l, err := b.dev.CreateInputLayout(descs, []byte(vertexShader.DXBC)) - if err != nil { - return nil, err - } - return &InputLayout{layout: l}, nil + return b.dev.CreateInputLayout(descs, []byte(vertexShader.DXBC)) } func (b *Backend) NewBuffer(typ driver.BufferBinding, size int) (driver.Buffer, error) { @@ -385,19 +367,69 @@ func (b *Backend) NewComputeProgram(shader shader.Sources) (driver.Program, erro panic("not implemented") } -func (b *Backend) NewProgram(vertexShader, fragmentShader shader.Sources) (driver.Program, error) { - vs, err := b.dev.CreateVertexShader([]byte(vertexShader.DXBC)) +func (b *Backend) NewPipeline(desc driver.PipelineDesc) (driver.Pipeline, error) { + vsh := desc.VertexShader.(*VertexShader) + fsh := desc.FragmentShader.(*FragmentShader) + blend, err := b.newBlendState(desc.BlendDesc) if err != nil { return nil, err } - ps, err := b.dev.CreatePixelShader([]byte(fragmentShader.DXBC)) + var layout *d3d11.InputLayout + if l := desc.VertexLayout; l != nil { + var err error + layout, err = b.newInputLayout(vsh.src, l) + if err != nil { + d3d11.IUnknownRelease(unsafe.Pointer(blend), blend.Vtbl.Release) + return nil, err + } + } + + // Retain shaders. 
+ vshRef := vsh.shader + fshRef := fsh.shader + d3d11.IUnknownAddRef(unsafe.Pointer(vshRef), vshRef.Vtbl.AddRef) + d3d11.IUnknownAddRef(unsafe.Pointer(fshRef), fshRef.Vtbl.AddRef) + + return &Pipeline{ + vert: vshRef, + frag: fshRef, + layout: layout, + blend: blend, + }, nil +} + +func (b *Backend) newBlendState(desc driver.BlendDesc) (*d3d11.BlendState, error) { + var d3ddesc d3d11.BLEND_DESC + t0 := &d3ddesc.RenderTarget[0] + t0.RenderTargetWriteMask = d3d11.COLOR_WRITE_ENABLE_ALL + t0.BlendOp = d3d11.BLEND_OP_ADD + t0.BlendOpAlpha = d3d11.BLEND_OP_ADD + if desc.Enable { + t0.BlendEnable = 1 + } + scol, salpha := toBlendFactor(desc.SrcFactor) + dcol, dalpha := toBlendFactor(desc.DstFactor) + t0.SrcBlend = scol + t0.SrcBlendAlpha = salpha + t0.DestBlend = dcol + t0.DestBlendAlpha = dalpha + return b.dev.CreateBlendState(&d3ddesc) +} + +func (b *Backend) NewVertexShader(src shader.Sources) (driver.VertexShader, error) { + vs, err := b.dev.CreateVertexShader([]byte(src.DXBC)) if err != nil { return nil, err } - p := &Program{backend: b} - p.vert.shader = vs - p.frag.shader = ps - return p, nil + return &VertexShader{b, vs, src}, nil +} + +func (b *Backend) NewFragmentShader(src shader.Sources) (driver.FragmentShader, error) { + fs, err := b.dev.CreatePixelShader([]byte(src.DXBC)) + if err != nil { + return nil, err + } + return &FragmentShader{b, fs}, nil } func (b *Backend) Clear(colr, colg, colb, cola float32) { @@ -428,15 +460,11 @@ func (b *Backend) DrawElements(mode driver.DrawMode, off, count int) { } func (b *Backend) prepareDraw(mode driver.DrawMode) { - if p := b.prog; p != nil { - b.ctx.VSSetShader(p.vert.shader) - b.ctx.PSSetShader(p.frag.shader) - if buf := p.vert.uniforms; buf != nil { - b.ctx.VSSetConstantBuffers(buf.buf) - } - if buf := p.frag.uniforms; buf != nil { - b.ctx.PSSetConstantBuffers(buf.buf) - } + if p := b.pipeline; p != nil { + b.ctx.VSSetShader(p.vert) + b.ctx.PSSetShader(p.frag) + b.ctx.IASetInputLayout(p.layout) + 
b.ctx.OMSetBlendState(p.blend, nil, 0xffffffff) } var topology uint32 switch mode { @@ -448,40 +476,6 @@ func (b *Backend) prepareDraw(mode driver.DrawMode) { panic("unsupported draw mode") } b.ctx.IASetPrimitiveTopology(topology) - - blendState, ok := b.blendStates[b.blendState] - if !ok { - var desc d3d11.BLEND_DESC - t0 := &desc.RenderTarget[0] - t0.RenderTargetWriteMask = d3d11.COLOR_WRITE_ENABLE_ALL - t0.BlendOp = d3d11.BLEND_OP_ADD - t0.BlendOpAlpha = d3d11.BLEND_OP_ADD - if b.blendState.enable { - t0.BlendEnable = 1 - } - scol, salpha := toBlendFactor(b.blendState.sfactor) - dcol, dalpha := toBlendFactor(b.blendState.dfactor) - t0.SrcBlend = scol - t0.SrcBlendAlpha = salpha - t0.DestBlend = dcol - t0.DestBlendAlpha = dalpha - var err error - blendState, err = b.dev.CreateBlendState(&desc) - if err != nil { - panic(err) - } - b.blendStates[b.blendState] = blendState - } - b.ctx.OMSetBlendState(blendState, nil, 0xffffffff) -} - -func (b *Backend) SetBlend(enable bool) { - b.blendState.enable = enable -} - -func (b *Backend) BlendFunc(sfactor, dfactor driver.BlendFactor) { - b.blendState.sfactor = sfactor - b.blendState.dfactor = dfactor } func (b *Backend) BindImageTexture(unit int, tex driver.Texture, access driver.AccessBits, f driver.TextureFormat) { @@ -531,27 +525,46 @@ func (b *Backend) BindTexture(unit int, tex driver.Texture) { b.ctx.PSSetShaderResources(uint32(unit), t.resView) } +func (b *Backend) BindPipeline(pipe driver.Pipeline) { + b.pipeline = pipe.(*Pipeline) +} + func (b *Backend) BindProgram(prog driver.Program) { - b.prog = prog.(*Program) -} - -func (p *Program) Release() { - d3d11.IUnknownRelease(unsafe.Pointer(p.vert.shader), p.vert.shader.Vtbl.Release) - d3d11.IUnknownRelease(unsafe.Pointer(p.frag.shader), p.frag.shader.Vtbl.Release) - p.vert.shader = nil - p.frag.shader = nil -} - -func (p *Program) SetStorageBuffer(binding int, buffer driver.Buffer) { panic("not implemented") } -func (p *Program) SetVertexUniforms(buf driver.Buffer) { 
- p.vert.uniforms = buf.(*Buffer) +func (s *VertexShader) Release() { + d3d11.IUnknownRelease(unsafe.Pointer(s.shader), s.shader.Vtbl.Release) + *s = VertexShader{} } -func (p *Program) SetFragmentUniforms(buf driver.Buffer) { - p.frag.uniforms = buf.(*Buffer) +func (s *FragmentShader) Release() { + d3d11.IUnknownRelease(unsafe.Pointer(s.shader), s.shader.Vtbl.Release) + *s = FragmentShader{} +} + +func (p *Pipeline) Release() { + d3d11.IUnknownRelease(unsafe.Pointer(p.vert), p.vert.Vtbl.Release) + d3d11.IUnknownRelease(unsafe.Pointer(p.frag), p.frag.Vtbl.Release) + d3d11.IUnknownRelease(unsafe.Pointer(p.blend), p.blend.Vtbl.Release) + if l := p.layout; l != nil { + d3d11.IUnknownRelease(unsafe.Pointer(l), l.Vtbl.Release) + } + *p = Pipeline{} +} + +func (b *Backend) BindStorageBuffer(binding int, buffer driver.Buffer) { + panic("not implemented") +} + +func (b *Backend) BindVertexUniforms(buffer driver.Buffer) { + buf := buffer.(*Buffer) + b.ctx.VSSetConstantBuffers(buf.buf) +} + +func (b *Backend) BindFragmentUniforms(buffer driver.Buffer) { + buf := buffer.(*Buffer) + b.ctx.PSSetConstantBuffers(buf.buf) } func (b *Backend) BindVertexBuffer(buf driver.Buffer, stride, offset int) { @@ -650,15 +663,6 @@ func (f *Framebuffer) Release() { func (f *Framebuffer) ImplementsRenderTarget() {} -func (b *Backend) BindInputLayout(layout driver.InputLayout) { - b.ctx.IASetInputLayout(layout.(*InputLayout).layout) -} - -func (l *InputLayout) Release() { - d3d11.IUnknownRelease(unsafe.Pointer(l.layout), l.layout.Vtbl.Release) - l.layout = nil -} - func convBufferBinding(typ driver.BufferBinding) uint32 { var bindings uint32 if typ&driver.BufferBindingVertices != 0 { diff --git a/gpu/internal/driver/driver.go b/gpu/internal/driver/driver.go index 1ffb33d5..547d34a1 100644 --- a/gpu/internal/driver/driver.go +++ b/gpu/internal/driver/driver.go @@ -26,23 +26,25 @@ type Device interface { NewImmutableBuffer(typ BufferBinding, data []byte) (Buffer, error) NewBuffer(typ 
BufferBinding, size int) (Buffer, error) NewComputeProgram(shader shader.Sources) (Program, error) - NewProgram(vertexShader, fragmentShader shader.Sources) (Program, error) - NewInputLayout(vertexShader shader.Sources, layout []shader.InputDesc) (InputLayout, error) + NewVertexShader(src shader.Sources) (VertexShader, error) + NewFragmentShader(src shader.Sources) (FragmentShader, error) + NewPipeline(desc PipelineDesc) (Pipeline, error) Clear(r, g, b, a float32) Viewport(x, y, width, height int) DrawArrays(mode DrawMode, off, count int) DrawElements(mode DrawMode, off, count int) - SetBlend(enable bool) - BlendFunc(sfactor, dfactor BlendFactor) - BindInputLayout(i InputLayout) BindProgram(p Program) + BindPipeline(p Pipeline) BindFramebuffer(f Framebuffer) BindTexture(unit int, t Texture) BindVertexBuffer(b Buffer, stride, offset int) BindIndexBuffer(b Buffer) BindImageTexture(unit int, texture Texture, access AccessBits, format TextureFormat) + BindVertexUniforms(buf Buffer) + BindFragmentUniforms(buf Buffer) + BindStorageBuffer(binding int, buf Buffer) BlitFramebuffer(dst, src Framebuffer, srect, drect image.Rectangle) MemoryBarrier() @@ -51,12 +53,23 @@ type Device interface { Release() } -// InputLayout is the driver specific representation of the mapping -// between Buffers and shader attributes. 
-type InputLayout interface { +type Pipeline interface { Release() } +type PipelineDesc struct { + VertexShader VertexShader + FragmentShader FragmentShader + VertexLayout []shader.InputDesc + BlendDesc BlendDesc + PixelFormat TextureFormat +} + +type BlendDesc struct { + Enable bool + SrcFactor, DstFactor BlendFactor +} + type AccessBits uint8 type BlendFactor uint8 @@ -78,11 +91,16 @@ type Caps struct { MaxTextureSize int } +type VertexShader interface { + Release() +} + +type FragmentShader interface { + Release() +} + type Program interface { Release() - SetStorageBuffer(binding int, buf Buffer) - SetVertexUniforms(buf Buffer) - SetFragmentUniforms(buf Buffer) } type Buffer interface { @@ -123,6 +141,8 @@ const ( TextureFormatSRGBA TextureFormat = iota TextureFormatFloat TextureFormatRGBA8 + // TextureFormatOutput denotes the format used by the output framebuffer. + TextureFormatOutput ) const ( diff --git a/gpu/internal/opengl/opengl.go b/gpu/internal/opengl/opengl.go index 792e9760..00d2c8ff 100644 --- a/gpu/internal/opengl/opengl.go +++ b/gpu/internal/opengl/opengl.go @@ -32,8 +32,11 @@ type Backend struct { // textures. floatTriple textureTriple // Single channel alpha textures. 
- alphaTriple textureTriple - srgbaTriple textureTriple + alphaTriple textureTriple + srgbaTriple textureTriple + vertUniforms *buffer + fragUniforms *buffer + storage [storageBindings]*buffer sRGBFBO *SRGBFBO @@ -80,9 +83,8 @@ type glState struct { } type state struct { - prog *program - layout *inputLayout - buffer bufferBinding + pipeline *pipeline + buffer bufferBinding } type bufferBinding struct { @@ -110,6 +112,13 @@ type framebuffer struct { foreign bool } +type pipeline struct { + prog *program + inputs []shader.InputLocation + layout []shader.InputDesc + blend driver.BlendDesc +} + type buffer struct { backend *Backend hasBuffer bool @@ -117,24 +126,26 @@ type buffer struct { typ driver.BufferBinding size int immutable bool - version int // For emulation of uniform buffers. data []byte } +type glshader struct { + backend *Backend + obj gl.Shader + src shader.Sources +} + type program struct { backend *Backend obj gl.Program - vertUniforms uniformsTracker - fragUniforms uniformsTracker - storage [storageBindings]*buffer + vertUniforms uniforms + fragUniforms uniforms } -type uniformsTracker struct { - locs []uniformLocation - size int - buf *buffer - version int +type uniforms struct { + locs []uniformLocation + size int } type uniformLocation struct { @@ -749,11 +760,9 @@ func (b *Backend) MemoryBarrier() { } func (b *Backend) DispatchCompute(x, y, z int) { - if p := b.state.prog; p != nil { - for binding, buf := range p.storage { - if buf != nil { - b.glstate.bindBufferBase(b.funcs, gl.SHADER_STORAGE_BUFFER, binding, buf.obj) - } + for binding, buf := range b.storage { + if buf != nil { + b.glstate.bindBufferBase(b.funcs, gl.SHADER_STORAGE_BUFFER, binding, buf.obj) } } b.funcs.DispatchCompute(x, y, z) @@ -780,11 +789,6 @@ func (b *Backend) BindImageTexture(unit int, tex driver.Texture, access driver.A b.funcs.BindImageTexture(unit, t.obj, 0, false, 0, acc, format) } -func (b *Backend) useProgram(p *program) { - b.glstate.useProgram(b.funcs, p.obj) - 
b.state.prog = p -} - func (b *Backend) BlendFunc(sfactor, dfactor driver.BlendFactor) { src, dst := toGLBlendFactor(sfactor), toGLBlendFactor(dfactor) b.glstate.setBlendFuncSeparate(b.funcs, src, dst, src, dst) @@ -822,12 +826,12 @@ func (b *Backend) DrawArrays(mode driver.DrawMode, off, count int) { } func (b *Backend) prepareDraw() { - p := b.state.prog + p := b.state.pipeline if p == nil { return } b.setupVertexArrays() - p.updateUniforms() + p.prog.updateUniforms() } func toGLDrawMode(mode driver.DrawMode) gl.Enum { @@ -850,21 +854,6 @@ func (b *Backend) Clear(colR, colG, colB, colA float32) { b.funcs.Clear(gl.COLOR_BUFFER_BIT) } -func (b *Backend) NewInputLayout(vs shader.Sources, layout []shader.InputDesc) (driver.InputLayout, error) { - if len(vs.Inputs) != len(layout) { - return nil, fmt.Errorf("NewInputLayout: got %d inputs, expected %d", len(layout), len(vs.Inputs)) - } - for i, inp := range vs.Inputs { - if exp, got := inp.Size, layout[i].Size; exp != got { - return nil, fmt.Errorf("NewInputLayout: data size mismatch for %q: got %d expected %d", inp.Name, got, exp) - } - } - return &inputLayout{ - inputs: vs.Inputs, - layout: layout, - }, nil -} - func (b *Backend) NewComputeProgram(src shader.Sources) (driver.Program, error) { p, err := gl.CreateComputeProgram(b.funcs, src.GLSL310ES) if err != nil { @@ -876,48 +865,94 @@ func (b *Backend) NewComputeProgram(src shader.Sources) (driver.Program, error) }, nil } -func (b *Backend) NewProgram(vertShader, fragShader shader.Sources) (driver.Program, error) { - attr := make([]string, len(vertShader.Inputs)) - for _, inp := range vertShader.Inputs { - attr[inp.Location] = inp.Name +func (b *Backend) NewVertexShader(src shader.Sources) (driver.VertexShader, error) { + glslSrc := b.glslFor(src) + sh, err := gl.CreateShader(b.funcs, gl.VERTEX_SHADER, glslSrc) + return &glshader{backend: b, obj: sh, src: src}, err +} + +func (b *Backend) NewFragmentShader(src shader.Sources) (driver.FragmentShader, error) { + 
glslSrc := b.glslFor(src) + sh, err := gl.CreateShader(b.funcs, gl.FRAGMENT_SHADER, glslSrc) + return &glshader{backend: b, obj: sh, src: src}, err +} + +func (b *Backend) glslFor(src shader.Sources) string { + if b.glver[0] < 3 { + return src.GLSL100ES } - vsrc, fsrc := vertShader.GLSL100ES, fragShader.GLSL100ES - if b.glver[0] >= 3 { - // OpenGL (ES) 3.0. - switch { - case b.gles: - vsrc, fsrc = vertShader.GLSL300ES, fragShader.GLSL300ES - case b.glver[0] >= 4 || b.glver[1] >= 2: - // OpenGL 3.2 Core only accepts glsl 1.50 or newer. - vsrc, fsrc = vertShader.GLSL150, fragShader.GLSL150 - default: - vsrc, fsrc = vertShader.GLSL130, fragShader.GLSL130 - } + // OpenGL (ES) 3.0. + switch { + case b.gles: + return src.GLSL300ES + case b.glver[0] >= 4 || b.glver[1] >= 2: + // OpenGL 3.2 Core only accepts glsl 1.50 or newer. + return src.GLSL150 + default: + return src.GLSL130 } - p, err := gl.CreateProgram(b.funcs, vsrc, fsrc, attr) +} + +func (b *Backend) NewPipeline(desc driver.PipelineDesc) (driver.Pipeline, error) { + p, err := b.newProgram(desc) if err != nil { return nil, err } + layout := desc.VertexLayout + vsrc := desc.VertexShader.(*glshader).src + if len(vsrc.Inputs) != len(layout) { + return nil, fmt.Errorf("opengl: got %d inputs, expected %d", len(layout), len(vsrc.Inputs)) + } + for i, inp := range vsrc.Inputs { + if exp, got := inp.Size, layout[i].Size; exp != got { + return nil, fmt.Errorf("opengl: data size mismatch for %q: got %d expected %d", inp.Name, got, exp) + } + } + return &pipeline{ + prog: p, + inputs: vsrc.Inputs, + layout: layout, + blend: desc.BlendDesc, + }, nil +} + +func (b *Backend) newProgram(desc driver.PipelineDesc) (*program, error) { + p := b.funcs.CreateProgram() + if !p.Valid() { + return nil, errors.New("opengl: glCreateProgram failed") + } + vsh, fsh := desc.VertexShader.(*glshader), desc.FragmentShader.(*glshader) + b.funcs.AttachShader(p, vsh.obj) + b.funcs.AttachShader(p, fsh.obj) + for _, inp := range vsh.src.Inputs { + 
b.funcs.BindAttribLocation(p, gl.Attrib(inp.Location), inp.Name) + } + b.funcs.LinkProgram(p) + if b.funcs.GetProgrami(p, gl.LINK_STATUS) == 0 { + log := b.funcs.GetProgramInfoLog(p) + b.funcs.DeleteProgram(p) + return nil, fmt.Errorf("opengl: program link failed: %s", strings.TrimSpace(log)) + } prog := &program{ backend: b, obj: p, } - b.BindProgram(prog) + b.glstate.useProgram(b.funcs, p) // Bind texture uniforms. - for _, tex := range vertShader.Textures { + for _, tex := range vsh.src.Textures { u := b.funcs.GetUniformLocation(p, tex.Name) if u.Valid() { b.funcs.Uniform1i(u, tex.Binding) } } - for _, tex := range fragShader.Textures { + for _, tex := range fsh.src.Textures { u := b.funcs.GetUniformLocation(p, tex.Name) if u.Valid() { b.funcs.Uniform1i(u, tex.Binding) } } if b.ubo { - for _, block := range vertShader.Uniforms.Blocks { + for _, block := range vsh.src.Uniforms.Blocks { blockIdx := b.funcs.GetUniformBlockIndex(p, block.Name) if blockIdx != gl.INVALID_INDEX { b.funcs.UniformBlockBinding(p, blockIdx, uint(block.Binding)) @@ -926,16 +961,16 @@ func (b *Backend) NewProgram(vertShader, fragShader shader.Sources) (driver.Prog // To match Direct3D 11 with separate vertex and fragment // shader uniform buffers, offset all fragment blocks to be // located after the vertex blocks. 
- off := len(vertShader.Uniforms.Blocks) - for _, block := range fragShader.Uniforms.Blocks { + off := len(vsh.src.Uniforms.Blocks) + for _, block := range fsh.src.Uniforms.Blocks { blockIdx := b.funcs.GetUniformBlockIndex(p, block.Name) if blockIdx != gl.INVALID_INDEX { b.funcs.UniformBlockBinding(p, blockIdx, uint(block.Binding+off)) } } } else { - prog.vertUniforms.setup(b.funcs, p, vertShader.Uniforms.Size, vertShader.Uniforms.Locations) - prog.fragUniforms.setup(b.funcs, p, fragShader.Uniforms.Size, fragShader.Uniforms.Locations) + prog.vertUniforms.setup(b.funcs, p, vsh.src.Uniforms.Size, vsh.src.Uniforms.Locations) + prog.fragUniforms.setup(b.funcs, p, fsh.src.Uniforms.Size, fsh.src.Uniforms.Locations) } return prog, nil } @@ -948,47 +983,59 @@ func lookupUniform(funcs *gl.Functions, p gl.Program, loc shader.UniformLocation return uniformLocation{uniform: u, offset: loc.Offset, typ: loc.Type, size: loc.Size} } -func (p *program) SetStorageBuffer(binding int, buf driver.Buffer) { - b := buf.(*buffer) - if b.typ&driver.BufferBindingShaderStorage == 0 { +func (b *Backend) BindStorageBuffer(binding int, buf driver.Buffer) { + bf := buf.(*buffer) + if bf.typ&driver.BufferBindingShaderStorage == 0 { panic("not a shader storage buffer") } - p.storage[binding] = b + b.storage[binding] = bf } -func (p *program) SetVertexUniforms(buf driver.Buffer) { - p.vertUniforms.setBuffer(buf) +func (b *Backend) BindVertexUniforms(buf driver.Buffer) { + bf := buf.(*buffer) + if bf.typ&driver.BufferBindingUniforms == 0 { + panic("not a uniform buffer") + } + b.vertUniforms = bf } -func (p *program) SetFragmentUniforms(buf driver.Buffer) { - p.fragUniforms.setBuffer(buf) +func (b *Backend) BindFragmentUniforms(buf driver.Buffer) { + bf := buf.(*buffer) + if bf.typ&driver.BufferBindingUniforms == 0 { + panic("not a uniform buffer") + } + b.fragUniforms = bf } func (p *program) updateUniforms() { f := p.backend.funcs if p.backend.ubo { - if b := p.vertUniforms.buf; b != nil { + if b 
:= p.backend.vertUniforms; b != nil { p.backend.glstate.bindBufferBase(f, gl.UNIFORM_BUFFER, 0, b.obj) } - if b := p.fragUniforms.buf; b != nil { + if b := p.backend.fragUniforms; b != nil { p.backend.glstate.bindBufferBase(f, gl.UNIFORM_BUFFER, 1, b.obj) } } else { - p.vertUniforms.update(f) - p.fragUniforms.update(f) + p.vertUniforms.update(f, p.backend.vertUniforms) + p.fragUniforms.update(f, p.backend.fragUniforms) } } func (b *Backend) BindProgram(prog driver.Program) { p := prog.(*program) - b.useProgram(p) + b.glstate.useProgram(b.funcs, p.obj) +} + +func (s *glshader) Release() { + s.backend.funcs.DeleteShader(s.obj) } func (p *program) Release() { p.backend.glstate.deleteProgram(p.backend.funcs, p.obj) } -func (u *uniformsTracker) setup(funcs *gl.Functions, p gl.Program, uniformSize int, uniforms []shader.UniformLocation) { +func (u *uniforms) setup(funcs *gl.Functions, p gl.Program, uniformSize int, uniforms []shader.UniformLocation) { u.locs = make([]uniformLocation, len(uniforms)) for i, uniform := range uniforms { u.locs[i] = lookupUniform(funcs, p, uniform) @@ -996,26 +1043,11 @@ func (u *uniformsTracker) setup(funcs *gl.Functions, p gl.Program, uniformSize i u.size = uniformSize } -func (u *uniformsTracker) setBuffer(buf driver.Buffer) { - b := buf.(*buffer) - if b.typ&driver.BufferBindingUniforms == 0 { - panic("not a uniform buffer") +func (p *uniforms) update(funcs *gl.Functions, buf *buffer) { + if buf.size < p.size { + panic(fmt.Errorf("uniform buffer too small, got %d need %d", buf.size, p.size)) } - if b.size < u.size { - panic(fmt.Errorf("uniform buffer too small, got %d need %d", b.size, u.size)) - } - u.buf = b - // Force update. 
- u.version = b.version - 1 -} - -func (p *uniformsTracker) update(funcs *gl.Functions) { - b := p.buf - if b == nil || b.version == p.version { - return - } - p.version = b.version - data := b.data + data := buf.data for _, u := range p.locs { data := data[u.offset:] switch { @@ -1048,7 +1080,6 @@ func (b *buffer) Upload(data []byte) { if len(data) > b.size { panic("buffer size overflow") } - b.version++ copy(b.data, data) if b.hasBuffer { firstBinding := firstBufferType(b.typ) @@ -1100,15 +1131,17 @@ func (b *Backend) BindVertexBuffer(buf driver.Buffer, stride, offset int) { } func (b *Backend) setupVertexArrays() { - layout := b.state.layout - if layout == nil { + p := b.state.pipeline + inputs := p.inputs + if len(inputs) == 0 { return } + layout := p.layout const max = len(b.glstate.vertAttribs) var enabled [max]bool buf := b.state.buffer - for i, inp := range layout.inputs { - l := layout.layout[i] + for i, inp := range inputs { + l := layout[i] var gltyp gl.Enum switch l.Type { case shader.DataTypeFloat: @@ -1154,6 +1187,14 @@ func (f *framebuffer) ReadPixels(src image.Rectangle, pixels []byte) error { return glErr(f.backend.funcs) } +func (b *Backend) BindPipeline(pl driver.Pipeline) { + p := pl.(*pipeline) + b.state.pipeline = p + b.glstate.useProgram(b.funcs, p.prog.obj) + b.SetBlend(p.blend.Enable) + b.BlendFunc(p.blend.SrcFactor, p.blend.DstFactor) +} + func (b *Backend) BindFramebuffer(fbo driver.Framebuffer) { b.glstate.bindFramebuffer(b.funcs, gl.FRAMEBUFFER, fbo.(*framebuffer).obj) } @@ -1172,6 +1213,11 @@ func (f *framebuffer) Release() { func (f *framebuffer) ImplementsRenderTarget() {} +func (p *pipeline) Release() { + p.prog.Release() + *p = pipeline{} +} + func toTexFilter(f driver.TextureFilter) int { switch f { case driver.FilterNearest: @@ -1224,12 +1270,6 @@ func (t *timer) Duration() (time.Duration, bool) { return time.Duration(nanos), true } -func (b *Backend) BindInputLayout(l driver.InputLayout) { - b.state.layout = l.(*inputLayout) -} 
- -func (l *inputLayout) Release() {} - // floatTripleFor determines the best texture triple for floating point FBOs. func floatTripleFor(f *gl.Functions, ver [2]int, exts []string) (textureTriple, error) { var triples []textureTriple diff --git a/gpu/path.go b/gpu/path.go index 32c4980d..e638f7de 100644 --- a/gpu/path.go +++ b/gpu/path.go @@ -30,11 +30,10 @@ type pather struct { type coverer struct { ctx driver.Device - prog [3]*program + pipelines [3]*pipeline texUniforms *coverTexUniforms colUniforms *coverColUniforms linearGradientUniforms *coverLinearGradientUniforms - layout driver.InputLayout } type coverTexUniforms struct { @@ -73,16 +72,14 @@ type coverUniforms struct { } type stenciler struct { - ctx driver.Device - prog struct { - prog *program + ctx driver.Device + pipeline struct { + pipeline *pipeline uniforms *stencilUniforms - layout driver.InputLayout } - iprog struct { - prog *program + ipipeline struct { + pipeline *pipeline uniforms *intersectUniforms - layout driver.InputLayout } fbos fboSet intersections fboSet @@ -163,15 +160,14 @@ func newCoverer(ctx driver.Device) *coverer { c.colUniforms = new(coverColUniforms) c.texUniforms = new(coverTexUniforms) c.linearGradientUniforms = new(coverLinearGradientUniforms) - prog, layout, err := createColorPrograms(ctx, gio.Shader_cover_vert, gio.Shader_cover_frag, + pipelines, err := createColorPrograms(ctx, gio.Shader_cover_vert, gio.Shader_cover_frag, [3]interface{}{&c.colUniforms.vert, &c.linearGradientUniforms.vert, &c.texUniforms.vert}, [3]interface{}{&c.colUniforms.frag, &c.linearGradientUniforms.frag, nil}, ) if err != nil { panic(err) } - c.prog = prog - c.layout = layout + c.pipelines = pipelines return c } @@ -191,43 +187,64 @@ func newStenciler(ctx driver.Device) *stenciler { if err != nil { panic(err) } - progLayout, err := ctx.NewInputLayout(gio.Shader_stencil_vert, []shader.InputDesc{ + progLayout := []shader.InputDesc{ {Type: shader.DataTypeFloat, Size: 1, Offset: 
int(unsafe.Offsetof((*(*vertex)(nil)).Corner))}, {Type: shader.DataTypeFloat, Size: 1, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).MaxY))}, {Type: shader.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).FromX))}, {Type: shader.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).CtrlX))}, {Type: shader.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).ToX))}, - }) - if err != nil { - panic(err) } - iprogLayout, err := ctx.NewInputLayout(gio.Shader_intersect_vert, []shader.InputDesc{ + iprogLayout := []shader.InputDesc{ {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, - }) - if err != nil { - panic(err) } st := &stenciler{ ctx: ctx, indexBuf: indexBuf, } - prog, err := ctx.NewProgram(gio.Shader_stencil_vert, gio.Shader_stencil_frag) + vsh, fsh, err := newShaders(ctx, gio.Shader_stencil_vert, gio.Shader_stencil_frag) if err != nil { panic(err) } - st.prog.uniforms = new(stencilUniforms) - vertUniforms := newUniformBuffer(ctx, &st.prog.uniforms.vert) - st.prog.prog = newProgram(prog, vertUniforms, nil) - st.prog.layout = progLayout - iprog, err := ctx.NewProgram(gio.Shader_intersect_vert, gio.Shader_intersect_frag) + defer vsh.Release() + defer fsh.Release() + st.pipeline.uniforms = new(stencilUniforms) + vertUniforms := newUniformBuffer(ctx, &st.pipeline.uniforms.vert) + pipe, err := st.ctx.NewPipeline(driver.PipelineDesc{ + VertexShader: vsh, + FragmentShader: fsh, + VertexLayout: progLayout, + BlendDesc: driver.BlendDesc{ + Enable: true, + SrcFactor: driver.BlendFactorOne, + DstFactor: driver.BlendFactorOne, + }, + PixelFormat: driver.TextureFormatFloat, + }) + st.pipeline.pipeline = &pipeline{pipe, vertUniforms, nil} if err != nil { panic(err) } - st.iprog.uniforms = new(intersectUniforms) - vertUniforms = newUniformBuffer(ctx, &st.iprog.uniforms.vert) - st.iprog.prog = newProgram(iprog, vertUniforms, nil) - st.iprog.layout = iprogLayout + vsh, fsh, 
err = newShaders(ctx, gio.Shader_intersect_vert, gio.Shader_intersect_frag) + if err != nil { + panic(err) + } + defer vsh.Release() + defer fsh.Release() + st.ipipeline.uniforms = new(intersectUniforms) + vertUniforms = newUniformBuffer(ctx, &st.ipipeline.uniforms.vert) + ipipe, err := st.ctx.NewPipeline(driver.PipelineDesc{ + VertexShader: vsh, + FragmentShader: fsh, + VertexLayout: iprogLayout, + BlendDesc: driver.BlendDesc{ + Enable: true, + SrcFactor: driver.BlendFactorDstColor, + DstFactor: driver.BlendFactorZero, + }, + PixelFormat: driver.TextureFormatFloat, + }) + st.ipipeline.pipeline = &pipeline{ipipe, vertUniforms, nil} return st } @@ -284,10 +301,8 @@ func (s *fboSet) delete(ctx driver.Device, idx int) { func (s *stenciler) release() { s.fbos.delete(s.ctx, 0) - s.prog.layout.Release() - s.prog.prog.Release() - s.iprog.layout.Release() - s.iprog.prog.Release() + s.pipeline.pipeline.Release() + s.ipipeline.pipeline.Release() s.indexBuf.Release() } @@ -297,10 +312,9 @@ func (p *pather) release() { } func (c *coverer) release() { - for _, p := range c.prog { + for _, p := range c.pipelines { p.Release() } - c.layout.Release() } func buildPath(ctx driver.Device, p []byte) pathData { @@ -327,12 +341,11 @@ func (p *pather) stencilPath(bounds image.Rectangle, offset f32.Point, uv image. } func (s *stenciler) beginIntersect(sizes []image.Point) { - s.ctx.BlendFunc(driver.BlendFactorDstColor, driver.BlendFactorZero) // 8 bit coverage is enough, but OpenGL ES only supports single channel // floating point formats. Replace with GL_RGB+GL_UNSIGNED_BYTE if // no floating point support is available. 
s.intersections.resize(s.ctx, sizes) - s.ctx.BindProgram(s.iprog.prog.prog) + s.ctx.BindPipeline(s.ipipeline.pipeline.pipeline) } func (s *stenciler) invalidateFBO() { @@ -345,10 +358,8 @@ func (s *stenciler) cover(idx int) stencilFBO { } func (s *stenciler) begin(sizes []image.Point) { - s.ctx.BlendFunc(driver.BlendFactorOne, driver.BlendFactorOne) s.fbos.resize(s.ctx, sizes) - s.ctx.BindProgram(s.prog.prog.prog) - s.ctx.BindInputLayout(s.prog.layout) + s.ctx.BindPipeline(s.pipeline.pipeline.pipeline) s.ctx.BindIndexBuffer(s.indexBuf) } @@ -358,9 +369,9 @@ func (s *stenciler) stencilPath(bounds image.Rectangle, offset f32.Point, uv ima texSize := f32.Point{X: float32(bounds.Dx()), Y: float32(bounds.Dy())} scale := f32.Point{X: 2 / texSize.X, Y: 2 / texSize.Y} orig := f32.Point{X: -1 - float32(bounds.Min.X)*2/texSize.X, Y: -1 - float32(bounds.Min.Y)*2/texSize.Y} - s.prog.uniforms.vert.transform = [4]float32{scale.X, scale.Y, orig.X, orig.Y} - s.prog.uniforms.vert.pathOffset = [2]float32{offset.X, offset.Y} - s.prog.prog.UploadUniforms() + s.pipeline.uniforms.vert.transform = [4]float32{scale.X, scale.Y, orig.X, orig.Y} + s.pipeline.uniforms.vert.pathOffset = [2]float32{offset.X, offset.Y} + s.pipeline.pipeline.UploadUniforms(s.ctx) // Draw in batches that fit in uint16 indices. start := 0 nquads := data.ncurves / 4 @@ -381,8 +392,8 @@ func (p *pather) cover(mat materialType, col f32color.RGBA, col1, col2 f32color. 
} func (c *coverer) cover(mat materialType, col f32color.RGBA, col1, col2 f32color.RGBA, scale, off f32.Point, uvTrans f32.Affine2D, coverScale, coverOff f32.Point) { - p := c.prog[mat] - c.ctx.BindProgram(p.prog) + p := c.pipelines[mat] + c.ctx.BindPipeline(p.pipeline) var uniforms *coverUniforms switch mat { case materialColor: @@ -404,7 +415,7 @@ func (c *coverer) cover(mat materialType, col f32color.RGBA, col1, col2 f32color } uniforms.transform = [4]float32{scale.X, scale.Y, off.X, off.Y} uniforms.uvCoverTransform = [4]float32{coverScale.X, coverScale.Y, coverOff.X, coverOff.Y} - p.UploadUniforms() + p.UploadUniforms(c.ctx) c.ctx.DrawArrays(driver.DrawModeTriangleStrip, 0, 4) } diff --git a/internal/d3d11/d3d11_windows.go b/internal/d3d11/d3d11_windows.go index abae2ab9..4d26611b 100644 --- a/internal/d3d11/d3d11_windows.go +++ b/internal/d3d11/d3d11_windows.go @@ -1449,6 +1449,16 @@ func IUnknownQueryInterface(obj unsafe.Pointer, queryInterfaceMethod uintptr, gu return ref, nil } +func IUnknownAddRef(obj unsafe.Pointer, addRefMethod uintptr) { + syscall.Syscall( + addRefMethod, + 1, + uintptr(obj), + 0, + 0, + ) +} + func IUnknownRelease(obj unsafe.Pointer, releaseMethod uintptr) { syscall.Syscall( releaseMethod, diff --git a/internal/gl/util.go b/internal/gl/util.go index 3d5b44b4..c696b691 100644 --- a/internal/gl/util.go +++ b/internal/gl/util.go @@ -9,12 +9,12 @@ import ( ) func CreateProgram(ctx *Functions, vsSrc, fsSrc string, attribs []string) (Program, error) { - vs, err := createShader(ctx, VERTEX_SHADER, vsSrc) + vs, err := CreateShader(ctx, VERTEX_SHADER, vsSrc) if err != nil { return Program{}, err } defer ctx.DeleteShader(vs) - fs, err := createShader(ctx, FRAGMENT_SHADER, fsSrc) + fs, err := CreateShader(ctx, FRAGMENT_SHADER, fsSrc) if err != nil { return Program{}, err } @@ -38,7 +38,7 @@ func CreateProgram(ctx *Functions, vsSrc, fsSrc string, attribs []string) (Progr } func CreateComputeProgram(ctx *Functions, src string) (Program, error) { - 
cs, err := createShader(ctx, COMPUTE_SHADER, src) + cs, err := CreateShader(ctx, COMPUTE_SHADER, src) if err != nil { return Program{}, err } @@ -57,7 +57,7 @@ func CreateComputeProgram(ctx *Functions, src string) (Program, error) { return prog, nil } -func createShader(ctx *Functions, typ Enum, src string) (Shader, error) { +func CreateShader(ctx *Functions, typ Enum, src string) (Shader, error) { sh := ctx.CreateShader(typ) if !sh.Valid() { return Shader{}, errors.New("glCreateShader failed")