From ea456f42c7b3327117020111948dc30786d5df90 Mon Sep 17 00:00:00 2001 From: Elias Naur Date: Tue, 26 Nov 2024 08:49:48 +0000 Subject: [PATCH] gpu: remove compute renderer The compute renderer is a failed experiment: a better port of the Vello vector renderer exists[0] and the upcoming Go 1.24 release no longer builds the gioui.org/cpu module because of #60725. Remove it. [0] https://github.com/dominikh/jello Signed-off-by: Elias Naur --- go.mod | 1 - go.sum | 2 - gpu/compute.go | 2193 ------------------------------------------------ gpu/cpu.go | 129 --- gpu/gpu.go | 21 +- 5 files changed, 16 insertions(+), 2330 deletions(-) delete mode 100644 gpu/compute.go delete mode 100644 gpu/cpu.go diff --git a/go.mod b/go.mod index 574497ed..5a0c412b 100644 --- a/go.mod +++ b/go.mod @@ -4,7 +4,6 @@ go 1.21 require ( eliasnaur.com/font v0.0.0-20230308162249-dd43949cb42d - gioui.org/cpu v0.0.0-20210817075930-8d6a761490d2 gioui.org/shader v1.0.8 github.com/go-text/typesetting v0.1.2 golang.org/x/exp v0.0.0-20240707233637-46b078467d37 diff --git a/go.sum b/go.sum index f0c3a02d..9584c5aa 100644 --- a/go.sum +++ b/go.sum @@ -1,8 +1,6 @@ eliasnaur.com/font v0.0.0-20230308162249-dd43949cb42d h1:ARo7NCVvN2NdhLlJE9xAbKweuI9L6UgfTbYb0YwPacY= eliasnaur.com/font v0.0.0-20230308162249-dd43949cb42d/go.mod h1:OYVuxibdk9OSLX8vAqydtRPP87PyTFcT9uH3MlEGBQA= gioui.org/cpu v0.0.0-20210808092351-bfe733dd3334/go.mod h1:A8M0Cn5o+vY5LTMlnRoK3O5kG+rH0kWfJjeKd9QpBmQ= -gioui.org/cpu v0.0.0-20210817075930-8d6a761490d2 h1:AGDDxsJE1RpcXTAxPG2B4jrwVUJGFDjINIPi1jtO6pc= -gioui.org/cpu v0.0.0-20210817075930-8d6a761490d2/go.mod h1:A8M0Cn5o+vY5LTMlnRoK3O5kG+rH0kWfJjeKd9QpBmQ= gioui.org/shader v1.0.8 h1:6ks0o/A+b0ne7RzEqRZK5f4Gboz2CfG+mVliciy6+qA= gioui.org/shader v1.0.8/go.mod h1:mWdiME581d/kV7/iEhLmUgUK5iZ09XR5XpduXzbePVM= github.com/go-text/typesetting v0.1.2 h1:KmZOfoxrrYgghohzXgNY7aQPgQ4W+QeKPeRI8yqpDDE= diff --git a/gpu/compute.go b/gpu/compute.go deleted file mode 100644 index 298a5b87..00000000 --- a/gpu/compute.go +++ /dev/null @@ -1,2193 +0,0 @@ -// SPDX-License-Identifier: Unlicense OR MIT - -package gpu - -import ( - "bytes" - "encoding/binary" - "errors" - "fmt" - "hash/maphash" - "image" - "image/color" - "image/draw" - "image/png" - "math" - "math/bits" - "os" - "runtime" - "sort" - "time" - "unsafe" - - "gioui.org/cpu" - "gioui.org/gpu/internal/driver" - "gioui.org/internal/byteslice" - "gioui.org/internal/f32" - "gioui.org/internal/f32color" - "gioui.org/internal/ops" - "gioui.org/internal/scene" - "gioui.org/layout" - "gioui.org/op" - "gioui.org/shader" - "gioui.org/shader/gio" - "gioui.org/shader/piet" -) - -type compute struct { - ctx driver.Device - - collector collector - enc encoder - texOps []textureOp - viewport image.Point - maxTextureDim int - srgb bool - atlases []*textureAtlas - frameCount uint - moves []atlasMove - - programs struct { - elements computeProgram - tileAlloc computeProgram - pathCoarse computeProgram - backdrop computeProgram - binning computeProgram - coarse computeProgram - kernel4 computeProgram - } - buffers struct { - config sizedBuffer - scene sizedBuffer - state sizedBuffer - memory sizedBuffer - } - output struct { - blitPipeline driver.Pipeline - - buffer sizedBuffer - - uniforms *copyUniforms - uniBuf driver.Buffer - - layerVertices []layerVertex - descriptors *piet.Kernel4DescriptorSetLayout - - nullMaterials driver.Texture - } - // imgAllocs maps imageOpData.handles to allocs. - imgAllocs map[interface{}]*atlasAlloc - // materials contains the pre-processed materials (transformed images for - // now, gradients etc. later) packed in a texture atlas. The atlas is used - // as source in kernel4. - materials struct { - // allocs maps texture ops the their atlases and FillImage offsets. - allocs map[textureKey]materialAlloc - - pipeline driver.Pipeline - buffer sizedBuffer - quads []materialVertex - uniforms struct { - u *materialUniforms - buf driver.Buffer - } - } - timers struct { - t *timers - compact *timer - render *timer - blit *timer - } - - // CPU fallback fields. - useCPU bool - dispatcher *dispatcher - - // The following fields hold scratch space to avoid garbage. - zeroSlice []byte - memHeader *memoryHeader - conf *config -} - -type materialAlloc struct { - alloc *atlasAlloc - offset image.Point -} - -type layer struct { - rect image.Rectangle - alloc *atlasAlloc - ops []paintOp - materials *textureAtlas -} - -type allocQuery struct { - atlas *textureAtlas - size image.Point - empty bool - format driver.TextureFormat - bindings driver.BufferBinding - nocompact bool -} - -type atlasAlloc struct { - atlas *textureAtlas - rect image.Rectangle - cpu bool - dead bool - frameCount uint -} - -type atlasMove struct { - src *textureAtlas - dstPos image.Point - srcRect image.Rectangle - cpu bool -} - -type textureAtlas struct { - image driver.Texture - format driver.TextureFormat - bindings driver.BufferBinding - hasCPU bool - cpuImage cpu.ImageDescriptor - size image.Point - allocs []*atlasAlloc - packer packer - realized bool - lastFrame uint - compact bool -} - -type copyUniforms struct { - scale [2]float32 - pos [2]float32 - uvScale [2]float32 - _ [8]byte // Pad to 16 bytes. -} - -type materialUniforms struct { - scale [2]float32 - pos [2]float32 - emulatesRGB float32 - _ [12]byte // Pad to 16 bytes -} - -type collector struct { - hasher maphash.Hash - reader ops.Reader - states []f32.Affine2D - clear bool - clearColor f32color.RGBA - clipStates []clipState - order []hashIndex - transStack []transEntry - prevFrame opsCollector - frame opsCollector -} - -type transEntry struct { - t f32.Affine2D - relTrans f32.Affine2D -} - -type hashIndex struct { - index int - hash uint64 -} - -type opsCollector struct { - paths []byte - clipCmds []clipCmd - ops []paintOp - layers []layer -} - -type paintOp struct { - clipStack []clipCmd - offset image.Point - state paintKey - intersect f32.Rectangle - hash uint64 - layer int - texOpIdx int -} - -// clipCmd describes a clipping command ready to be used for the compute -// pipeline. -type clipCmd struct { - // union of the bounds of the operations that are clipped. - union f32.Rectangle - state clipKey - path []byte - pathKey ops.Key - absBounds f32.Rectangle -} - -type encoderState struct { - relTrans f32.Affine2D - clip *clipState - - paintKey -} - -// clipKey completely describes a clip operation (along with its path) and is appropriate -// for hashing and equality checks. -type clipKey struct { - bounds f32.Rectangle - strokeWidth float32 - relTrans f32.Affine2D - pathHash uint64 -} - -// paintKey completely defines a paint operation. It is suitable for hashing and -// equality checks. -type paintKey struct { - t f32.Affine2D - matType materialType - // Current paint.ImageOp - image imageOpData - // Current paint.ColorOp, if any. - color color.NRGBA - - // Current paint.LinearGradientOp. - stop1 f32.Point - stop2 f32.Point - color1 color.NRGBA - color2 color.NRGBA -} - -type clipState struct { - absBounds f32.Rectangle - parent *clipState - path []byte - pathKey ops.Key - intersect f32.Rectangle - - clipKey -} - -type layerVertex struct { - posX, posY float32 - u, v float32 -} - -// materialVertex describes a vertex of a quad used to render a transformed -// material. -type materialVertex struct { - posX, posY float32 - u, v float32 -} - -// textureKey identifies textureOp. -type textureKey struct { - handle interface{} - transform f32.Affine2D - bounds image.Rectangle -} - -// textureOp represents an paintOp that requires texture space. -type textureOp struct { - img imageOpData - key textureKey - // offset is the integer offset separated from key.transform to increase cache hit rate. - off image.Point - // matAlloc is the atlas placement for material. - matAlloc materialAlloc - // imgAlloc is the atlas placement for the source image - imgAlloc *atlasAlloc -} - -type encoder struct { - scene []scene.Command - npath int - npathseg int - ntrans int -} - -// sizedBuffer holds a GPU buffer, or its equivalent CPU memory. -type sizedBuffer struct { - size int - buffer driver.Buffer - // cpuBuf is initialized when useCPU is true. - cpuBuf cpu.BufferDescriptor -} - -// computeProgram holds a compute program, or its equivalent CPU implementation. -type computeProgram struct { - prog driver.Program - - // CPU fields. - progInfo *cpu.ProgramInfo - descriptors unsafe.Pointer - buffers []*cpu.BufferDescriptor -} - -// config matches Config in setup.h -type config struct { - n_elements uint32 // paths - n_pathseg uint32 - width_in_tiles uint32 - height_in_tiles uint32 - tile_alloc memAlloc - bin_alloc memAlloc - ptcl_alloc memAlloc - pathseg_alloc memAlloc - anno_alloc memAlloc - trans_alloc memAlloc -} - -// memAlloc matches Alloc in mem.h -type memAlloc struct { - offset uint32 - //size uint32 -} - -// memoryHeader matches the header of Memory in mem.h. -type memoryHeader struct { - mem_offset uint32 - mem_error uint32 -} - -// rect is a oriented rectangle. -type rectangle [4]f32.Point - -const ( - layersBindings = driver.BufferBindingShaderStorageWrite | driver.BufferBindingTexture - materialsBindings = driver.BufferBindingFramebuffer | driver.BufferBindingShaderStorageRead - // Materials and layers can share texture storage if their bindings match. - combinedBindings = layersBindings | materialsBindings -) - -// GPU structure sizes and constants. -const ( - tileWidthPx = 32 - tileHeightPx = 32 - ptclInitialAlloc = 1024 - kernel4OutputUnit = 2 - kernel4AtlasUnit = 3 - - pathSize = 12 - binSize = 8 - pathsegSize = 52 - annoSize = 32 - transSize = 24 - stateSize = 60 - stateStride = 4 + 2*stateSize -) - -// mem.h constants. -const ( - memNoError = 0 // NO_ERROR - memMallocFailed = 1 // ERR_MALLOC_FAILED -) - -func newCompute(ctx driver.Device) (*compute, error) { - caps := ctx.Caps() - maxDim := caps.MaxTextureSize - // Large atlas textures cause artifacts due to precision loss in - // shaders. - if cap := 8192; maxDim > cap { - maxDim = cap - } - // The compute programs can only span 128x64 tiles. Limit to 64 for now, and leave the - // complexity of a rectangular limit for later. - if computeCap := 4096; maxDim > computeCap { - maxDim = computeCap - } - g := &compute{ - ctx: ctx, - maxTextureDim: maxDim, - srgb: caps.Features.Has(driver.FeatureSRGB), - conf: new(config), - memHeader: new(memoryHeader), - } - shaders := []struct { - prog *computeProgram - src shader.Sources - info *cpu.ProgramInfo - }{ - {&g.programs.elements, piet.Shader_elements_comp, piet.ElementsProgramInfo}, - {&g.programs.tileAlloc, piet.Shader_tile_alloc_comp, piet.Tile_allocProgramInfo}, - {&g.programs.pathCoarse, piet.Shader_path_coarse_comp, piet.Path_coarseProgramInfo}, - {&g.programs.backdrop, piet.Shader_backdrop_comp, piet.BackdropProgramInfo}, - {&g.programs.binning, piet.Shader_binning_comp, piet.BinningProgramInfo}, - {&g.programs.coarse, piet.Shader_coarse_comp, piet.CoarseProgramInfo}, - {&g.programs.kernel4, piet.Shader_kernel4_comp, piet.Kernel4ProgramInfo}, - } - if !caps.Features.Has(driver.FeatureCompute) { - if !cpu.Supported { - return nil, errors.New("gpu: missing support for compute programs") - } - g.useCPU = true - } - if g.useCPU { - g.dispatcher = newDispatcher(runtime.NumCPU()) - } else { - null, err := ctx.NewTexture(driver.TextureFormatRGBA8, 1, 1, driver.FilterNearest, driver.FilterNearest, driver.BufferBindingShaderStorageRead) - if err != nil { - g.Release() - return nil, err - } - g.output.nullMaterials = null - } - - copyVert, copyFrag, err := newShaders(ctx, gio.Shader_copy_vert, gio.Shader_copy_frag) - if err != nil { - g.Release() - return nil, err - } - defer copyVert.Release() - defer copyFrag.Release() - pipe, err := ctx.NewPipeline(driver.PipelineDesc{ - VertexShader: copyVert, - FragmentShader: copyFrag, - VertexLayout: driver.VertexLayout{ - Inputs: []driver.InputDesc{ - {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, - {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, - }, - Stride: int(unsafe.Sizeof(g.output.layerVertices[0])), - }, - PixelFormat: driver.TextureFormatOutput, - BlendDesc: driver.BlendDesc{ - Enable: true, - SrcFactor: driver.BlendFactorOne, - DstFactor: driver.BlendFactorOneMinusSrcAlpha, - }, - Topology: driver.TopologyTriangles, - }) - if err != nil { - g.Release() - return nil, err - } - g.output.blitPipeline = pipe - g.output.uniforms = new(copyUniforms) - - buf, err := ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.output.uniforms))) - if err != nil { - g.Release() - return nil, err - } - g.output.uniBuf = buf - - materialVert, materialFrag, err := newShaders(ctx, gio.Shader_material_vert, gio.Shader_material_frag) - if err != nil { - g.Release() - return nil, err - } - defer materialVert.Release() - defer materialFrag.Release() - pipe, err = ctx.NewPipeline(driver.PipelineDesc{ - VertexShader: materialVert, - FragmentShader: materialFrag, - VertexLayout: driver.VertexLayout{ - Inputs: []driver.InputDesc{ - {Type: shader.DataTypeFloat, Size: 2, Offset: 0}, - {Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2}, - }, - Stride: int(unsafe.Sizeof(g.materials.quads[0])), - }, - PixelFormat: driver.TextureFormatRGBA8, - Topology: driver.TopologyTriangles, - }) - if err != nil { - g.Release() - return nil, err - } - g.materials.pipeline = pipe - g.materials.uniforms.u = new(materialUniforms) - - buf, err = ctx.NewBuffer(driver.BufferBindingUniforms, int(unsafe.Sizeof(*g.materials.uniforms.u))) - if err != nil { - g.Release() - return nil, err - } - g.materials.uniforms.buf = buf - - for _, shader := range shaders { - if !g.useCPU { - p, err := ctx.NewComputeProgram(shader.src) - if err != nil { - g.Release() - return nil, err - } - shader.prog.prog = p - } else { - shader.prog.progInfo = shader.info - } - } - if g.useCPU { - { - desc := new(piet.ElementsDescriptorSetLayout) - g.programs.elements.descriptors = unsafe.Pointer(desc) - g.programs.elements.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1(), desc.Binding2(), desc.Binding3()} - } - { - desc := new(piet.Tile_allocDescriptorSetLayout) - g.programs.tileAlloc.descriptors = unsafe.Pointer(desc) - g.programs.tileAlloc.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} - } - { - desc := new(piet.Path_coarseDescriptorSetLayout) - g.programs.pathCoarse.descriptors = unsafe.Pointer(desc) - g.programs.pathCoarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} - } - { - desc := new(piet.BackdropDescriptorSetLayout) - g.programs.backdrop.descriptors = unsafe.Pointer(desc) - g.programs.backdrop.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} - } - { - desc := new(piet.BinningDescriptorSetLayout) - g.programs.binning.descriptors = unsafe.Pointer(desc) - g.programs.binning.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} - } - { - desc := new(piet.CoarseDescriptorSetLayout) - g.programs.coarse.descriptors = unsafe.Pointer(desc) - g.programs.coarse.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} - } - { - desc := new(piet.Kernel4DescriptorSetLayout) - g.programs.kernel4.descriptors = unsafe.Pointer(desc) - g.programs.kernel4.buffers = []*cpu.BufferDescriptor{desc.Binding0(), desc.Binding1()} - g.output.descriptors = desc - } - } - return g, nil -} - -func newShaders(ctx driver.Device, vsrc, fsrc shader.Sources) (vert driver.VertexShader, frag driver.FragmentShader, err error) { - vert, err = ctx.NewVertexShader(vsrc) - if err != nil { - return - } - frag, err = ctx.NewFragmentShader(fsrc) - if err != nil { - vert.Release() - } - return -} - -func (g *compute) Frame(frameOps *op.Ops, target RenderTarget, viewport image.Point) error { - g.frameCount++ - g.collect(viewport, frameOps) - return g.frame(target) -} - -func (g *compute) collect(viewport image.Point, ops *op.Ops) { - g.viewport = viewport - g.collector.reset() - - g.texOps = g.texOps[:0] - g.collector.collect(ops, viewport, &g.texOps) -} - -func (g *compute) Clear(col color.NRGBA) { - g.collector.clear = true - g.collector.clearColor = f32color.LinearFromSRGB(col) -} - -func (g *compute) frame(target RenderTarget) error { - viewport := g.viewport - defFBO := g.ctx.BeginFrame(target, g.collector.clear, viewport) - defer g.ctx.EndFrame() - - t := &g.timers - if false && t.t == nil && g.ctx.Caps().Features.Has(driver.FeatureTimers) { - t.t = newTimers(g.ctx) - t.compact = t.t.newTimer() - t.render = t.t.newTimer() - t.blit = t.t.newTimer() - } - - if err := g.uploadImages(); err != nil { - return err - } - if err := g.renderMaterials(); err != nil { - return err - } - g.layer(viewport, g.texOps) - t.render.begin() - if err := g.renderLayers(viewport); err != nil { - return err - } - t.render.end() - d := driver.LoadDesc{ - ClearColor: g.collector.clearColor, - } - if g.collector.clear { - g.collector.clear = false - d.Action = driver.LoadActionClear - } - t.blit.begin() - g.blitLayers(d, defFBO, viewport) - t.blit.end() - t.compact.begin() - if err := g.compactAllocs(); err != nil { - return err - } - t.compact.end() - if false && t.t.ready() { - com, ren, blit := t.compact.Elapsed, t.render.Elapsed, t.blit.Elapsed - ft := com + ren + blit - q := 100 * time.Microsecond - ft = ft.Round(q) - com, ren, blit = com.Round(q), ren.Round(q), blit.Round(q) - // t.profile = fmt.Sprintf("ft:%7s com: %7s ren:%7s blit:%7s", ft, com, ren, blit) - } - return nil -} - -func (g *compute) dumpAtlases() { - for i, a := range g.atlases { - dump := image.NewRGBA(image.Rectangle{Max: a.size}) - err := driver.DownloadImage(g.ctx, a.image, dump) - if err != nil { - panic(err) - } - nrgba := image.NewNRGBA(dump.Bounds()) - draw.Draw(nrgba, image.Rectangle{}, dump, image.Point{}, draw.Src) - var buf bytes.Buffer - if err := png.Encode(&buf, nrgba); err != nil { - panic(err) - } - if err := os.WriteFile(fmt.Sprintf("dump-%d.png", i), buf.Bytes(), 0600); err != nil { - panic(err) - } - } -} - -func (g *compute) compactAllocs() error { - const ( - maxAllocAge = 3 - maxAtlasAge = 10 - ) - atlases := g.atlases - for _, a := range atlases { - if len(a.allocs) > 0 && g.frameCount-a.lastFrame > maxAtlasAge { - a.compact = true - } - } - for len(atlases) > 0 { - var ( - dstAtlas *textureAtlas - format driver.TextureFormat - bindings driver.BufferBinding - ) - g.moves = g.moves[:0] - addedLayers := false - useCPU := false - fill: - for len(atlases) > 0 { - srcAtlas := atlases[0] - allocs := srcAtlas.allocs - if !srcAtlas.compact { - atlases = atlases[1:] - continue - } - if addedLayers && (format != srcAtlas.format || srcAtlas.bindings&bindings != srcAtlas.bindings) { - break - } - format = srcAtlas.format - bindings = srcAtlas.bindings - for len(srcAtlas.allocs) > 0 { - a := srcAtlas.allocs[0] - n := len(srcAtlas.allocs) - if g.frameCount-a.frameCount > maxAllocAge { - a.dead = true - srcAtlas.allocs[0] = srcAtlas.allocs[n-1] - srcAtlas.allocs = srcAtlas.allocs[:n-1] - continue - } - size := a.rect.Size() - alloc, fits := g.atlasAlloc(allocQuery{ - atlas: dstAtlas, - size: size, - format: format, - bindings: bindings, - nocompact: true, - }) - if !fits { - break fill - } - dstAtlas = alloc.atlas - allocs = append(allocs, a) - addedLayers = true - useCPU = useCPU || a.cpu - dstAtlas.allocs = append(dstAtlas.allocs, a) - pos := alloc.rect.Min - g.moves = append(g.moves, atlasMove{ - src: srcAtlas, dstPos: pos, srcRect: a.rect, cpu: a.cpu, - }) - a.atlas = dstAtlas - a.rect = image.Rectangle{Min: pos, Max: pos.Add(a.rect.Size())} - srcAtlas.allocs[0] = srcAtlas.allocs[n-1] - srcAtlas.allocs = srcAtlas.allocs[:n-1] - } - srcAtlas.compact = false - srcAtlas.realized = false - srcAtlas.packer.clear() - srcAtlas.packer.newPage() - srcAtlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim) - atlases = atlases[1:] - } - if !addedLayers { - break - } - outputSize := dstAtlas.packer.sizes[0] - if err := g.realizeAtlas(dstAtlas, useCPU, outputSize); err != nil { - return err - } - for _, move := range g.moves { - if !move.cpu { - g.ctx.CopyTexture(dstAtlas.image, move.dstPos, move.src.image, move.srcRect) - } else { - src := move.src.cpuImage.Data() - dst := dstAtlas.cpuImage.Data() - sstride := move.src.size.X * 4 - dstride := dstAtlas.size.X * 4 - copyImage(dst, dstride, move.dstPos, src, sstride, move.srcRect) - } - } - } - for i := len(g.atlases) - 1; i >= 0; i-- { - a := g.atlases[i] - if len(a.allocs) == 0 && g.frameCount-a.lastFrame > maxAtlasAge { - a.Release() - n := len(g.atlases) - g.atlases[i] = g.atlases[n-1] - g.atlases = g.atlases[:n-1] - } - } - return nil -} - -func copyImage(dst []byte, dstStride int, dstPos image.Point, src []byte, srcStride int, srcRect image.Rectangle) { - sz := srcRect.Size() - soff := srcRect.Min.Y*srcStride + srcRect.Min.X*4 - doff := dstPos.Y*dstStride + dstPos.X*4 - rowLen := sz.X * 4 - for y := 0; y < sz.Y; y++ { - srow := src[soff : soff+rowLen] - drow := dst[doff : doff+rowLen] - copy(drow, srow) - soff += srcStride - doff += dstStride - } -} - -func (g *compute) renderLayers(viewport image.Point) error { - layers := g.collector.frame.layers - for len(layers) > 0 { - var materials, dst *textureAtlas - addedLayers := false - g.enc.reset() - for len(layers) > 0 { - l := &layers[0] - if l.alloc != nil { - layers = layers[1:] - continue - } - if materials != nil { - if l.materials != nil && materials != l.materials { - // Only one materials texture per compute pass. - break - } - } else { - materials = l.materials - } - size := l.rect.Size() - alloc, fits := g.atlasAlloc(allocQuery{ - atlas: dst, - empty: true, - format: driver.TextureFormatRGBA8, - bindings: combinedBindings, - // Pad to avoid overlap. - size: size.Add(image.Pt(1, 1)), - }) - if !fits { - // Only one output atlas per compute pass. - break - } - dst = alloc.atlas - dst.compact = true - addedLayers = true - l.alloc = &alloc - dst.allocs = append(dst.allocs, l.alloc) - encodeLayer(*l, alloc.rect.Min, viewport, &g.enc, g.texOps) - layers = layers[1:] - } - if !addedLayers { - break - } - outputSize := dst.packer.sizes[0] - tileDims := image.Point{ - X: (outputSize.X + tileWidthPx - 1) / tileWidthPx, - Y: (outputSize.Y + tileHeightPx - 1) / tileHeightPx, - } - w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx - if err := g.realizeAtlas(dst, g.useCPU, image.Pt(w, h)); err != nil { - return err - } - if err := g.render(materials, dst.image, dst.cpuImage, tileDims, dst.size.X*4); err != nil { - return err - } - } - return nil -} - -func (g *compute) blitLayers(d driver.LoadDesc, fbo driver.Texture, viewport image.Point) { - layers := g.collector.frame.layers - g.output.layerVertices = g.output.layerVertices[:0] - for _, l := range layers { - placef := layout.FPt(l.alloc.rect.Min) - sizef := layout.FPt(l.rect.Size()) - r := f32.FRect(l.rect) - quad := [4]layerVertex{ - {posX: float32(r.Min.X), posY: float32(r.Min.Y), u: placef.X, v: placef.Y}, - {posX: float32(r.Max.X), posY: float32(r.Min.Y), u: placef.X + sizef.X, v: placef.Y}, - {posX: float32(r.Max.X), posY: float32(r.Max.Y), u: placef.X + sizef.X, v: placef.Y + sizef.Y}, - {posX: float32(r.Min.X), posY: float32(r.Max.Y), u: placef.X, v: placef.Y + sizef.Y}, - } - g.output.layerVertices = append(g.output.layerVertices, quad[0], quad[1], quad[3], quad[3], quad[2], quad[1]) - g.ctx.PrepareTexture(l.alloc.atlas.image) - } - if len(g.output.layerVertices) > 0 { - vertexData := byteslice.Slice(g.output.layerVertices) - g.output.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, len(vertexData)) - g.output.buffer.buffer.Upload(vertexData) - } - g.ctx.BeginRenderPass(fbo, d) - defer g.ctx.EndRenderPass() - if len(layers) == 0 { - return - } - g.ctx.Viewport(0, 0, viewport.X, viewport.Y) - g.ctx.BindPipeline(g.output.blitPipeline) - g.ctx.BindVertexBuffer(g.output.buffer.buffer, 0) - start := 0 - for len(layers) > 0 { - count := 0 - atlas := layers[0].alloc.atlas - for len(layers) > 0 { - l := layers[0] - if l.alloc.atlas != atlas { - break - } - layers = layers[1:] - const verticesPerQuad = 6 - count += verticesPerQuad - } - - // Transform positions to clip space: [-1, -1] - [1, 1], and texture - // coordinates to texture space: [0, 0] - [1, 1]. - clip := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(2/float32(viewport.X), 2/float32(viewport.Y))).Offset(f32.Pt(-1, -1)) - sx, _, ox, _, sy, oy := clip.Elems() - g.output.uniforms.scale = [2]float32{sx, sy} - g.output.uniforms.pos = [2]float32{ox, oy} - g.output.uniforms.uvScale = [2]float32{1 / float32(atlas.size.X), 1 / float32(atlas.size.Y)} - g.output.uniBuf.Upload(byteslice.Struct(g.output.uniforms)) - g.ctx.BindUniforms(g.output.uniBuf) - g.ctx.BindTexture(0, atlas.image) - g.ctx.DrawArrays(start, count) - start += count - } -} - -func (g *compute) renderMaterials() error { - m := &g.materials - for k, place := range m.allocs { - if place.alloc.dead { - delete(m.allocs, k) - } - } - texOps := g.texOps - for len(texOps) > 0 { - m.quads = m.quads[:0] - var ( - atlas *textureAtlas - imgAtlas *textureAtlas - ) - // A material is clipped to avoid drawing outside its atlas bounds. - // However, imprecision in the clipping may cause a single pixel - // overflow. - var padding = image.Pt(1, 1) - var allocStart int - for len(texOps) > 0 { - op := &texOps[0] - if a, exists := m.allocs[op.key]; exists { - g.touchAlloc(a.alloc) - op.matAlloc = a - texOps = texOps[1:] - continue - } - - if imgAtlas != nil && op.imgAlloc.atlas != imgAtlas { - // Only one image atlas per render pass. - break - } - imgAtlas = op.imgAlloc.atlas - quad := g.materialQuad(imgAtlas.size, op.key.transform, op.img, op.imgAlloc.rect.Min) - boundsf := quadBounds(quad) - bounds := boundsf.Round() - bounds = bounds.Intersect(op.key.bounds) - - size := bounds.Size() - alloc, fits := g.atlasAlloc(allocQuery{ - atlas: atlas, - size: size.Add(padding), - format: driver.TextureFormatRGBA8, - bindings: combinedBindings, - }) - if !fits { - break - } - if atlas == nil { - allocStart = len(alloc.atlas.allocs) - } - atlas = alloc.atlas - alloc.cpu = g.useCPU - offsetf := layout.FPt(bounds.Min.Mul(-1)) - scale := f32.Pt(float32(size.X), float32(size.Y)) - for i := range quad { - // Position quad to match place. - quad[i].posX += offsetf.X - quad[i].posY += offsetf.Y - // Scale to match viewport [0, 1]. - quad[i].posX /= scale.X - quad[i].posY /= scale.Y - } - // Draw quad as two triangles. - m.quads = append(m.quads, quad[0], quad[1], quad[3], quad[3], quad[1], quad[2]) - if m.allocs == nil { - m.allocs = make(map[textureKey]materialAlloc) - } - atlasAlloc := materialAlloc{ - alloc: &alloc, - offset: bounds.Min.Mul(-1), - } - atlas.allocs = append(atlas.allocs, atlasAlloc.alloc) - m.allocs[op.key] = atlasAlloc - op.matAlloc = atlasAlloc - texOps = texOps[1:] - } - if len(m.quads) == 0 { - break - } - realized := atlas.realized - if err := g.realizeAtlas(atlas, g.useCPU, atlas.packer.sizes[0]); err != nil { - return err - } - // Transform to clip space: [-1, -1] - [1, 1]. - *m.uniforms.u = materialUniforms{ - scale: [2]float32{2, 2}, - pos: [2]float32{-1, -1}, - } - if !g.srgb { - m.uniforms.u.emulatesRGB = 1.0 - } - m.uniforms.buf.Upload(byteslice.Struct(m.uniforms.u)) - vertexData := byteslice.Slice(m.quads) - n := pow2Ceil(len(vertexData)) - m.buffer.ensureCapacity(false, g.ctx, driver.BufferBindingVertices, n) - m.buffer.buffer.Upload(vertexData) - var d driver.LoadDesc - if !realized { - d.Action = driver.LoadActionClear - } - g.ctx.PrepareTexture(imgAtlas.image) - g.ctx.BeginRenderPass(atlas.image, d) - g.ctx.BindTexture(0, imgAtlas.image) - g.ctx.BindPipeline(m.pipeline) - g.ctx.BindUniforms(m.uniforms.buf) - g.ctx.BindVertexBuffer(m.buffer.buffer, 0) - newAllocs := atlas.allocs[allocStart:] - for i, a := range newAllocs { - sz := a.rect.Size().Sub(padding) - g.ctx.Viewport(a.rect.Min.X, a.rect.Min.Y, sz.X, sz.Y) - g.ctx.DrawArrays(i*6, 6) - } - g.ctx.EndRenderPass() - if !g.useCPU { - continue - } - src := atlas.image - data := atlas.cpuImage.Data() - for _, a := range newAllocs { - stride := atlas.size.X * 4 - col := a.rect.Min.X * 4 - row := stride * a.rect.Min.Y - off := col + row - src.ReadPixels(a.rect, data[off:], stride) - } - } - return nil -} - -func (g *compute) uploadImages() error { - for k, a := range g.imgAllocs { - if a.dead { - delete(g.imgAllocs, k) - } - } - type upload struct { - pos image.Point - img *image.RGBA - } - var uploads []upload - format := driver.TextureFormatSRGBA - if !g.srgb { - format = driver.TextureFormatRGBA8 - } - // padding is the number of pixels added to the right and below - // images, to avoid atlas filtering artifacts. - const padding = 1 - texOps := g.texOps - for len(texOps) > 0 { - uploads = uploads[:0] - var atlas *textureAtlas - for len(texOps) > 0 { - op := &texOps[0] - if a, exists := g.imgAllocs[op.img.handle]; exists { - g.touchAlloc(a) - op.imgAlloc = a - texOps = texOps[1:] - continue - } - size := op.img.src.Bounds().Size().Add(image.Pt(padding, padding)) - alloc, fits := g.atlasAlloc(allocQuery{ - atlas: atlas, - size: size, - format: format, - bindings: driver.BufferBindingTexture | driver.BufferBindingFramebuffer, - }) - if !fits { - break - } - atlas = alloc.atlas - if g.imgAllocs == nil { - g.imgAllocs = make(map[interface{}]*atlasAlloc) - } - op.imgAlloc = &alloc - atlas.allocs = append(atlas.allocs, op.imgAlloc) - g.imgAllocs[op.img.handle] = op.imgAlloc - uploads = append(uploads, upload{pos: alloc.rect.Min, img: op.img.src}) - texOps = texOps[1:] - } - if len(uploads) == 0 { - break - } - if err := g.realizeAtlas(atlas, false, atlas.packer.sizes[0]); err != nil { - return err - } - for _, u := range uploads { - size := u.img.Bounds().Size() - driver.UploadImage(atlas.image, u.pos, u.img) - rightPadding := image.Pt(padding, size.Y) - atlas.image.Upload(image.Pt(u.pos.X+size.X, u.pos.Y), rightPadding, g.zeros(rightPadding.X*rightPadding.Y*4), 0) - bottomPadding := image.Pt(size.X, padding) - atlas.image.Upload(image.Pt(u.pos.X, u.pos.Y+size.Y), bottomPadding, g.zeros(bottomPadding.X*bottomPadding.Y*4), 0) - } - } - return nil -} - -func pow2Ceil(v int) int { - exp := bits.Len(uint(v)) - if bits.OnesCount(uint(v)) == 1 { - exp-- - } - return 1 << exp -} - -// materialQuad constructs a quad that represents the transformed image. It returns the quad -// and its bounds. -func (g *compute) materialQuad(imgAtlasSize image.Point, M f32.Affine2D, img imageOpData, uvPos image.Point) [4]materialVertex { - imgSize := layout.FPt(img.src.Bounds().Size()) - sx, hx, ox, hy, sy, oy := M.Elems() - transOff := f32.Pt(ox, oy) - // The 4 corners of the image rectangle transformed by M, excluding its offset, are: - // - // q0: M * (0, 0) q3: M * (w, 0) - // q1: M * (0, h) q2: M * (w, h) - // - // Note that q0 = M*0 = 0, q2 = q1 + q3. - q0 := f32.Pt(0, 0) - q1 := f32.Pt(hx*imgSize.Y, sy*imgSize.Y) - q3 := f32.Pt(sx*imgSize.X, hy*imgSize.X) - q2 := q1.Add(q3) - q0 = q0.Add(transOff) - q1 = q1.Add(transOff) - q2 = q2.Add(transOff) - q3 = q3.Add(transOff) - - uvPosf := layout.FPt(uvPos) - atlasScale := f32.Pt(1/float32(imgAtlasSize.X), 1/float32(imgAtlasSize.Y)) - uvBounds := f32.Rectangle{ - Min: uvPosf, - Max: uvPosf.Add(imgSize), - } - uvBounds.Min.X *= atlasScale.X - uvBounds.Min.Y *= atlasScale.Y - uvBounds.Max.X *= atlasScale.X - uvBounds.Max.Y *= atlasScale.Y - quad := [4]materialVertex{ - {posX: q0.X, posY: q0.Y, u: uvBounds.Min.X, v: uvBounds.Min.Y}, - {posX: q1.X, posY: q1.Y, u: uvBounds.Min.X, v: uvBounds.Max.Y}, - {posX: q2.X, posY: q2.Y, u: uvBounds.Max.X, v: uvBounds.Max.Y}, - {posX: q3.X, posY: q3.Y, u: uvBounds.Max.X, v: uvBounds.Min.Y}, - } - return quad -} - -func quadBounds(q [4]materialVertex) f32.Rectangle { - q0 := f32.Pt(q[0].posX, q[0].posY) - q1 := f32.Pt(q[1].posX, q[1].posY) - q2 := f32.Pt(q[2].posX, q[2].posY) - q3 := f32.Pt(q[3].posX, q[3].posY) - return f32.Rectangle{ - Min: min(min(q0, q1), min(q2, q3)), - Max: max(max(q0, q1), max(q2, q3)), - } -} - -func max(p1, p2 f32.Point) f32.Point { - p := p1 - if p2.X > p.X { - p.X = p2.X - } - if p2.Y > p.Y { - p.Y = p2.Y - } - return p -} - -func min(p1, p2 f32.Point) f32.Point { - p := p1 - if p2.X < p.X { - p.X = p2.X - } - if p2.Y < p.Y { - p.Y = p2.Y - } - return p -} - -func (enc *encoder) encodePath(verts []byte, fillMode int) { - for ; len(verts) >= scene.CommandSize+4; verts = verts[scene.CommandSize+4:] { - cmd := ops.DecodeCommand(verts[4:]) - if cmd.Op() == scene.OpGap { - if fillMode != scene.FillModeNonzero { - // Skip gaps in strokes. - continue - } - // Replace them by a straight line in outlines. - cmd = scene.Line(scene.DecodeGap(cmd)) - } - enc.scene = append(enc.scene, cmd) - enc.npathseg++ - } -} - -func (g *compute) render(images *textureAtlas, dst driver.Texture, cpuDst cpu.ImageDescriptor, tileDims image.Point, stride int) error { - const ( - // wgSize is the largest and most common workgroup size. - wgSize = 128 - // PARTITION_SIZE from elements.comp - partitionSize = 32 * 4 - ) - widthInBins := (tileDims.X + 15) / 16 - heightInBins := (tileDims.Y + 7) / 8 - if widthInBins*heightInBins > wgSize { - return fmt.Errorf("gpu: output too large (%dx%d)", tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx) - } - - enc := &g.enc - // Pad scene with zeroes to avoid reading garbage in elements.comp. - scenePadding := partitionSize - len(enc.scene)%partitionSize - enc.scene = append(enc.scene, make([]scene.Command, scenePadding)...) - - scene := byteslice.Slice(enc.scene) - if s := len(scene); s > g.buffers.scene.size { - paddedCap := s * 11 / 10 - if err := g.buffers.scene.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, paddedCap); err != nil { - return err - } - } - g.buffers.scene.upload(scene) - - // alloc is the number of allocated bytes for static buffers. - var alloc uint32 - round := func(v, quantum int) int { - return (v + quantum - 1) &^ (quantum - 1) - } - malloc := func(size int) memAlloc { - size = round(size, 4) - offset := alloc - alloc += uint32(size) - return memAlloc{offset /*, uint32(size)*/} - } - - *g.conf = config{ - n_elements: uint32(enc.npath), - n_pathseg: uint32(enc.npathseg), - width_in_tiles: uint32(tileDims.X), - height_in_tiles: uint32(tileDims.Y), - tile_alloc: malloc(enc.npath * pathSize), - bin_alloc: malloc(round(enc.npath, wgSize) * binSize), - ptcl_alloc: malloc(tileDims.X * tileDims.Y * ptclInitialAlloc), - pathseg_alloc: malloc(enc.npathseg * pathsegSize), - anno_alloc: malloc(enc.npath * annoSize), - trans_alloc: malloc(enc.ntrans * transSize), - } - - numPartitions := (enc.numElements() + 127) / 128 - // clearSize is the atomic partition counter plus flag and 2 states per partition. - clearSize := 4 + numPartitions*stateStride - if clearSize > g.buffers.state.size { - paddedCap := clearSize * 11 / 10 - if err := g.buffers.state.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, paddedCap); err != nil { - return err - } - } - - confData := byteslice.Struct(g.conf) - g.buffers.config.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead, len(confData)) - g.buffers.config.upload(confData) - - minSize := int(unsafe.Sizeof(memoryHeader{})) + int(alloc) - if minSize > g.buffers.memory.size { - // Add space for dynamic GPU allocations. - const sizeBump = 4 * 1024 * 1024 - minSize += sizeBump - if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, minSize); err != nil { - return err - } - } - - for { - *g.memHeader = memoryHeader{ - mem_offset: alloc, - } - g.buffers.memory.upload(byteslice.Struct(g.memHeader)) - g.buffers.state.upload(g.zeros(clearSize)) - - if !g.useCPU { - g.ctx.BeginCompute() - g.ctx.BindImageTexture(kernel4OutputUnit, dst) - img := g.output.nullMaterials - if images != nil { - img = images.image - } - g.ctx.BindImageTexture(kernel4AtlasUnit, img) - } else { - *g.output.descriptors.Binding2() = cpuDst - if images != nil { - *g.output.descriptors.Binding3() = images.cpuImage - } - } - - g.bindBuffers() - g.memoryBarrier() - g.dispatch(g.programs.elements, numPartitions, 1, 1) - g.memoryBarrier() - g.dispatch(g.programs.tileAlloc, (enc.npath+wgSize-1)/wgSize, 1, 1) - g.memoryBarrier() - g.dispatch(g.programs.pathCoarse, (enc.npathseg+31)/32, 1, 1) - g.memoryBarrier() - g.dispatch(g.programs.backdrop, (enc.npath+wgSize-1)/wgSize, 1, 1) - // No barrier needed between backdrop and binning. - g.dispatch(g.programs.binning, (enc.npath+wgSize-1)/wgSize, 1, 1) - g.memoryBarrier() - g.dispatch(g.programs.coarse, widthInBins, heightInBins, 1) - g.memoryBarrier() - g.dispatch(g.programs.kernel4, tileDims.X, tileDims.Y, 1) - g.memoryBarrier() - if !g.useCPU { - g.ctx.EndCompute() - } else { - g.dispatcher.Sync() - } - - if err := g.buffers.memory.download(byteslice.Struct(g.memHeader)); err != nil { - if err == driver.ErrContentLost { - continue - } - return err - } - switch errCode := g.memHeader.mem_error; errCode { - case memNoError: - if g.useCPU { - w, h := tileDims.X*tileWidthPx, tileDims.Y*tileHeightPx - dst.Upload(image.Pt(0, 0), image.Pt(w, h), cpuDst.Data(), stride) - } - return nil - case memMallocFailed: - // Resize memory and try again. - sz := g.buffers.memory.size * 15 / 10 - if err := g.buffers.memory.ensureCapacity(g.useCPU, g.ctx, driver.BufferBindingShaderStorageRead|driver.BufferBindingShaderStorageWrite, sz); err != nil { - return err - } - continue - default: - return fmt.Errorf("compute: shader program failed with error %d", errCode) - } - } -} - -func (g *compute) memoryBarrier() { - if g.useCPU { - g.dispatcher.Barrier() - } -} - -func (g *compute) dispatch(p computeProgram, x, y, z int) { - if !g.useCPU { - g.ctx.BindProgram(p.prog) - g.ctx.DispatchCompute(x, y, z) - } else { - g.dispatcher.Dispatch(p.progInfo, p.descriptors, x, y, z) - } -} - -// zeros returns a byte slice with size bytes of zeros. -func (g *compute) zeros(size int) []byte { - if cap(g.zeroSlice) < size { - g.zeroSlice = append(g.zeroSlice, make([]byte, size)...) - } - return g.zeroSlice[:size] -} - -func (g *compute) touchAlloc(a *atlasAlloc) { - if a.dead { - panic("re-use of dead allocation") - } - a.frameCount = g.frameCount - a.atlas.lastFrame = a.frameCount -} - -func (g *compute) atlasAlloc(q allocQuery) (atlasAlloc, bool) { - var ( - place placement - fits bool - atlas = q.atlas - ) - if atlas != nil { - place, fits = atlas.packer.tryAdd(q.size) - if !fits { - atlas.compact = true - } - } - if atlas == nil { - // Look for matching atlas to re-use. - for _, a := range g.atlases { - if q.empty && len(a.allocs) > 0 { - continue - } - if q.nocompact && a.compact { - continue - } - if a.format != q.format || a.bindings&q.bindings != q.bindings { - continue - } - place, fits = a.packer.tryAdd(q.size) - if !fits { - a.compact = true - continue - } - atlas = a - break - } - } - if atlas == nil { - atlas = &textureAtlas{ - format: q.format, - bindings: q.bindings, - } - atlas.packer.maxDims = image.Pt(g.maxTextureDim, g.maxTextureDim) - atlas.packer.newPage() - g.atlases = append(g.atlases, atlas) - place, fits = atlas.packer.tryAdd(q.size) - if !fits { - panic(fmt.Errorf("compute: atlas allocation too large (%v)", q.size)) - } - } - if !fits { - return atlasAlloc{}, false - } - atlas.lastFrame = g.frameCount - return atlasAlloc{ - frameCount: g.frameCount, - atlas: atlas, - rect: image.Rectangle{Min: place.Pos, Max: place.Pos.Add(q.size)}, - }, true -} - -func (g *compute) realizeAtlas(atlas *textureAtlas, useCPU bool, size image.Point) error { - defer func() { - atlas.packer.maxDims = atlas.size - atlas.realized = true - atlas.ensureCPUImage(useCPU) - }() - if atlas.size.X >= size.X && atlas.size.Y >= size.Y { - return nil - } - if atlas.realized { - panic("resizing a realized atlas") - } - if err := atlas.resize(g.ctx, size); err != nil { - return err - } - return nil -} - -func (a *textureAtlas) resize(ctx driver.Device, size image.Point) error { - a.Release() - - img, err := ctx.NewTexture(a.format, size.X, size.Y, - driver.FilterNearest, - driver.FilterNearest, - a.bindings) - if err != nil { - return err - } - a.image = img - a.size = size - return nil -} - -func (a *textureAtlas) ensureCPUImage(useCPU bool) { - if !useCPU || a.hasCPU { - return - } - a.hasCPU = true - a.cpuImage = cpu.NewImageRGBA(a.size.X, a.size.Y) -} - -func (g *compute) Release() { - if g.useCPU { - g.dispatcher.Stop() - } - type resource interface { - Release() - } - res := []resource{ - g.output.nullMaterials, - &g.programs.elements, - &g.programs.tileAlloc, - &g.programs.pathCoarse, - &g.programs.backdrop, - &g.programs.binning, - &g.programs.coarse, - &g.programs.kernel4, - g.output.blitPipeline, - &g.output.buffer, - g.output.uniBuf, - &g.buffers.scene, - &g.buffers.state, - &g.buffers.memory, - &g.buffers.config, - g.materials.pipeline, - &g.materials.buffer, - g.materials.uniforms.buf, - g.timers.t, - } - for _, r := range res { - if r != nil { - r.Release() - } - } - for _, a := range g.atlases { - a.Release() - } - g.ctx.Release() - *g = compute{} -} - -func (a *textureAtlas) Release() { - if a.image != nil { - a.image.Release() - a.image = nil - } - a.cpuImage.Free() - a.hasCPU = false -} - -func (g *compute) bindBuffers() { - g.bindStorageBuffers(g.programs.elements, g.buffers.memory, g.buffers.config, g.buffers.scene, g.buffers.state) - g.bindStorageBuffers(g.programs.tileAlloc, g.buffers.memory, g.buffers.config) - g.bindStorageBuffers(g.programs.pathCoarse, g.buffers.memory, g.buffers.config) - g.bindStorageBuffers(g.programs.backdrop, g.buffers.memory, g.buffers.config) - g.bindStorageBuffers(g.programs.binning, g.buffers.memory, g.buffers.config) - g.bindStorageBuffers(g.programs.coarse, g.buffers.memory, g.buffers.config) - g.bindStorageBuffers(g.programs.kernel4, g.buffers.memory, g.buffers.config) -} - -func (p *computeProgram) Release() { - if p.prog != nil { - p.prog.Release() - } - *p = computeProgram{} -} - -func (b *sizedBuffer) Release() { - if b.buffer != nil { - b.buffer.Release() - } - b.cpuBuf.Free() - *b = sizedBuffer{} -} - -func (b *sizedBuffer) ensureCapacity(useCPU bool, ctx driver.Device, binding driver.BufferBinding, size int) error { - if b.size >= size { - return nil - } - if b.buffer != nil { - b.Release() - } - b.cpuBuf.Free() - if !useCPU { - buf, err := ctx.NewBuffer(binding, size) - if err != nil { - return err - } - b.buffer = buf - } else { - b.cpuBuf = cpu.NewBuffer(size) - } - b.size = size - return nil -} - -func (b *sizedBuffer) download(data []byte) error { - if b.buffer != nil { - return b.buffer.Download(data) - } else { - copy(data, b.cpuBuf.Data()) - return nil - } -} - -func (b *sizedBuffer) upload(data []byte) { - if b.buffer != nil { - b.buffer.Upload(data) - } else { - copy(b.cpuBuf.Data(), data) - } -} - -func (g *compute) bindStorageBuffers(prog computeProgram, buffers ...sizedBuffer) { - for i, buf := range buffers { - if !g.useCPU { - g.ctx.BindStorageBuffer(i, buf.buffer) - } else { - *prog.buffers[i] = buf.cpuBuf - } - } -} - -var bo = binary.LittleEndian - -func (e *encoder) reset() { - e.scene = e.scene[:0] - e.npath = 0 - e.npathseg = 0 - e.ntrans = 0 -} - -func (e *encoder) numElements() int { - return len(e.scene) -} - -func (e *encoder) transform(m f32.Affine2D) { - e.scene = append(e.scene, scene.Transform(m)) - e.ntrans++ -} - -func (e *encoder) lineWidth(width float32) { - e.scene = append(e.scene, scene.SetLineWidth(width)) -} - -func (e *encoder) fillMode(mode scene.FillMode) { - e.scene = append(e.scene, scene.SetFillMode(mode)) -} - -func (e *encoder) beginClip(bbox f32.Rectangle) { - e.scene = append(e.scene, scene.BeginClip(bbox)) - e.npath++ -} - -func (e *encoder) endClip(bbox f32.Rectangle) { - e.scene = append(e.scene, scene.EndClip(bbox)) - e.npath++ -} - -func (e *encoder) rect(r f32.Rectangle) { - // Rectangle corners, clock-wise. - c0, c1, c2, c3 := r.Min, f32.Pt(r.Min.X, r.Max.Y), r.Max, f32.Pt(r.Max.X, r.Min.Y) - e.line(c0, c1) - e.line(c1, c2) - e.line(c2, c3) - e.line(c3, c0) -} - -func (e *encoder) fillColor(col color.RGBA) { - e.scene = append(e.scene, scene.FillColor(col)) - e.npath++ -} - -func (e *encoder) fillImage(index int, offset image.Point) { - e.scene = append(e.scene, scene.FillImage(index, offset)) - e.npath++ -} - -func (e *encoder) line(start, end f32.Point) { - e.scene = append(e.scene, scene.Line(start, end)) - e.npathseg++ -} - -func (c *collector) reset() { - c.prevFrame, c.frame = c.frame, c.prevFrame - c.clipStates = c.clipStates[:0] - c.transStack = c.transStack[:0] - c.frame.reset() -} - -func (c *opsCollector) reset() { - c.paths = c.paths[:0] - c.clipCmds = c.clipCmds[:0] - c.ops = c.ops[:0] - c.layers = c.layers[:0] -} - -func (c *collector) addClip(state *encoderState, viewport, bounds f32.Rectangle, path []byte, key ops.Key, hash uint64, strokeWidth float32, push bool) { - // Rectangle clip regions. - if len(path) == 0 && !push { - // If the rectangular clip region contains a previous path it can be discarded. - p := state.clip - t := state.relTrans.Invert() - for p != nil { - // rect is the parent bounds transformed relative to the rectangle. - rect := transformBounds(t, p.bounds) - if rect.In(bounds) { - return - } - t = p.relTrans.Invert().Mul(t) - p = p.parent - } - } - - absBounds := transformBounds(state.t, bounds).Bounds() - intersect := absBounds - if state.clip != nil { - intersect = state.clip.intersect.Intersect(intersect) - } - c.clipStates = append(c.clipStates, clipState{ - parent: state.clip, - absBounds: absBounds, - path: path, - pathKey: key, - intersect: intersect, - clipKey: clipKey{ - bounds: bounds, - relTrans: state.relTrans, - strokeWidth: strokeWidth, - pathHash: hash, - }, - }) - state.clip = &c.clipStates[len(c.clipStates)-1] - state.relTrans = f32.Affine2D{} -} - -func (c *collector) collect(root *op.Ops, viewport image.Point, texOps *[]textureOp) { - fview := f32.Rectangle{Max: layout.FPt(viewport)} - var intOps *ops.Ops - if root != nil { - intOps = &root.Internal - } - c.reader.Reset(intOps) - var state encoderState - reset := func() { - state = encoderState{ - paintKey: paintKey{ - color: color.NRGBA{A: 0xff}, - }, - } - } - reset() - r := &c.reader - var ( - pathData struct { - data []byte - key ops.Key - hash uint64 - } - strWidth float32 - ) - c.addClip(&state, fview, fview, nil, ops.Key{}, 0, 0, false) - for encOp, ok := r.Decode(); ok; encOp, ok = r.Decode() { - switch ops.OpType(encOp.Data[0]) { - case ops.TypeTransform: - dop, push := ops.DecodeTransform(encOp.Data) - if push { - c.transStack = append(c.transStack, transEntry{t: state.t, relTrans: state.relTrans}) - } - state.t = state.t.Mul(dop) - state.relTrans = state.relTrans.Mul(dop) - case ops.TypePopTransform: - n := len(c.transStack) - st := c.transStack[n-1] - c.transStack = c.transStack[:n-1] - state.t = st.t - state.relTrans = st.relTrans - case ops.TypeStroke: - strWidth = decodeStrokeOp(encOp.Data) - case ops.TypePath: - hash := bo.Uint64(encOp.Data[1:]) - encOp, ok = r.Decode() - if !ok { - panic("unexpected end of path operation") - } - pathData.data = encOp.Data[ops.TypeAuxLen:] - pathData.key = encOp.Key - pathData.hash = hash - case ops.TypeClip: - var op ops.ClipOp - op.Decode(encOp.Data) - bounds := f32.FRect(op.Bounds) - c.addClip(&state, fview, bounds, pathData.data, pathData.key, pathData.hash, strWidth, true) - pathData.data = nil - strWidth = 0 - case ops.TypePopClip: - state.relTrans = state.clip.relTrans.Mul(state.relTrans) - state.clip = state.clip.parent - case ops.TypeColor: - state.matType = materialColor - state.color = decodeColorOp(encOp.Data) - case ops.TypeLinearGradient: - state.matType = materialLinearGradient - op := decodeLinearGradientOp(encOp.Data) - state.stop1 = op.stop1 - state.stop2 = op.stop2 - state.color1 = op.color1 - state.color2 = op.color2 - case ops.TypeImage: - state.matType = materialTexture - state.image = decodeImageOp(encOp.Data, encOp.Refs) - case ops.TypePaint: - paintState := state - if paintState.matType == materialTexture { - // Clip to the bounds of the image, to hide other images in the atlas. - sz := state.image.src.Rect.Size() - bounds := f32.Rectangle{Max: layout.FPt(sz)} - c.addClip(&paintState, fview, bounds, nil, ops.Key{}, 0, 0, false) - } - intersect := paintState.clip.intersect - if intersect.Empty() { - break - } - - // If the paint is a uniform opaque color that takes up the whole - // screen, it covers all previous paints and we can discard all - // rendering commands recorded so far. - if paintState.clip == nil && paintState.matType == materialColor && paintState.color.A == 255 { - c.clearColor = f32color.LinearFromSRGB(paintState.color).Opaque() - c.clear = true - c.frame.reset() - break - } - - // Flatten clip stack. - p := paintState.clip - startIdx := len(c.frame.clipCmds) - for p != nil { - idx := len(c.frame.paths) - c.frame.paths = append(c.frame.paths, make([]byte, len(p.path))...) - path := c.frame.paths[idx:] - copy(path, p.path) - c.frame.clipCmds = append(c.frame.clipCmds, clipCmd{ - state: p.clipKey, - path: path, - pathKey: p.pathKey, - absBounds: p.absBounds, - }) - p = p.parent - } - clipStack := c.frame.clipCmds[startIdx:] - c.frame.ops = append(c.frame.ops, paintOp{ - clipStack: clipStack, - state: paintState.paintKey, - intersect: intersect, - }) - case ops.TypeSave: - id := ops.DecodeSave(encOp.Data) - c.save(id, state.t) - case ops.TypeLoad: - reset() - id := ops.DecodeLoad(encOp.Data) - state.t = c.states[id] - state.relTrans = state.t - } - } - for i := range c.frame.ops { - op := &c.frame.ops[i] - // For each clip, cull rectangular clip regions that contain its - // (transformed) bounds. addClip already handled the converse case. - // TODO: do better than O(n²) to efficiently deal with deep stacks. - for j := 0; j < len(op.clipStack)-1; j++ { - cl := op.clipStack[j] - p := cl.state - r := transformBounds(p.relTrans, p.bounds) - for k := j + 1; k < len(op.clipStack); k++ { - cl2 := op.clipStack[k] - p2 := cl2.state - if len(cl2.path) == 0 && r.In(cl2.state.bounds) { - op.clipStack = append(op.clipStack[:k], op.clipStack[k+1:]...) - k-- - op.clipStack[k].state.relTrans = p2.relTrans.Mul(op.clipStack[k].state.relTrans) - } - r = transformRect(p2.relTrans, r) - } - } - // Separate the integer offset from the first transform. Two ops that differ - // only in integer offsets may share backing storage. - if len(op.clipStack) > 0 { - c := &op.clipStack[len(op.clipStack)-1] - t := c.state.relTrans - t, off := separateTransform(t) - c.state.relTrans = t - op.offset = off - op.state.t = op.state.t.Offset(layout.FPt(off.Mul(-1))) - } - op.hash = c.hashOp(*op) - op.texOpIdx = -1 - switch op.state.matType { - case materialTexture: - op.texOpIdx = len(*texOps) - // Separate integer offset from transformation. TextureOps that have identical transforms - // except for their integer offsets can share a transformed image. - t := op.state.t.Offset(layout.FPt(op.offset)) - t, off := separateTransform(t) - bounds := op.intersect.Round().Sub(off) - *texOps = append(*texOps, textureOp{ - img: op.state.image, - off: off, - key: textureKey{ - bounds: bounds, - transform: t, - handle: op.state.image.handle, - }, - }) - } - } -} - -func (c *collector) hashOp(op paintOp) uint64 { - c.hasher.Reset() - for _, cl := range op.clipStack { - k := cl.state - keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k))) - c.hasher.Write(keyBytes[:]) - } - k := op.state - keyBytes := (*[unsafe.Sizeof(k)]byte)(unsafe.Pointer(unsafe.Pointer(&k))) - c.hasher.Write(keyBytes[:]) - return c.hasher.Sum64() -} - -func (g *compute) layer(viewport image.Point, texOps []textureOp) { - // Sort ops from previous frames by hash. - c := &g.collector - prevOps := c.prevFrame.ops - c.order = c.order[:0] - for i, op := range prevOps { - c.order = append(c.order, hashIndex{ - index: i, - hash: op.hash, - }) - } - sort.Slice(c.order, func(i, j int) bool { - return c.order[i].hash < c.order[j].hash - }) - // Split layers with different materials atlas; the compute stage has only - // one materials slot. - splitLayer := func(ops []paintOp, prevLayerIdx int) { - for len(ops) > 0 { - var materials *textureAtlas - idx := 0 - for idx < len(ops) { - if i := ops[idx].texOpIdx; i != -1 { - omats := texOps[i].matAlloc.alloc.atlas - if materials != nil && omats != nil && omats != materials { - break - } - materials = omats - } - idx++ - } - l := layer{ops: ops[:idx], materials: materials} - if prevLayerIdx != -1 { - prev := c.prevFrame.layers[prevLayerIdx] - if !prev.alloc.dead && len(prev.ops) == len(l.ops) { - l.alloc = prev.alloc - l.materials = prev.materials - g.touchAlloc(l.alloc) - } - } - for i, op := range l.ops { - l.rect = l.rect.Union(op.intersect.Round()) - l.ops[i].layer = len(c.frame.layers) - } - c.frame.layers = append(c.frame.layers, l) - ops = ops[idx:] - } - } - ops := c.frame.ops - idx := 0 - for idx < len(ops) { - op := ops[idx] - // Search for longest matching op sequence. - // start is the earliest index of a match. - start := searchOp(c.order, op.hash) - layerOps, prevLayerIdx := longestLayer(prevOps, c.order[start:], ops[idx:]) - if len(layerOps) == 0 { - idx++ - continue - } - if unmatched := ops[:idx]; len(unmatched) > 0 { - // Flush layer of unmatched ops. - splitLayer(unmatched, -1) - ops = ops[idx:] - idx = 0 - } - splitLayer(layerOps, prevLayerIdx) - ops = ops[len(layerOps):] - } - if len(ops) > 0 { - splitLayer(ops, -1) - } -} - -func longestLayer(prev []paintOp, order []hashIndex, ops []paintOp) ([]paintOp, int) { - longest := 0 - longestIdx := -1 -outer: - for len(order) > 0 { - first := order[0] - order = order[1:] - match := prev[first.index:] - // Potential match found. Now find longest matching sequence. - end := 0 - layer := match[0].layer - off := match[0].offset.Sub(ops[0].offset) - for end < len(match) && end < len(ops) { - m := match[end] - o := ops[end] - // End layers on previous match. - if m.layer != layer { - break - } - // End layer when the next op doesn't match. - if m.hash != o.hash { - if end == 0 { - // Hashes are sorted so if the first op doesn't match, no - // more matches are possible. - break outer - } - break - } - if !opEqual(off, m, o) { - break - } - end++ - } - if end > longest { - longest = end - longestIdx = layer - - } - } - return ops[:longest], longestIdx -} - -func searchOp(order []hashIndex, hash uint64) int { - lo, hi := 0, len(order) - for lo < hi { - mid := (lo + hi) / 2 - if order[mid].hash < hash { - lo = mid + 1 - } else { - hi = mid - } - } - return lo -} - -func opEqual(off image.Point, o1 paintOp, o2 paintOp) bool { - if len(o1.clipStack) != len(o2.clipStack) { - return false - } - if o1.state != o2.state { - return false - } - if o1.offset.Sub(o2.offset) != off { - return false - } - for i, cl1 := range o1.clipStack { - cl2 := o2.clipStack[i] - if len(cl1.path) != len(cl2.path) { - return false - } - if cl1.state != cl2.state { - return false - } - if cl1.pathKey != cl2.pathKey && !bytes.Equal(cl1.path, cl2.path) { - return false - } - } - return true -} - -func encodeLayer(l layer, pos image.Point, viewport image.Point, enc *encoder, texOps []textureOp) { - off := pos.Sub(l.rect.Min) - offf := layout.FPt(off) - - enc.transform(f32.Affine2D{}.Offset(offf)) - for _, op := range l.ops { - encodeOp(viewport, off, enc, texOps, op) - } - enc.transform(f32.Affine2D{}.Offset(offf.Mul(-1))) -} - -func encodeOp(viewport image.Point, absOff image.Point, enc *encoder, texOps []textureOp, op paintOp) { - // Fill in clip bounds, which the shaders expect to be the union - // of all affected bounds. - var union f32.Rectangle - for i, cl := range op.clipStack { - union = union.Union(cl.absBounds) - op.clipStack[i].union = union - } - - absOfff := layout.FPt(absOff) - fillMode := scene.FillModeNonzero - opOff := layout.FPt(op.offset) - inv := f32.Affine2D{}.Offset(opOff) - enc.transform(inv) - for i := len(op.clipStack) - 1; i >= 0; i-- { - cl := op.clipStack[i] - if w := cl.state.strokeWidth; w > 0 { - enc.fillMode(scene.FillModeStroke) - enc.lineWidth(w) - fillMode = scene.FillModeStroke - } else if fillMode != scene.FillModeNonzero { - enc.fillMode(scene.FillModeNonzero) - fillMode = scene.FillModeNonzero - } - enc.transform(cl.state.relTrans) - inv = inv.Mul(cl.state.relTrans) - if len(cl.path) == 0 { - enc.rect(cl.state.bounds) - } else { - enc.encodePath(cl.path, fillMode) - } - if i != 0 { - enc.beginClip(cl.union.Add(absOfff)) - } - } - if len(op.clipStack) == 0 { - // No clipping; fill the entire view. - enc.rect(f32.Rectangle{Max: layout.FPt(viewport)}) - } - - switch op.state.matType { - case materialTexture: - texOp := texOps[op.texOpIdx] - off := texOp.matAlloc.alloc.rect.Min.Add(texOp.matAlloc.offset).Sub(texOp.off).Sub(absOff) - enc.fillImage(0, off) - case materialColor: - enc.fillColor(f32color.NRGBAToRGBA(op.state.color)) - case materialLinearGradient: - // TODO: implement. - enc.fillColor(f32color.NRGBAToRGBA(op.state.color1)) - default: - panic("not implemented") - } - enc.transform(inv.Invert()) - // Pop the clip stack, except the first entry used for fill. - for i := 1; i < len(op.clipStack); i++ { - cl := op.clipStack[i] - enc.endClip(cl.union.Add(absOfff)) - } - if fillMode != scene.FillModeNonzero { - enc.fillMode(scene.FillModeNonzero) - } -} - -func (c *collector) save(id int, state f32.Affine2D) { - if extra := id - len(c.states) + 1; extra > 0 { - c.states = append(c.states, make([]f32.Affine2D, extra)...) - } - c.states[id] = state -} - -func transformBounds(t f32.Affine2D, bounds f32.Rectangle) rectangle { - return rectangle{ - t.Transform(bounds.Min), t.Transform(f32.Pt(bounds.Max.X, bounds.Min.Y)), - t.Transform(bounds.Max), t.Transform(f32.Pt(bounds.Min.X, bounds.Max.Y)), - } -} - -func separateTransform(t f32.Affine2D) (f32.Affine2D, image.Point) { - sx, hx, ox, hy, sy, oy := t.Elems() - intx, fracx := math.Modf(float64(ox)) - inty, fracy := math.Modf(float64(oy)) - t = f32.NewAffine2D(sx, hx, float32(fracx), hy, sy, float32(fracy)) - return t, image.Pt(int(intx), int(inty)) -} - -func transformRect(t f32.Affine2D, r rectangle) rectangle { - var tr rectangle - for i, c := range r { - tr[i] = t.Transform(c) - } - return tr -} - -func (r rectangle) In(b f32.Rectangle) bool { - for _, c := range r { - inside := b.Min.X <= c.X && c.X <= b.Max.X && - b.Min.Y <= c.Y && c.Y <= b.Max.Y - if !inside { - return false - } - } - return true -} - -func (r rectangle) Contains(b f32.Rectangle) bool { - return true -} - -func (r rectangle) Bounds() f32.Rectangle { - bounds := f32.Rectangle{ - Min: f32.Pt(math.MaxFloat32, math.MaxFloat32), - Max: f32.Pt(-math.MaxFloat32, -math.MaxFloat32), - } - for _, c := range r { - if c.X < bounds.Min.X { - bounds.Min.X = c.X - } - if c.Y < bounds.Min.Y { - bounds.Min.Y = c.Y - } - if c.X > bounds.Max.X { - bounds.Max.X = c.X - } - if c.Y > bounds.Max.Y { - bounds.Max.Y = c.Y - } - } - return bounds -} diff --git a/gpu/cpu.go b/gpu/cpu.go deleted file mode 100644 index f2f84ad0..00000000 --- a/gpu/cpu.go +++ /dev/null @@ -1,129 +0,0 @@ -// SPDX-License-Identifier: Unlicense OR MIT - -package gpu - -import ( - "unsafe" - - "gioui.org/cpu" -) - -// This file contains code specific to running compute shaders on the CPU. - -// dispatcher dispatches CPU compute programs across multiple goroutines. -type dispatcher struct { - // done is notified when a worker completes its work slice. - done chan struct{} - // work receives work slice indices. It is closed when the dispatcher is released. - work chan work - // dispatch receives compute jobs, which is then split among workers. - dispatch chan dispatch - // sync receives notification when a Sync completes. - sync chan struct{} -} - -type work struct { - ctx *cpu.DispatchContext - index int -} - -type dispatch struct { - _type jobType - program *cpu.ProgramInfo - descSet unsafe.Pointer - x, y, z int -} - -type jobType uint8 - -const ( - jobDispatch jobType = iota - jobBarrier - jobSync -) - -func newDispatcher(workers int) *dispatcher { - d := &dispatcher{ - work: make(chan work, workers), - done: make(chan struct{}, workers), - // Leave some room to avoid blocking calls to Dispatch. - dispatch: make(chan dispatch, 20), - sync: make(chan struct{}), - } - for i := 0; i < workers; i++ { - go d.worker() - } - go d.dispatcher() - return d -} - -func (d *dispatcher) dispatcher() { - defer close(d.work) - var free []*cpu.DispatchContext - defer func() { - for _, ctx := range free { - ctx.Free() - } - }() - var used []*cpu.DispatchContext - for job := range d.dispatch { - switch job._type { - case jobDispatch: - if len(free) == 0 { - free = append(free, cpu.NewDispatchContext()) - } - ctx := free[len(free)-1] - free = free[:len(free)-1] - used = append(used, ctx) - ctx.Prepare(cap(d.work), job.program, job.descSet, job.x, job.y, job.z) - for i := 0; i < cap(d.work); i++ { - d.work <- work{ - ctx: ctx, - index: i, - } - } - case jobBarrier: - // Wait for all outstanding dispatches to complete. - for i := 0; i < len(used)*cap(d.work); i++ { - <-d.done - } - free = append(free, used...) - used = used[:0] - case jobSync: - d.sync <- struct{}{} - } - } -} - -func (d *dispatcher) worker() { - thread := cpu.NewThreadContext() - defer thread.Free() - for w := range d.work { - w.ctx.Dispatch(w.index, thread) - d.done <- struct{}{} - } -} - -func (d *dispatcher) Barrier() { - d.dispatch <- dispatch{_type: jobBarrier} -} - -func (d *dispatcher) Sync() { - d.dispatch <- dispatch{_type: jobSync} - <-d.sync -} - -func (d *dispatcher) Dispatch(program *cpu.ProgramInfo, descSet unsafe.Pointer, x, y, z int) { - d.dispatch <- dispatch{ - _type: jobDispatch, - program: program, - descSet: descSet, - x: x, - y: y, - z: z, - } -} - -func (d *dispatcher) Stop() { - close(d.dispatch) -} diff --git a/gpu/gpu.go b/gpu/gpu.go index ea5a6b02..b68ca541 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -9,11 +9,11 @@ package gpu import ( "encoding/binary" + "errors" "fmt" "image" "image/color" "math" - "os" "reflect" "time" "unsafe" @@ -343,13 +343,12 @@ func New(api API) (GPU, error) { func NewWithDevice(d driver.Device) (GPU, error) { d.BeginFrame(nil, false, image.Point{}) defer d.EndFrame() - forceCompute := os.Getenv("GIORENDERER") == "forcecompute" feats := d.Caps().Features switch { - case !forceCompute && feats.Has(driver.FeatureFloatRenderTargets) && feats.Has(driver.FeatureSRGB): + case feats.Has(driver.FeatureFloatRenderTargets) && feats.Has(driver.FeatureSRGB): return newGPU(d) } - return newCompute(d) + return nil, errors.New("no available GPU driver") } func newGPU(ctx driver.Device) (*gpu, error) { @@ -1484,7 +1483,7 @@ func (d *drawOps) buildVerts(pathData []byte, tr f32.Affine2D, outline bool, str // as needed and feeds them to the supplied splitter. func decodeToOutlineQuads(qs *quadSplitter, tr f32.Affine2D, pathData []byte) { for len(pathData) >= scene.CommandSize+4 { - qs.contour = bo.Uint32(pathData) + qs.contour = binary.LittleEndian.Uint32(pathData) cmd := ops.DecodeCommand(pathData[4:]) switch cmd.Op() { case scene.OpLine: @@ -1579,3 +1578,15 @@ func isPureOffset(t f32.Affine2D) bool { a, b, _, d, e, _ := t.Elems() return a == 1 && b == 0 && d == 0 && e == 1 } + +func newShaders(ctx driver.Device, vsrc, fsrc shader.Sources) (vert driver.VertexShader, frag driver.FragmentShader, err error) { + vert, err = ctx.NewVertexShader(vsrc) + if err != nil { + return + } + frag, err = ctx.NewFragmentShader(fsrc) + if err != nil { + vert.Release() + } + return +}