all: switch to external shaders in the gioui.org/shaders module

Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
Elias Naur
2021-08-02 17:46:40 +02:00
parent 18b4442393
commit 6aee543234
50 changed files with 112 additions and 11502 deletions
+22 -28
View File
@@ -19,6 +19,7 @@ import (
"time"
"unsafe"
"gioui.org/cpu"
"gioui.org/f32"
"gioui.org/gpu/internal/driver"
"gioui.org/internal/byteslice"
@@ -29,9 +30,9 @@ import (
"gioui.org/layout"
"gioui.org/op"
"gioui.org/op/clip"
"gioui.org/cpu"
"gioui.org/cpu/piet"
"gioui.org/shader"
"gioui.org/shader/gio"
"gioui.org/shader/piet"
)
type compute struct {
@@ -390,29 +391,22 @@ func newCompute(ctx driver.Device) (*compute, error) {
}
shaders := []struct {
prog *computeProgram
src driver.ShaderSources
src shader.Sources
info *cpu.ProgramInfo
hash string
}{
{&g.programs.elements, shader_elements_comp, piet.ElementsProgramInfo, piet.ElementsHash},
{&g.programs.tileAlloc, shader_tile_alloc_comp, piet.Tile_allocProgramInfo, piet.Tile_allocHash},
{&g.programs.pathCoarse, shader_path_coarse_comp, piet.Path_coarseProgramInfo, piet.Path_coarseHash},
{&g.programs.backdrop, shader_backdrop_comp, piet.BackdropProgramInfo, piet.BackdropHash},
{&g.programs.binning, shader_binning_comp, piet.BinningProgramInfo, piet.BinningHash},
{&g.programs.coarse, shader_coarse_comp, piet.CoarseProgramInfo, piet.CoarseHash},
{&g.programs.kernel4, shader_kernel4_comp, piet.Kernel4ProgramInfo, piet.Kernel4Hash},
{&g.programs.elements, piet.Shader_elements_comp, piet.ElementsProgramInfo},
{&g.programs.tileAlloc, piet.Shader_tile_alloc_comp, piet.Tile_allocProgramInfo},
{&g.programs.pathCoarse, piet.Shader_path_coarse_comp, piet.Path_coarseProgramInfo},
{&g.programs.backdrop, piet.Shader_backdrop_comp, piet.BackdropProgramInfo},
{&g.programs.binning, piet.Shader_binning_comp, piet.BinningProgramInfo},
{&g.programs.coarse, piet.Shader_coarse_comp, piet.CoarseProgramInfo},
{&g.programs.kernel4, piet.Shader_kernel4_comp, piet.Kernel4ProgramInfo},
}
if !caps.Features.Has(driver.FeatureCompute) {
g.useCPU = supportsCPUCompute
for _, s := range shaders {
if s.src.Hash != s.hash {
g.useCPU = false
break
}
}
if !g.useCPU {
if !supportsCPUCompute {
return nil, errors.New("gpu: missing support for compute programs")
}
g.useCPU = true
}
if g.useCPU {
g.dispatcher = newDispatcher(runtime.NumCPU())
@@ -420,15 +414,15 @@ func newCompute(ctx driver.Device) (*compute, error) {
// Large enough for reasonable fill sizes, yet still spannable by the compute programs.
g.output.packer.maxDim = 4096
blitProg, err := ctx.NewProgram(shader_copy_vert, shader_copy_frag)
blitProg, err := ctx.NewProgram(gio.Shader_copy_vert, gio.Shader_copy_frag)
if err != nil {
g.Release()
return nil, err
}
g.output.blitProg = blitProg
progLayout, err := ctx.NewInputLayout(shader_copy_vert, []driver.InputDesc{
{Type: driver.DataTypeFloat, Size: 2, Offset: 0},
{Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2},
progLayout, err := ctx.NewInputLayout(gio.Shader_copy_vert, []shader.InputDesc{
{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
})
if err != nil {
g.Release()
@@ -445,15 +439,15 @@ func newCompute(ctx driver.Device) (*compute, error) {
g.output.uniBuf = buf
g.output.blitProg.SetVertexUniforms(buf)
materialProg, err := ctx.NewProgram(shader_material_vert, shader_material_frag)
materialProg, err := ctx.NewProgram(gio.Shader_material_vert, gio.Shader_material_frag)
if err != nil {
g.Release()
return nil, err
}
g.materials.prog = materialProg
progLayout, err = ctx.NewInputLayout(shader_material_vert, []driver.InputDesc{
{Type: driver.DataTypeFloat, Size: 2, Offset: 0},
{Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2},
progLayout, err = ctx.NewInputLayout(gio.Shader_material_vert, []shader.InputDesc{
{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
})
if err != nil {
g.Release()
-5
View File
@@ -1,5 +0,0 @@
// SPDX-License-Identifier: Unlicense OR MIT
package gpu
//go:generate go run ./internal/convertshaders -package gpu
+33 -5
View File
@@ -9,12 +9,14 @@ package gpu
import (
"encoding/binary"
"errors"
"fmt"
"image"
"image/color"
"math"
"os"
"reflect"
"runtime/debug"
"time"
"unsafe"
@@ -29,6 +31,8 @@ import (
"gioui.org/layout"
"gioui.org/op"
"gioui.org/op/clip"
"gioui.org/shader"
"gioui.org/shader/gio"
// Register backends.
_ "gioui.org/gpu/internal/d3d11"
@@ -129,6 +133,10 @@ type imageOp struct {
place placement
}
// shaderModuleVersion is the exact version of gioui.org/shader expected by
// this package. Shader programs are not backwards or forwards compatible.
const shaderModuleVersion = "v0.0.0-20210808092941-55e18336189e"
func decodeStrokeOp(data []byte) clip.StrokeStyle {
_ = data[4]
if opconst.OpType(data[0]) != opconst.TypeStroke {
@@ -350,6 +358,9 @@ const (
)
func New(api API) (GPU, error) {
if err := verifyShaderModule(); err != nil {
return nil, err
}
d, err := driver.NewDevice(api)
if err != nil {
return nil, err
@@ -376,6 +387,23 @@ func newGPU(ctx driver.Device) (*gpu, error) {
return g, nil
}
// verifyShaderModule checks that the gioui.org/shader module recorded in the
// binary's build information is exactly shaderModuleVersion.
//
// It returns nil when the versions match or when no build information is
// available, and an error when the module is listed with another version or
// is not listed among the dependencies at all.
func verifyShaderModule() error {
	info, hasInfo := debug.ReadBuildInfo()
	if !hasInfo {
		// No module support; hopefully the version matches.
		return nil
	}
	for _, dep := range info.Deps {
		if dep.Path != "gioui.org/shader" {
			continue
		}
		if dep.Version == shaderModuleVersion {
			return nil
		}
		return fmt.Errorf("gpu: module gioui.org/shader is version %q, expected %q", dep.Version, shaderModuleVersion)
	}
	return errors.New("gpu: module version for gioui.org/shader not found")
}
func (g *gpu) init(ctx driver.Device) error {
g.ctx = ctx
g.renderer = newRenderer(ctx)
@@ -530,7 +558,7 @@ func newBlitter(ctx driver.Device) *blitter {
b.colUniforms = new(blitColUniforms)
b.texUniforms = new(blitTexUniforms)
b.linearGradientUniforms = new(blitLinearGradientUniforms)
prog, layout, err := createColorPrograms(ctx, shader_blit_vert, shader_blit_frag,
prog, layout, err := createColorPrograms(ctx, gio.Shader_blit_vert, gio.Shader_blit_frag,
[3]interface{}{&b.colUniforms.vert, &b.linearGradientUniforms.vert, &b.texUniforms.vert},
[3]interface{}{&b.colUniforms.frag, &b.linearGradientUniforms.frag, nil},
)
@@ -550,7 +578,7 @@ func (b *blitter) release() {
b.layout.Release()
}
func createColorPrograms(b driver.Device, vsSrc driver.ShaderSources, fsSrc [3]driver.ShaderSources, vertUniforms, fragUniforms [3]interface{}) ([3]*program, driver.InputLayout, error) {
func createColorPrograms(b driver.Device, vsSrc shader.Sources, fsSrc [3]shader.Sources, vertUniforms, fragUniforms [3]interface{}) ([3]*program, driver.InputLayout, error) {
var progs [3]*program
{
prog, err := b.NewProgram(vsSrc, fsSrc[materialTexture])
@@ -603,9 +631,9 @@ func createColorPrograms(b driver.Device, vsSrc driver.ShaderSources, fsSrc [3]d
}
progs[materialLinearGradient] = newProgram(prog, vertBuffer, fragBuffer)
}
layout, err := b.NewInputLayout(vsSrc, []driver.InputDesc{
{Type: driver.DataTypeFloat, Size: 2, Offset: 0},
{Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2},
layout, err := b.NewInputLayout(vsSrc, []shader.InputDesc{
{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
})
if err != nil {
progs[materialTexture].Release()
+6 -4
View File
@@ -15,6 +15,8 @@ import (
"gioui.org/gpu/internal/driver"
"gioui.org/internal/byteslice"
"gioui.org/internal/f32color"
"gioui.org/shader"
"gioui.org/shader/gio"
)
var dumpImages = flag.Bool("saveimages", false, "save test images")
@@ -36,7 +38,7 @@ func TestSimpleShader(t *testing.T) {
b := newDriver(t)
sz := image.Point{X: 800, Y: 600}
fbo := setupFBO(t, b, sz)
p, err := b.NewProgram(shader_simple_vert, shader_simple_frag)
p, err := b.NewProgram(gio.Shader_simple_vert, gio.Shader_simple_frag)
if err != nil {
t.Fatal(err)
}
@@ -59,7 +61,7 @@ func TestInputShader(t *testing.T) {
b := newDriver(t)
sz := image.Point{X: 800, Y: 600}
fbo := setupFBO(t, b, sz)
p, err := b.NewProgram(shader_input_vert, shader_simple_frag)
p, err := b.NewProgram(gio.Shader_input_vert, gio.Shader_simple_frag)
if err != nil {
t.Fatal(err)
}
@@ -77,9 +79,9 @@ func TestInputShader(t *testing.T) {
}
defer buf.Release()
b.BindVertexBuffer(buf, 4*4, 0)
layout, err := b.NewInputLayout(shader_input_vert, []driver.InputDesc{
layout, err := b.NewInputLayout(gio.Shader_input_vert, []shader.InputDesc{
{
Type: driver.DataTypeFloat,
Type: shader.DataTypeFloat,
Size: 4,
Offset: 0,
},
-5
View File
@@ -1,5 +0,0 @@
// SPDX-License-Identifier: Unlicense OR MIT
package headless
//go:generate go run ../internal/convertshaders -package headless
-233
View File
@@ -1,233 +0,0 @@
// Code generated by build.go. DO NOT EDIT.
package headless
import "gioui.org/gpu/internal/driver"
var (
shader_input_vert = driver.ShaderSources{
Name: "input.vert",
Inputs: []driver.InputLocation{{Name: "position", Location: 0, Semantic: "TEXCOORD", SemanticIndex: 0, Type: 0x0, Size: 4}},
GLSL100ES: `#version 100
attribute vec4 position;
void main()
{
gl_Position = position;
}
`,
GLSL300ES: `#version 300 es
layout(location = 0) in vec4 position;
void main()
{
gl_Position = position;
}
`,
GLSL130: `#version 130
#ifdef GL_ARB_shading_language_420pack
#extension GL_ARB_shading_language_420pack : require
#endif
in vec4 position;
void main()
{
gl_Position = position;
}
`,
GLSL150: `#version 150
#ifdef GL_ARB_shading_language_420pack
#extension GL_ARB_shading_language_420pack : require
#endif
in vec4 position;
void main()
{
gl_Position = position;
}
`,
HLSL: "DXBC\x1e»\x11\xd3iX7\xd4F\xb9\xa4\xf4R\xf9J\x01\x00\x00\x00\x10\x02\x00\x00\x06\x00\x00\x008\x00\x00\x00\x9c\x00\x00\x00\xe0\x00\x00\x00\\\x01\x00\x00\xa8\x01\x00\x00\xdc\x01\x00\x00Aon9\\\x00\x00\x00\\\x00\x00\x00\x00\x02\xfe\xff4\x00\x00\x00(\x00\x00\x00\x00\x00$\x00\x00\x00$\x00\x00\x00$\x00\x00\x00$\x00\x01\x00$\x00\x00\x00\x00\x00\x00\x02\xfe\xff\x1f\x00\x00\x02\x05\x00\x00\x80\x00\x00\x0f\x90\x04\x00\x00\x04\x00\x00\x03\xc0\x00\x00\xff\x90\x00\x00\xe4\xa0\x00\x00\xe4\x90\x01\x00\x00\x02\x00\x00\f\xc0\x00\x00\xe4\x90\xff\xff\x00\x00SHDR<\x00\x00\x00@\x00\x01\x00\x0f\x00\x00\x00_\x00\x00\x03\xf2\x10\x10\x00\x00\x00\x00\x00g\x00\x00\x04\xf2 \x10\x00\x00\x00\x00\x00\x01\x00\x00\x006\x00\x00\x05\xf2 \x10\x00\x00\x00\x00\x00F\x1e\x10\x00\x00\x00\x00\x00>\x00\x00\x01STATt\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00RDEFD\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1c\x00\x00\x00\x00\x04\xfe\xff\x00\x01\x00\x00\x1c\x00\x00\x00Microsoft (R) HLSL Shader Compiler 10.1\x00ISGN,\x00\x00\x00\x01\x00\x00\x00\b\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x0f\x0f\x00\x00TEXCOORD\x00\xab\xab\xabOSGN,\x00\x00\x00\x01\x00\x00\x00\b\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00SV_Position\x00",
}
shader_simple_frag = driver.ShaderSources{
Name: "simple.frag",
GLSL100ES: `#version 100
precision mediump float;
precision highp int;
void main()
{
gl_FragData[0] = vec4(0.25, 0.550000011920928955078125, 0.75, 1.0);
}
`,
GLSL300ES: `#version 300 es
precision mediump float;
precision highp int;
layout(location = 0) out vec4 fragColor;
void main()
{
fragColor = vec4(0.25, 0.550000011920928955078125, 0.75, 1.0);
}
`,
GLSL130: `#version 130
#ifdef GL_ARB_shading_language_420pack
#extension GL_ARB_shading_language_420pack : require
#endif
out vec4 fragColor;
void main()
{
fragColor = vec4(0.25, 0.550000011920928955078125, 0.75, 1.0);
}
`,
GLSL150: `#version 150
#ifdef GL_ARB_shading_language_420pack
#extension GL_ARB_shading_language_420pack : require
#endif
out vec4 fragColor;
void main()
{
fragColor = vec4(0.25, 0.550000011920928955078125, 0.75, 1.0);
}
`,
HLSL: "DXBC\xf5F\xdef$)\xa8\xbbV\xeas\xb5ks\x12r\x01\x00\x00\x00\xdc\x01\x00\x00\x06\x00\x00\x008\x00\x00\x00\x90\x00\x00\x00\xd0\x00\x00\x00L\x01\x00\x00\x98\x01\x00\x00\xa8\x01\x00\x00Aon9P\x00\x00\x00P\x00\x00\x00\x00\x02\xff\xff,\x00\x00\x00$\x00\x00\x00\x00\x00$\x00\x00\x00$\x00\x00\x00$\x00\x00\x00$\x00\x00\x00$\x00\x00\x02\xff\xffQ\x00\x00\x05\x00\x00\x0f\xa0\x00\x00\x80>\xcd\xcc\f?\x00\x00@?\x00\x00\x80?\x01\x00\x00\x02\x00\b\x0f\x80\x00\x00\xe4\xa0\xff\xff\x00\x00SHDR8\x00\x00\x00@\x00\x00\x00\x0e\x00\x00\x00e\x00\x00\x03\xf2 \x10\x00\x00\x00\x00\x006\x00\x00\b\xf2 \x10\x00\x00\x00\x00\x00\x02@\x00\x00\x00\x00\x80>\xcd\xcc\f?\x00\x00@?\x00\x00\x80?>\x00\x00\x01STATt\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00RDEFD\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1c\x00\x00\x00\x00\x04\xff\xff\x00\x01\x00\x00\x1c\x00\x00\x00Microsoft (R) HLSL Shader Compiler 10.1\x00ISGN\b\x00\x00\x00\x00\x00\x00\x00\b\x00\x00\x00OSGN,\x00\x00\x00\x01\x00\x00\x00\b\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00SV_Target\x00\xab\xab",
}
shader_simple_vert = driver.ShaderSources{
Name: "simple.vert",
GLSL100ES: `#version 100
void main()
{
float x;
float y;
if (gl_VertexID == 0)
{
x = 0.0;
y = 0.5;
}
else
{
if (gl_VertexID == 1)
{
x = 0.5;
y = -0.5;
}
else
{
x = -0.5;
y = -0.5;
}
}
gl_Position = vec4(x, y, 0.5, 1.0);
}
`,
GLSL300ES: `#version 300 es
void main()
{
float x;
float y;
if (gl_VertexID == 0)
{
x = 0.0;
y = 0.5;
}
else
{
if (gl_VertexID == 1)
{
x = 0.5;
y = -0.5;
}
else
{
x = -0.5;
y = -0.5;
}
}
gl_Position = vec4(x, y, 0.5, 1.0);
}
`,
GLSL130: `#version 130
#ifdef GL_ARB_shading_language_420pack
#extension GL_ARB_shading_language_420pack : require
#endif
void main()
{
float x;
float y;
if (gl_VertexID == 0)
{
x = 0.0;
y = 0.5;
}
else
{
if (gl_VertexID == 1)
{
x = 0.5;
y = -0.5;
}
else
{
x = -0.5;
y = -0.5;
}
}
gl_Position = vec4(x, y, 0.5, 1.0);
}
`,
GLSL150: `#version 150
#ifdef GL_ARB_shading_language_420pack
#extension GL_ARB_shading_language_420pack : require
#endif
void main()
{
float x;
float y;
if (gl_VertexID == 0)
{
x = 0.0;
y = 0.5;
}
else
{
if (gl_VertexID == 1)
{
x = 0.5;
y = -0.5;
}
else
{
x = -0.5;
y = -0.5;
}
}
gl_Position = vec4(x, y, 0.5, 1.0);
}
`,
HLSL: "DXBC\xc8 \\\"\xec\xe9\xb2)@\xdf|Z(\xea\f\xb8\x01\x00\x00\x00H\x02\x00\x00\x05\x00\x00\x004\x00\x00\x00\x80\x00\x00\x00\xb4\x00\x00\x00\xe8\x00\x00\x00\xcc\x01\x00\x00RDEFD\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1c\x00\x00\x00\x00\x04\xfe\xff\x00\x01\x00\x00\x1c\x00\x00\x00Microsoft (R) HLSL Shader Compiler 10.1\x00ISGN,\x00\x00\x00\x01\x00\x00\x00\b\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00SV_VertexID\x00OSGN,\x00\x00\x00\x01\x00\x00\x00\b\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00SV_Position\x00SHDR\xdc\x00\x00\x00@\x00\x01\x007\x00\x00\x00`\x00\x00\x04\x12\x10\x10\x00\x00\x00\x00\x00\x06\x00\x00\x00g\x00\x00\x04\xf2 \x10\x00\x00\x00\x00\x00\x01\x00\x00\x00h\x00\x00\x02\x01\x00\x00\x00 \x00\x00\a\x12\x00\x10\x00\x00\x00\x00\x00\n\x10\x10\x00\x00\x00\x00\x00\x01@\x00\x00\x01\x00\x00\x007\x00\x00\x0f2\x00\x10\x00\x00\x00\x00\x00\x06\x00\x10\x00\x00\x00\x00\x00\x02@\x00\x00\x00\x00\x00?\x00\x00\x00\xbf\x00\x00\x00\x00\x00\x00\x00\x00\x02@\x00\x00\x00\x00\x00\xbf\x00\x00\x00\xbf\x00\x00\x00\x00\x00\x00\x00\x007\x00\x00\f2 \x10\x00\x00\x00\x00\x00\x06\x10\x10\x00\x00\x00\x00\x00F\x00\x10\x00\x00\x00\x00\x00\x02@\x00\x00\x00\x00\x00\x00\x00\x00\x00?\x00\x00\x00\x00\x00\x00\x00\x006\x00\x00\b\xc2 \x10\x00\x00\x00\x00\x00\x02@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00?\x00\x00\x80?>\x00\x00\x01STATt\x00\x00\x00\x05\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
}
)
-11
View File
@@ -1,11 +0,0 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
precision highp float;
layout(location=0) in vec4 position;
// Pass the position vertex attribute through unchanged as the output
// position.
void main() {
	gl_Position = position;
}
-11
View File
@@ -1,11 +0,0 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
precision mediump float;
layout(location = 0) out vec4 fragColor;
// Write the same constant, fully opaque colour for every fragment.
void main() {
	fragColor = vec4(.25, .55, .75, 1.0);
}
-20
View File
@@ -1,20 +0,0 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
precision highp float;
// Emit one of three hard-coded positions selected by the vertex index, so no
// vertex attributes are read: index 0 is (0, .5), index 1 is (.5, -.5) and
// every other index is (-.5, -.5).
void main() {
	float x, y;
	if (gl_VertexIndex == 0) {
		x = 0.0;
		y = .5;
	} else if (gl_VertexIndex == 1) {
		x = .5;
		y = -.5;
	} else {
		x = -.5;
		y = -.5;
	}
	// z and w are the same for all three vertices.
	gl_Position = vec4(x, y, 0.5, 1.0);
}
@@ -1,50 +0,0 @@
// SPDX-License-Identifier: Unlicense OR MIT
package main
import (
"bytes"
"fmt"
"io/ioutil"
"os/exec"
"path/filepath"
)
// GLSLValidator runs glslangValidator, the OpenGL reference compiler.
type GLSLValidator struct {
	// Bin is the name of, or path to, the compiler executable.
	Bin string
	// WorkDir is used to derive the path of the compiler's output file.
	WorkDir WorkDir
}

// NewGLSLValidator returns a GLSLValidator that invokes "glslangValidator".
// WorkDir is left at its zero value.
func NewGLSLValidator() *GLSLValidator {
	v := new(GLSLValidator)
	v.Bin = "glslangValidator"
	return v
}
// Convert compiles the GLSL shader in input to SPIR-V and returns the
// compiled bytes.
//
// The source is fed to the compiler on standard input. path supplies the
// include directory (its parent), the shader stage (its extension without the
// dot) and, together with variant, the name of the output file inside
// WorkDir. When hlsl is set, the HLSL macro is defined for the compilation.
func (glsl *GLSLValidator) Convert(path, variant string, hlsl bool, input []byte) ([]byte, error) {
	outFile := glsl.WorkDir.Path(filepath.Base(path), variant) + ".out"
	args := []string{
		"--stdin",
		"-I" + filepath.Dir(path),
		"-V", // OpenGL ES 3.1.
		"-w", // Suppress warnings.
		"-S", filepath.Ext(path)[1:],
		"-o", outFile,
	}
	if hlsl {
		args = append(args, "-DHLSL")
	}

	cmd := exec.Command(glsl.Bin, args...)
	cmd.Stdin = bytes.NewBuffer(input)
	stdout, err := cmd.Output()
	if err != nil {
		return nil, fmt.Errorf("%s\nfailed to run %v: %w", stdout, cmd.Args, err)
	}

	spirv, err := ioutil.ReadFile(outFile)
	if err != nil {
		return nil, fmt.Errorf("unable to read output %q: %w", outFile, err)
	}
	return spirv, nil
}
-146
View File
@@ -1,146 +0,0 @@
// SPDX-License-Identifier: Unlicense OR MIT
package main
import (
"bytes"
"fmt"
"io"
"io/ioutil"
"os/exec"
"path/filepath"
"runtime"
"strings"
)
// FXC runs fxc, the HLSL compiler that targets ShaderModel 5.x and lower.
type FXC struct {
	// Bin is the name of, or path to, the compiler executable.
	Bin string
	// WorkDir holds the compiler's input and output files.
	WorkDir WorkDir
}

// NewFXC returns an FXC that invokes "fxc.exe". WorkDir is left at its zero
// value.
func NewFXC() *FXC {
	c := new(FXC)
	c.Bin = "fxc.exe"
	return c
}
// Compile compiles the HLSL source in input with fxc and returns the compiled
// shader as a string.
//
// The source is written to a file in WorkDir whose name is derived from the
// base name of path, variant and profileVersion; the compiler writes its
// output next to it. The extension of path selects the profile prefix
// (".frag" → "ps_", ".vert" → "vs_", ".comp" → "cs_"), which is combined with
// profileVersion; any other extension is an error. entryPoint names the
// shader entry function.
//
// On non-Windows hosts the compiler is run through wine and the file paths on
// its command line are converted to Windows form with winepath.
func (fxc *FXC) Compile(path, variant string, input []byte, entryPoint string, profileVersion string) (string, error) {
	base := fxc.WorkDir.Path(filepath.Base(path), variant, profileVersion)
	pathin := base + ".in"
	pathout := base + ".out"
	// result keeps the native path of the output file; pathin and pathout may
	// be rewritten to Windows form below and are then only meaningful to fxc.
	result := pathout

	if err := fxc.WorkDir.WriteFile(pathin, input); err != nil {
		return "", fmt.Errorf("unable to write shader to disk: %w", err)
	}

	cmd := exec.Command(fxc.Bin)
	if runtime.GOOS != "windows" {
		cmd = exec.Command("wine", fxc.Bin)
		if err := winepath(&pathin, &pathout); err != nil {
			return "", err
		}
	}

	var profile string
	switch filepath.Ext(path) {
	case ".frag":
		profile = "ps_" + profileVersion
	case ".vert":
		profile = "vs_" + profileVersion
	case ".comp":
		profile = "cs_" + profileVersion
	default:
		return "", fmt.Errorf("unrecognized shader type %s", path)
	}

	cmd.Args = append(cmd.Args,
		"/Fo", pathout,
		"/T", profile,
		"/E", entryPoint,
		pathin,
	)

	output, err := cmd.CombinedOutput()
	if err != nil {
		info := ""
		if runtime.GOOS != "windows" {
			info = "If the fxc tool cannot be found, set WINEPATH to the Windows path for the Windows SDK.\n"
		}
		return "", fmt.Errorf("%s\n%sfailed to run %v: %w", output, info, cmd.Args, err)
	}

	compiled, err := ioutil.ReadFile(result)
	if err != nil {
		// Report the path that was actually read, not the (possibly
		// wine-converted) path handed to the compiler.
		return "", fmt.Errorf("unable to read output %q: %w", result, err)
	}

	return string(compiled), nil
}
// DXC runs dxc, the HLSL compiler that targets ShaderModel 6.0 and newer.
type DXC struct {
	// Bin is the name of, or path to, the compiler executable.
	Bin string
	// WorkDir holds the compiler's input and output files.
	WorkDir WorkDir
}

// NewDXC returns a DXC that invokes "dxc". WorkDir is left at its zero value.
func NewDXC() *DXC {
	c := new(DXC)
	c.Bin = "dxc"
	return c
}
// Compile compiles the HLSL source in input with dxc and returns the compiled
// shader as a string.
//
// The source is written to a file in WorkDir whose name is derived from the
// base name of path, variant and profile; the compiler writes its output next
// to it with reflection data stripped. profile is passed to the compiler as
// the target profile and entryPoint as the entry function.
func (dxc *DXC) Compile(path, variant string, input []byte, entryPoint string, profile string) (string, error) {
	stem := dxc.WorkDir.Path(filepath.Base(path), variant, profile)
	srcFile, objFile := stem+".in", stem+".out"

	if err := dxc.WorkDir.WriteFile(srcFile, input); err != nil {
		return "", fmt.Errorf("unable to write shader to disk: %w", err)
	}

	cmd := exec.Command(dxc.Bin,
		"-Fo", objFile,
		"-T", profile,
		"-E", entryPoint,
		"-Qstrip_reflect",
		srcFile,
	)
	combined, err := cmd.CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("%s\nfailed to run %v: %w", combined, cmd.Args, err)
	}

	blob, err := ioutil.ReadFile(objFile)
	if err != nil {
		return "", fmt.Errorf("unable to read output %q: %w", objFile, err)
	}
	return string(blob), nil
}
// winepath uses the winepath tool to convert paths to Windows format.
// Each pointed-to path is replaced in place by its converted form, which can
// then be used as an argument for Windows command line tools.
//
// An error is returned, and no path is modified, when the tool cannot be
// started, exits unsuccessfully, or prints fewer lines than there are paths.
func winepath(paths ...*string) error {
	winepath := exec.Command("winepath", "--windows")
	for _, path := range paths {
		winepath.Args = append(winepath.Args, *path)
	}
	// Use a pipe instead of Output, because winepath may have left wineserver
	// running for several seconds as a grandchild.
	out, err := winepath.StdoutPipe()
	if err != nil {
		return fmt.Errorf("unable to start winepath: %w", err)
	}
	if err := winepath.Start(); err != nil {
		return fmt.Errorf("unable to start winepath: %w", err)
	}
	var buf bytes.Buffer
	_, copyErr := io.Copy(&buf, out)
	// Always reap the child once its output has been drained, so that a
	// non-zero exit status is noticed and no zombie process is left behind.
	waitErr := winepath.Wait()
	if copyErr != nil {
		return fmt.Errorf("unable to run winepath: %w", copyErr)
	}
	if waitErr != nil {
		return fmt.Errorf("unable to run winepath: %w", waitErr)
	}
	winPaths := strings.Split(strings.TrimSpace(buf.String()), "\n")
	// Validate before touching any path: indexing a short result would panic.
	if len(winPaths) < len(paths) {
		return fmt.Errorf("unable to run winepath: got %d paths for %d arguments", len(winPaths), len(paths))
	}
	for i, path := range paths {
		*path = winPaths[i]
	}
	return nil
}
-418
View File
@@ -1,418 +0,0 @@
// SPDX-License-Identifier: Unlicense OR MIT
package main
import (
"bytes"
"crypto/sha256"
"encoding/hex"
"errors"
"flag"
"fmt"
"io"
"io/ioutil"
"os"
"os/exec"
"path/filepath"
"sort"
"strconv"
"strings"
"sync"
"text/template"
"gioui.org/gpu/internal/driver"
)
// main converts the shaders in the directory given by -dir into a generated
// shaders.go in the current directory and formats it with gofmt.
//
// Intermediate files go to the -work directory, or to a temporary directory
// that is removed before the program ends when -work is not given. Any
// failure is reported on standard error and ends the program with status 1.
func main() {
	var (
		pkgName, workFlag, shadersDir string
		directCompute                 bool
	)
	flag.StringVar(&pkgName, "package", "", "specify Go package name")
	flag.StringVar(&workFlag, "work", "", "temporary working directory (default TEMP)")
	flag.StringVar(&shadersDir, "dir", "shaders", "shaders directory")
	flag.BoolVar(&directCompute, "directcompute", false, "enable compiling DirectCompute shaders")
	flag.Parse()

	cleanup := func() {}
	// die reports a fatal error and exits. Deferred calls do not run on
	// os.Exit, so the current cleanup is invoked explicitly first.
	die := func(format string, args ...interface{}) {
		fmt.Fprintf(os.Stderr, format, args...)
		cleanup()
		os.Exit(1)
	}

	var work WorkDir
	if workFlag != "" {
		if abs, err := filepath.Abs(workFlag); err == nil {
			workFlag = abs
		}
		work = WorkDir(workFlag)
	} else {
		tempdir, err := ioutil.TempDir("", "shader-convert")
		if err != nil {
			die("failed to create tempdir: %v\n", err)
		}
		cleanup = func() { os.RemoveAll(tempdir) }
		work = WorkDir(tempdir)
	}
	// For a user-supplied work directory cleanup is still the no-op.
	defer cleanup()

	var generated bytes.Buffer
	if err := NewConverter(work, pkgName, shadersDir, directCompute).Run(&generated); err != nil {
		die("%v\n", err)
	}

	if err := ioutil.WriteFile("shaders.go", generated.Bytes(), 0644); err != nil {
		die("failed to create shaders: %v\n", err)
	}

	gofmt := exec.Command("gofmt", "-s", "-w", "shaders.go")
	gofmt.Stdout, gofmt.Stderr = os.Stdout, os.Stderr
	if err := gofmt.Run(); err != nil {
		die("formatting shaders.go failed: %v\n", err)
	}
}
// Converter turns the shader sources of one directory into Go source
// declaring driver.ShaderSources values, using external compiler tools.
type Converter struct {
	// workDir is the working directory handed to NewConverter.
	workDir WorkDir
	// shadersDir is the directory whose files are converted by Run.
	shadersDir string
	// directCompute makes ComputeShader include the compiled HLSL in its
	// result.
	directCompute bool
	// packageName is written into the package clause of the generated file.
	packageName string

	// External tools used for the individual conversion steps.
	glslvalidator *GLSLValidator
	spirv         *SPIRVCross
	fxc           *FXC
}
// NewConverter returns a Converter that reads shaders from shadersDir and
// generates code for the Go package packageName.
//
// shadersDir is made absolute when that is possible. The glslangValidator and
// spirv-cross binaries are looked up immediately; a missing binary is only
// reported on standard error. Each tool is given its own sub-directory of
// workDir.
func NewConverter(workDir WorkDir, packageName, shadersDir string, directCompute bool) *Converter {
	if abs, err := filepath.Abs(shadersDir); err == nil {
		shadersDir = abs
	}

	validator, cross, fxc := NewGLSLValidator(), NewSPIRVCross(), NewFXC()

	verifyBinaryPath(&validator.Bin)
	verifyBinaryPath(&cross.Bin)
	// We cannot check fxc since it may depend on wine.

	validator.WorkDir = workDir.Dir("glslvalidator")
	fxc.WorkDir = workDir.Dir("fxc")
	cross.WorkDir = workDir.Dir("spirv")

	return &Converter{
		workDir:       workDir,
		shadersDir:    shadersDir,
		directCompute: directCompute,
		packageName:   packageName,
		glslvalidator: validator,
		spirv:         cross,
		fxc:           fxc,
	}
}
// verifyBinaryPath replaces *bin with the path exec.LookPath resolves it to.
// When the lookup fails, a message is written to standard error and *bin is
// left unchanged.
func verifyBinaryPath(bin *string) {
	resolved, err := exec.LookPath(*bin)
	if err != nil {
		fmt.Fprintf(os.Stderr, "unable to find %q: %v\n", *bin, err)
		return
	}
	*bin = resolved
}
// Run converts every shader in the converter's shader directory and writes
// the generated Go source to out.
//
// Files ending in .vert or .frag are converted with Shader, files ending in
// .comp with ComputeShader, and all other files are skipped. Conversions run
// concurrently, one goroutine per shader, while the output is emitted in
// sorted path order. If any conversion fails nothing is written and the
// returned error lists every failure. An empty shader directory is an error.
func (conv *Converter) Run(out io.Writer) error {
	shaders, err := filepath.Glob(filepath.Join(conv.shadersDir, "*"))
	if err != nil {
		return fmt.Errorf("failed to list shaders in %q: %w", conv.shadersDir, err)
	}
	if len(shaders) == 0 {
		// There is no underlying error to wrap here; formatting a nil error
		// with %w would render as "%!w(<nil>)".
		return fmt.Errorf("failed to list shaders in %q: no files found", conv.shadersDir)
	}

	sort.Strings(shaders)

	var workers Workers

	type ShaderResult struct {
		Path    string
		Shaders []driver.ShaderSources
		Error   error
	}
	// Each worker writes only to its own slot, so no locking is needed.
	// Slots of skipped files stay at their zero value.
	shaderResults := make([]ShaderResult, len(shaders))

	for i, shaderPath := range shaders {
		i, shaderPath := i, shaderPath

		var convert func(string) ([]driver.ShaderSources, error)
		switch filepath.Ext(shaderPath) {
		case ".vert", ".frag":
			convert = conv.Shader
		case ".comp":
			convert = conv.ComputeShader
		default:
			continue
		}

		workers.Go(func() {
			shaders, err := convert(shaderPath)
			shaderResults[i] = ShaderResult{
				Path:    shaderPath,
				Shaders: shaders,
				Error:   err,
			}
		})
	}

	workers.Wait()

	var allErrors string
	for _, r := range shaderResults {
		if r.Error != nil {
			if len(allErrors) > 0 {
				allErrors += "\n\n"
			}
			allErrors += "--- " + r.Path + " --- \n\n" + r.Error.Error() + "\n"
		}
	}
	if len(allErrors) > 0 {
		return errors.New(allErrors)
	}

	fmt.Fprintf(out, "// Code generated by build.go. DO NOT EDIT.\n\n")
	fmt.Fprintf(out, "package %s\n\n", conv.packageName)
	fmt.Fprintf(out, "import %q\n\n", "gioui.org/gpu/internal/driver")

	fmt.Fprintf(out, "var (\n")

	for _, r := range shaderResults {
		if len(r.Shaders) == 0 {
			continue
		}

		// The variable is named after the file: "blit.frag" → shader_blit_frag.
		name := filepath.Base(r.Path)
		name = strings.ReplaceAll(name, ".", "_")
		fmt.Fprintf(out, "\tshader_%s = ", name)

		// Several variants are emitted as an array, a single one as a plain
		// value.
		multiVariant := len(r.Shaders) > 1
		if multiVariant {
			fmt.Fprintf(out, "[...]driver.ShaderSources{\n")
		}

		for _, src := range r.Shaders {
			writeShaderSources(out, src)
			if multiVariant {
				fmt.Fprintf(out, ",")
			}
			fmt.Fprintf(out, "\n")
		}

		if multiVariant {
			fmt.Fprintf(out, "}\n")
		}
	}
	fmt.Fprintf(out, ")\n")

	return nil
}

// writeShaderSources writes src to out as a driver.ShaderSources composite
// literal, omitting empty fields and without a trailing separator or newline.
func writeShaderSources(out io.Writer, src driver.ShaderSources) {
	fmt.Fprintf(out, "driver.ShaderSources{\n")
	fmt.Fprintf(out, "Name: %#v,\n", src.Name)
	if len(src.Inputs) > 0 {
		fmt.Fprintf(out, "Inputs: %#v,\n", src.Inputs)
	}
	if u := src.Uniforms; len(u.Blocks) > 0 {
		fmt.Fprintf(out, "Uniforms: driver.UniformsReflection{\n")
		fmt.Fprintf(out, "Blocks: %#v,\n", u.Blocks)
		fmt.Fprintf(out, "Locations: %#v,\n", u.Locations)
		fmt.Fprintf(out, "Size: %d,\n", u.Size)
		fmt.Fprintf(out, "},\n")
	}
	if len(src.Textures) > 0 {
		fmt.Fprintf(out, "Textures: %#v,\n", src.Textures)
	}
	if len(src.GLSL100ES) > 0 {
		fmt.Fprintf(out, "GLSL100ES: `%s`,\n", src.GLSL100ES)
	}
	if len(src.GLSL300ES) > 0 {
		fmt.Fprintf(out, "GLSL300ES: `%s`,\n", src.GLSL300ES)
	}
	if len(src.GLSL310ES) > 0 {
		fmt.Fprintf(out, "GLSL310ES: `%s`,\n", src.GLSL310ES)
	}
	if len(src.GLSL130) > 0 {
		fmt.Fprintf(out, "GLSL130: `%s`,\n", src.GLSL130)
	}
	if len(src.GLSL150) > 0 {
		fmt.Fprintf(out, "GLSL150: `%s`,\n", src.GLSL150)
	}
	// Compiled HLSL is binary data, so it is emitted as a quoted string
	// rather than a raw string literal.
	if len(src.HLSL) > 0 {
		fmt.Fprintf(out, "HLSL: %q,\n", src.HLSL)
	}
	if len(src.Hash) > 0 {
		fmt.Fprintf(out, "Hash: %q,\n", src.Hash)
	}
	fmt.Fprintf(out, "}")
}
// Shader converts the vertex or fragment shader at shaderPath.
//
// The file is parsed as a text/template and executed once for each of three
// variants (a uniform colour, a two-colour gradient and a texture lookup).
// Every rendering is cross-compiled to GLSL 1.00 ES, 3.00 ES, 1.30 and 1.50
// and to compiled HLSL; reflection metadata is taken from the GLSL 3.00 ES
// conversion. One ShaderSources is returned per variant, or a single one when
// the first two variants yield the same GLSL 1.00 ES source.
func (conv *Converter) Shader(shaderPath string) ([]driver.ShaderSources, error) {
	// Variant is the data made available to the shader template.
	type Variant struct {
		FetchColorExpr string
		Header         string
	}
	variantArgs := [...]Variant{
		{
			FetchColorExpr: `_color.color`,
			Header:         `layout(binding=0) uniform Color { vec4 color; } _color;`,
		},
		{
			FetchColorExpr: `mix(_gradient.color1, _gradient.color2, clamp(vUV.x, 0.0, 1.0))`,
			Header:         `layout(binding=0) uniform Gradient { vec4 color1; vec4 color2; } _gradient;`,
		},
		{
			FetchColorExpr: `texture(tex, vUV)`,
			Header:         `layout(binding=0) uniform sampler2D tex;`,
		},
	}

	shaderTemplate, err := template.ParseFiles(shaderPath)
	if err != nil {
		return nil, fmt.Errorf("failed to parse template %q: %w", shaderPath, err)
	}

	var variants []driver.ShaderSources
	for i, variantArg := range variantArgs {
		// The variant index doubles as the variant name passed to the tools.
		variantName := strconv.Itoa(i)
		var buf bytes.Buffer
		err := shaderTemplate.Execute(&buf, variantArg)
		if err != nil {
			return nil, fmt.Errorf("failed to execute template %q with %#v: %w", shaderPath, variantArg, err)
		}

		var sources driver.ShaderSources
		sources.Name = filepath.Base(shaderPath)

		// Ignore error; some shaders are not meant to run in GLSL 1.00.
		sources.GLSL100ES, _, _ = conv.ShaderVariant(shaderPath, variantName, buf.Bytes(), "es", "100")

		// Only the metadata of the GLSL 3.00 ES conversion is kept.
		var metadata Metadata
		sources.GLSL300ES, metadata, err = conv.ShaderVariant(shaderPath, variantName, buf.Bytes(), "es", "300")
		if err != nil {
			return nil, fmt.Errorf("failed to convert GLSL300ES:\n%w", err)
		}

		sources.GLSL130, _, err = conv.ShaderVariant(shaderPath, variantName, buf.Bytes(), "glsl", "130")
		if err != nil {
			return nil, fmt.Errorf("failed to convert GLSL130:\n%w", err)
		}

		// HLSL takes two steps: cross-compile to HLSL source, then compile
		// that source with fxc.
		hlsl, _, err := conv.ShaderVariant(shaderPath, variantName, buf.Bytes(), "hlsl", "40")
		if err != nil {
			return nil, fmt.Errorf("failed to convert HLSL:\n%w", err)
		}
		sources.HLSL, err = conv.fxc.Compile(shaderPath, variantName, []byte(hlsl), "main", "4_0_level_9_1")
		if err != nil {
			// Attempt shader model 4.0. Only the gpu/headless
			// test shaders use features not supported by level
			// 9.1.
			sources.HLSL, err = conv.fxc.Compile(shaderPath, variantName, []byte(hlsl), "main", "4_0")
			if err != nil {
				return nil, fmt.Errorf("failed to compile HLSL: %w", err)
			}
		}

		sources.GLSL150, _, err = conv.ShaderVariant(shaderPath, variantName, buf.Bytes(), "glsl", "150")
		if err != nil {
			return nil, fmt.Errorf("failed to convert GLSL150:\n%w", err)
		}

		sources.Uniforms = metadata.Uniforms
		sources.Inputs = metadata.Inputs
		sources.Textures = metadata.Textures

		variants = append(variants, sources)
	}

	// If the shader doesn't use the variant arguments, output only a single version.
	// NOTE(review): GLSL100ES is empty whenever the ignored conversion above
	// failed, in which case this compares two empty strings — confirm that
	// collapsing the variants is intended then.
	if variants[0].GLSL100ES == variants[1].GLSL100ES {
		variants = variants[:1]
	}

	return variants, nil
}
// ShaderVariant compiles the shader source src to SPIR-V, cross-compiles the
// result to lang ("es", "glsl" or "hlsl") at the given profile, and extracts
// the reflection metadata of the SPIR-V module.
//
// shaderPath and variant identify the shader to the tools and in error
// messages. On failure the returned source is empty and the metadata is the
// zero value.
func (conv *Converter) ShaderVariant(shaderPath, variant string, src []byte, lang, profile string) (string, Metadata, error) {
	var none Metadata

	forHLSL := lang == "hlsl"
	module, err := conv.glslvalidator.Convert(shaderPath, variant, forHLSL, src)
	if err != nil {
		return "", none, fmt.Errorf("failed to generate SPIR-V for %q: %w", shaderPath, err)
	}

	converted, err := conv.spirv.Convert(shaderPath, variant, module, lang, profile)
	if err != nil {
		return "", none, fmt.Errorf("failed to convert shader %q: %w", shaderPath, err)
	}

	reflection, err := conv.spirv.Metadata(shaderPath, variant, module)
	if err != nil {
		return "", none, fmt.Errorf("failed to extract metadata for shader %q: %w", shaderPath, err)
	}

	return converted, reflection, nil
}
// ComputeShader converts the compute shader at shaderPath and returns it as a
// single-element slice.
//
// The result carries the file's base name, the hex-encoded SHA-256 of the
// unmodified source file and the GLSL 3.10 ES cross-compilation. The shader is
// always compiled for HLSL shader model 5.0 as well, so failures there are
// reported, but the compiled HLSL is only included when the converter was
// created with directCompute enabled.
func (conv *Converter) ComputeShader(shaderPath string) ([]driver.ShaderSources, error) {
	source, err := ioutil.ReadFile(shaderPath)
	if err != nil {
		return nil, fmt.Errorf("failed to load shader %q: %w", shaderPath, err)
	}

	module, err := conv.glslvalidator.Convert(shaderPath, "", false, source)
	if err != nil {
		return nil, fmt.Errorf("failed to convert compute shader %q: %w", shaderPath, err)
	}

	glslES, err := conv.spirv.Convert(shaderPath, "", module, "es", "310")
	if err != nil {
		return nil, fmt.Errorf("failed to convert es compute shader %q: %w", shaderPath, err)
	}

	hlslSource, err := conv.spirv.Convert(shaderPath, "", module, "hlsl", "50")
	if err != nil {
		return nil, fmt.Errorf("failed to convert hlsl compute shader %q: %w", shaderPath, err)
	}

	compiledHLSL, err := conv.fxc.Compile(shaderPath, "0", []byte(hlslSource), "main", "5_0")
	if err != nil {
		return nil, fmt.Errorf("failed to compile hlsl compute shader %q: %w", shaderPath, err)
	}

	digest := sha256.Sum256(source)
	result := driver.ShaderSources{
		Name:      filepath.Base(shaderPath),
		Hash:      hex.EncodeToString(digest[:]),
		GLSL310ES: unixLineEnding(glslES),
	}
	if conv.directCompute {
		result.HLSL = compiledHLSL
	}

	return []driver.ShaderSources{result}, nil
}
// Workers tracks a set of goroutines so that a caller can block until all
// of them have returned. It is a thin wrapper around sync.WaitGroup and its
// zero value is ready to use.
type Workers struct {
	running sync.WaitGroup
}

// Go starts fn on a new goroutine and registers it with the group. The
// goroutine is counted before it is started, so a Wait issued after Go
// returns always waits for fn.
func (w *Workers) Go(fn func()) {
	w.running.Add(1)
	finished := w.running.Done
	go func() {
		// Deferred so the group is released even if fn panics.
		defer finished()
		fn()
	}()
}

// Wait blocks until every function started with Go has returned.
func (w *Workers) Wait() {
	w.running.Wait()
}
// unixLineEnding returns s with every Windows line ending ("\r\n") replaced
// by a single "\n". Carriage returns that are not followed by a newline are
// left untouched.
func unixLineEnding(s string) string {
	lines := strings.Split(s, "\r\n")
	return strings.Join(lines, "\n")
}
-212
View File
@@ -1,212 +0,0 @@
// SPDX-License-Identifier: Unlicense OR MIT
package main
import (
"encoding/json"
"fmt"
"os/exec"
"path/filepath"
"sort"
"strings"
"gioui.org/gpu/internal/driver"
)
// Metadata contains reflection data about a shader, as extracted from the
// JSON printed by spirv-cross in reflection mode.
type Metadata struct {
// Uniforms describes the uniform blocks and their flattened members.
Uniforms driver.UniformsReflection
// Inputs lists the shader inputs, sorted by location.
Inputs []driver.InputLocation
// Textures lists the texture bindings by name and binding index.
Textures []driver.TextureBinding
}
// SPIRVCross cross-compiles spirv shaders to es, hlsl and others.
type SPIRVCross struct {
// Bin is the name or path of the spirv-cross executable to run.
Bin string
// WorkDir is the directory where shader binaries are written before
// the executable is invoked on them.
WorkDir WorkDir
}
// NewSPIRVCross returns a SPIRVCross that runs the "spirv-cross" executable.
// The WorkDir field is left at its zero value.
func NewSPIRVCross() *SPIRVCross {
	cross := new(SPIRVCross)
	cross.Bin = "spirv-cross"
	return cross
}
// Convert cross-compiles a SPIR-V binary to the given target language.
//
// The binary is written to the work directory under a name derived from the
// base name of path and variant, and the spirv-cross executable is run on
// that file. Supported targets are "glsl" (desktop GLSL), "es" (GLSL ES) and
// "hlsl"; for the GLSL targets version is the GLSL version, for "hlsl" it is
// the shader model. The combined standard output and standard error of the
// executable is returned as the converted source. For every target except
// "hlsl", Windows line endings in the output are converted to "\n".
func (spirv *SPIRVCross) Convert(path, variant string, shader []byte, target, version string) (string, error) {
	input := spirv.WorkDir.Path(filepath.Base(path), variant)
	if err := spirv.WorkDir.WriteFile(input, shader); err != nil {
		return "", fmt.Errorf("unable to write shader to disk: %w", err)
	}

	var args []string
	switch target {
	case "glsl":
		args = []string{"--no-es", "--version", version}
	case "es":
		args = []string{"--es", "--version", version}
	case "hlsl":
		args = []string{"--hlsl", "--shader-model", version}
	default:
		return "", fmt.Errorf("unknown target %q", target)
	}
	args = append(args, "--no-420pack-extension", input)

	cmd := exec.Command(spirv.Bin, args...)
	out, err := cmd.CombinedOutput()
	if err != nil {
		return "", fmt.Errorf("%s\nfailed to run %v: %w", out, cmd.Args, err)
	}

	if target == "hlsl" {
		return string(out), nil
	}
	// Strip Windows \r in line endings.
	return unixLineEnding(string(out)), nil
}
// Metadata extracts reflection metadata for a SPIR-V shader.
//
// The binary is written to the work directory under a name derived from the
// base name of path and variant. The spirv-cross executable is then run on
// it with --reflect and its standard output is parsed with parseMetadata.
func (spirv *SPIRVCross) Metadata(path, variant string, shader []byte) (Metadata, error) {
	input := spirv.WorkDir.Path(filepath.Base(path), variant)
	if err := spirv.WorkDir.WriteFile(input, shader); err != nil {
		return Metadata{}, fmt.Errorf("unable to write shader to disk: %w", err)
	}

	reflectCmd := exec.Command(spirv.Bin, input, "--reflect")
	raw, err := reflectCmd.Output()
	if err != nil {
		return Metadata{}, fmt.Errorf("failed to run %v: %w", reflectCmd.Args, err)
	}

	parsed, err := parseMetadata(raw)
	if err != nil {
		return Metadata{}, fmt.Errorf("%s\nfailed to parse metadata: %w", raw, err)
	}
	return parsed, nil
}
// parseMetadata decodes the JSON reflection document in data and converts
// it to Metadata.
//
// Inputs are returned sorted by location; each input gets the HLSL semantic
// "TEXCOORD" with its location as semantic index. Uniform blocks are laid
// out back to back in declaration order: the offset of every member is its
// offset inside its block plus the sizes of all preceding blocks, and
// Uniforms.Size is the sum of all block sizes.
//
// An error is returned if data is not valid JSON or if an input or uniform
// member has a type that parseDataType does not support. Errors wrap the
// underlying cause.
func parseMetadata(data []byte) (Metadata, error) {
	// refl mirrors the subset of the reflection JSON that is consumed below.
	var refl struct {
		Types map[string]struct {
			Name    string `json:"name"`
			Members []struct {
				Name   string `json:"name"`
				Type   string `json:"type"`
				Offset int    `json:"offset"`
			} `json:"members"`
		} `json:"types"`
		Inputs []struct {
			Name     string `json:"name"`
			Type     string `json:"type"`
			Location int    `json:"location"`
		} `json:"inputs"`
		Textures []struct {
			Name    string `json:"name"`
			Type    string `json:"type"`
			Set     int    `json:"set"`
			Binding int    `json:"binding"`
		} `json:"textures"`
		UBOs []struct {
			Name      string `json:"name"`
			Type      string `json:"type"`
			BlockSize int    `json:"block_size"`
			Set       int    `json:"set"`
			Binding   int    `json:"binding"`
		} `json:"ubos"`
	}
	if err := json.Unmarshal(data, &refl); err != nil {
		return Metadata{}, fmt.Errorf("failed to parse reflection data: %w", err)
	}

	var m Metadata

	for _, input := range refl.Inputs {
		dataType, dataSize, err := parseDataType(input.Type)
		if err != nil {
			return Metadata{}, fmt.Errorf("parseReflection: %w", err)
		}
		m.Inputs = append(m.Inputs, driver.InputLocation{
			Name:          input.Name,
			Location:      input.Location,
			Semantic:      "TEXCOORD",
			SemanticIndex: input.Location,
			Type:          dataType,
			Size:          dataSize,
		})
	}
	sort.Slice(m.Inputs, func(i, j int) bool {
		return m.Inputs[i].Location < m.Inputs[j].Location
	})

	// blockOffset is the running start offset of the current uniform block
	// in the flattened uniform data.
	blockOffset := 0
	for _, block := range refl.UBOs {
		m.Uniforms.Blocks = append(m.Uniforms.Blocks, driver.UniformBlock{
			Name:    block.Name,
			Binding: block.Binding,
		})
		// A block whose type is missing from the types table contributes no
		// members but still advances the offset by its size.
		t := refl.Types[block.Type]
		// By convention uniform block variables are named by prepending an underscore
		// and converting to lowercase.
		blockVar := "_" + strings.ToLower(block.Name)
		for _, member := range t.Members {
			dataType, size, err := parseDataType(member.Type)
			if err != nil {
				return Metadata{}, fmt.Errorf("failed to parse reflection data: %w", err)
			}
			m.Uniforms.Locations = append(m.Uniforms.Locations, driver.UniformLocation{
				Name:   fmt.Sprintf("%s.%s", blockVar, member.Name),
				Type:   dataType,
				Size:   size,
				Offset: blockOffset + member.Offset,
			})
		}
		blockOffset += block.BlockSize
	}
	m.Uniforms.Size = blockOffset

	for _, texture := range refl.Textures {
		m.Textures = append(m.Textures, driver.TextureBinding{
			Name:    texture.Name,
			Binding: texture.Binding,
		})
	}

	return m, nil
}
// parseDataType maps a type name from the shader reflection data to a
// driver data type and its number of components.
//
// Scalar and vector floats ("float", "vec2".."vec4") and integers are
// supported. Integer vectors are accepted both under their GLSL names
// ("ivec2".."ivec4"), matching the GLSL spelling already used for the float
// vectors, and under the "int2".."int4" spellings that were accepted
// before. Any other name yields an error.
func parseDataType(t string) (driver.DataType, int, error) {
	switch t {
	case "float":
		return driver.DataTypeFloat, 1, nil
	case "vec2":
		return driver.DataTypeFloat, 2, nil
	case "vec3":
		return driver.DataTypeFloat, 3, nil
	case "vec4":
		return driver.DataTypeFloat, 4, nil
	case "int":
		return driver.DataTypeInt, 1, nil
	case "int2", "ivec2":
		return driver.DataTypeInt, 2, nil
	case "int3", "ivec3":
		return driver.DataTypeInt, 3, nil
	case "int4", "ivec4":
		return driver.DataTypeInt, 4, nil
	default:
		return 0, 0, fmt.Errorf("unsupported input data type: %s", t)
	}
}
-35
View File
@@ -1,35 +0,0 @@
// SPDX-License-Identifier: Unlicense OR MIT
package main
import (
"fmt"
"io/ioutil"
"os"
"path/filepath"
"strings"
)
// WorkDir is the path of a directory used for intermediate files.
type WorkDir string

// Dir returns the sub-directory path of wd, creating it if necessary.
//
// Creation is best effort: a failure other than "already exists" is
// reported on standard error and the sub-directory is returned regardless.
// Only the last path element is created; missing parents are not.
func (wd WorkDir) Dir(path string) WorkDir {
	sub := filepath.Join(string(wd), path)
	err := os.Mkdir(sub, 0755)
	if err != nil && !os.IsExist(err) {
		fmt.Fprintf(os.Stderr, "failed to create %q: %v\n", sub, err)
	}
	return WorkDir(sub)
}

// Path returns the path of a file inside wd whose name is the given
// elements joined with ".".
func (wd WorkDir) Path(path ...string) (fullpath string) {
	name := strings.Join(path, ".")
	return filepath.Join(string(wd), name)
}

// WriteFile writes data to the file at path, creating or truncating it.
// The path is used as given; it is not resolved relative to wd.
func (wd WorkDir) WriteFile(path string, data []byte) error {
	if err := ioutil.WriteFile(path, data, 0644); err != nil {
		return fmt.Errorf("unable to create %v: %w", path, err)
	}
	return nil
}
+9 -8
View File
@@ -14,6 +14,7 @@ import (
"gioui.org/gpu/internal/driver"
"gioui.org/internal/d3d11"
"gioui.org/shader"
)
type Backend struct {
@@ -287,7 +288,7 @@ func (b *Backend) NewFramebuffer(tex driver.Texture) (driver.Framebuffer, error)
return fbo, nil
}
func (b *Backend) NewInputLayout(vertexShader driver.ShaderSources, layout []driver.InputDesc) (driver.InputLayout, error) {
func (b *Backend) NewInputLayout(vertexShader shader.Sources, layout []shader.InputDesc) (driver.InputLayout, error) {
if len(vertexShader.Inputs) != len(layout) {
return nil, fmt.Errorf("NewInputLayout: got %d inputs, expected %d", len(layout), len(vertexShader.Inputs))
}
@@ -300,7 +301,7 @@ func (b *Backend) NewInputLayout(vertexShader driver.ShaderSources, layout []dri
}
var format uint32
switch l.Type {
case driver.DataTypeFloat:
case shader.DataTypeFloat:
switch l.Size {
case 1:
format = d3d11.DXGI_FORMAT_R32_FLOAT
@@ -313,7 +314,7 @@ func (b *Backend) NewInputLayout(vertexShader driver.ShaderSources, layout []dri
default:
panic("unsupported data size")
}
case driver.DataTypeShort:
case shader.DataTypeShort:
switch l.Size {
case 1:
format = d3d11.DXGI_FORMAT_R16_SINT
@@ -332,7 +333,7 @@ func (b *Backend) NewInputLayout(vertexShader driver.ShaderSources, layout []dri
AlignedByteOffset: uint32(l.Offset),
}
}
l, err := b.dev.CreateInputLayout(descs, []byte(vertexShader.HLSL))
l, err := b.dev.CreateInputLayout(descs, []byte(vertexShader.DXBC))
if err != nil {
return nil, err
}
@@ -380,16 +381,16 @@ func (b *Backend) NewImmutableBuffer(typ driver.BufferBinding, data []byte) (dri
return &Buffer{backend: b, buf: buf, bind: bind, immutable: true}, nil
}
func (b *Backend) NewComputeProgram(shader driver.ShaderSources) (driver.Program, error) {
func (b *Backend) NewComputeProgram(shader shader.Sources) (driver.Program, error) {
panic("not implemented")
}
func (b *Backend) NewProgram(vertexShader, fragmentShader driver.ShaderSources) (driver.Program, error) {
vs, err := b.dev.CreateVertexShader([]byte(vertexShader.HLSL))
func (b *Backend) NewProgram(vertexShader, fragmentShader shader.Sources) (driver.Program, error) {
vs, err := b.dev.CreateVertexShader([]byte(vertexShader.DXBC))
if err != nil {
return nil, err
}
ps, err := b.dev.CreatePixelShader([]byte(fragmentShader.HLSL))
ps, err := b.dev.CreatePixelShader([]byte(fragmentShader.DXBC))
if err != nil {
return nil, err
}
+5 -68
View File
@@ -6,6 +6,8 @@ import (
"errors"
"image"
"time"
"gioui.org/shader"
)
// Device represents the abstraction of underlying GPU
@@ -23,9 +25,9 @@ type Device interface {
NewFramebuffer(tex Texture) (Framebuffer, error)
NewImmutableBuffer(typ BufferBinding, data []byte) (Buffer, error)
NewBuffer(typ BufferBinding, size int) (Buffer, error)
NewComputeProgram(shader ShaderSources) (Program, error)
NewProgram(vertexShader, fragmentShader ShaderSources) (Program, error)
NewInputLayout(vertexShader ShaderSources, layout []InputDesc) (InputLayout, error)
NewComputeProgram(shader shader.Sources) (Program, error)
NewProgram(vertexShader, fragmentShader shader.Sources) (Program, error)
NewInputLayout(vertexShader shader.Sources, layout []shader.InputDesc) (InputLayout, error)
Clear(r, g, b, a float32)
Viewport(x, y, width, height int)
@@ -49,63 +51,6 @@ type Device interface {
Release()
}
// ShaderSources holds the variants of a single shader for the supported
// backends together with its reflection data.
type ShaderSources struct {
// Name identifies the shader; backends include it in error messages.
Name string
// GLSL sources, one per GLSL dialect.
// NOTE(review): the field names suggest GLSL ES 1.00/3.00/3.10 and
// desktop GLSL 1.30/1.50 — confirm against the generator.
GLSL100ES string
GLSL300ES string
// GLSL310ES is the source used for OpenGL compute programs.
GLSL310ES string
GLSL130 string
GLSL150 string
// HLSL is passed as raw bytes to the Direct3D 11 shader creation calls.
HLSL string
// Reflection data for the shader.
Uniforms UniformsReflection
Inputs []InputLocation
Textures []TextureBinding
// Hash is a digest of the shader source.
Hash string
}
// UniformsReflection describes the uniform data of a shader.
type UniformsReflection struct {
// Blocks lists the uniform blocks of the shader.
Blocks []UniformBlock
// Locations lists the individual uniform variables of all blocks.
Locations []UniformLocation
// Size is the total size of the uniform data.
// NOTE(review): presumably in bytes — confirm.
Size int
}
// TextureBinding associates a texture name in a shader with its binding
// index.
type TextureBinding struct {
Name string
Binding int
}
// UniformBlock associates a uniform block name in a shader with its binding
// index.
type UniformBlock struct {
Name string
Binding int
}
// UniformLocation describes a single uniform variable.
type UniformLocation struct {
// Name is the name used to look up the uniform in a linked program.
Name string
// Type is the component type of the variable.
Type DataType
// Size is the number of components (1 for scalars, 2-4 for vectors).
Size int
// Offset is the position of the variable in the uniform data.
Offset int
}
// InputLocation describes a vertex shader input.
type InputLocation struct {
// For GLSL.
Name string
Location int
// For HLSL.
Semantic string
SemanticIndex int
// Type is the component type of the input and Size its number of
// components.
Type DataType
Size int
}
// InputDesc describes a vertex attribute as laid out in a Buffer.
type InputDesc struct {
// Type is the component type of the attribute.
Type DataType
// Size is the number of components of the attribute.
Size int
// Offset is the offset of the attribute from the start of a vertex.
// NOTE(review): presumably in bytes, callers compute it with
// unsafe.Offsetof — confirm.
Offset int
}
// InputLayout is the driver specific representation of the mapping
// between Buffers and shader attributes.
type InputLayout interface {
@@ -123,8 +68,6 @@ type TextureFormat uint8
type BufferBinding uint8
type DataType uint8
type Features uint
type Caps struct {
@@ -167,12 +110,6 @@ type Texture interface {
Release()
}
// Component types of shader inputs and uniforms.
const (
// DataTypeFloat is the component type of floats and float vectors.
DataTypeFloat DataType = iota
// DataTypeInt is the component type of ints and int vectors.
DataTypeInt
// DataTypeShort is a vertex attribute component type; the backends map
// it to their 16-bit signed integer formats.
DataTypeShort
)
const (
BufferBindingIndices BufferBinding = 1 << iota
BufferBindingVertices
+15 -14
View File
@@ -12,6 +12,7 @@ import (
"gioui.org/gpu/internal/driver"
"gioui.org/internal/gl"
"gioui.org/shader"
)
// Backend implements driver.Device.
@@ -139,13 +140,13 @@ type uniformsTracker struct {
type uniformLocation struct {
uniform gl.Uniform
offset int
typ driver.DataType
typ shader.DataType
size int
}
type gpuInputLayout struct {
inputs []driver.InputLocation
layout []driver.InputDesc
inputs []shader.InputLocation
layout []shader.InputDesc
}
// textureTriple holds the type settings for
@@ -846,7 +847,7 @@ func (b *Backend) Clear(colR, colG, colB, colA float32) {
b.funcs.Clear(gl.COLOR_BUFFER_BIT)
}
func (b *Backend) NewInputLayout(vs driver.ShaderSources, layout []driver.InputDesc) (driver.InputLayout, error) {
func (b *Backend) NewInputLayout(vs shader.Sources, layout []shader.InputDesc) (driver.InputLayout, error) {
if len(vs.Inputs) != len(layout) {
return nil, fmt.Errorf("NewInputLayout: got %d inputs, expected %d", len(layout), len(vs.Inputs))
}
@@ -861,7 +862,7 @@ func (b *Backend) NewInputLayout(vs driver.ShaderSources, layout []driver.InputD
}, nil
}
func (b *Backend) NewComputeProgram(src driver.ShaderSources) (driver.Program, error) {
func (b *Backend) NewComputeProgram(src shader.Sources) (driver.Program, error) {
p, err := gl.CreateComputeProgram(b.funcs, src.GLSL310ES)
if err != nil {
return nil, fmt.Errorf("%s: %v", src.Name, err)
@@ -873,7 +874,7 @@ func (b *Backend) NewComputeProgram(src driver.ShaderSources) (driver.Program, e
return gpuProg, nil
}
func (b *Backend) NewProgram(vertShader, fragShader driver.ShaderSources) (driver.Program, error) {
func (b *Backend) NewProgram(vertShader, fragShader shader.Sources) (driver.Program, error) {
attr := make([]string, len(vertShader.Inputs))
for _, inp := range vertShader.Inputs {
attr[inp.Location] = inp.Name
@@ -937,7 +938,7 @@ func (b *Backend) NewProgram(vertShader, fragShader driver.ShaderSources) (drive
return gpuProg, nil
}
func lookupUniform(funcs *gl.Functions, p gl.Program, loc driver.UniformLocation) uniformLocation {
func lookupUniform(funcs *gl.Functions, p gl.Program, loc shader.UniformLocation) uniformLocation {
u := funcs.GetUniformLocation(p, loc.Name)
if !u.Valid() {
panic(fmt.Errorf("uniform %q not found", loc.Name))
@@ -985,7 +986,7 @@ func (p *gpuProgram) Release() {
p.backend.glstate.deleteProgram(p.backend.funcs, p.obj)
}
func (u *uniformsTracker) setup(funcs *gl.Functions, p gl.Program, uniformSize int, uniforms []driver.UniformLocation) {
func (u *uniformsTracker) setup(funcs *gl.Functions, p gl.Program, uniformSize int, uniforms []shader.UniformLocation) {
u.locs = make([]uniformLocation, len(uniforms))
for i, uniform := range uniforms {
u.locs[i] = lookupUniform(funcs, p, uniform)
@@ -1016,19 +1017,19 @@ func (p *uniformsTracker) update(funcs *gl.Functions) {
for _, u := range p.locs {
data := data[u.offset:]
switch {
case u.typ == driver.DataTypeFloat && u.size == 1:
case u.typ == shader.DataTypeFloat && u.size == 1:
data := data[:4]
v := *(*[1]float32)(unsafe.Pointer(&data[0]))
funcs.Uniform1f(u.uniform, v[0])
case u.typ == driver.DataTypeFloat && u.size == 2:
case u.typ == shader.DataTypeFloat && u.size == 2:
data := data[:8]
v := *(*[2]float32)(unsafe.Pointer(&data[0]))
funcs.Uniform2f(u.uniform, v[0], v[1])
case u.typ == driver.DataTypeFloat && u.size == 3:
case u.typ == shader.DataTypeFloat && u.size == 3:
data := data[:12]
v := *(*[3]float32)(unsafe.Pointer(&data[0]))
funcs.Uniform3f(u.uniform, v[0], v[1], v[2])
case u.typ == driver.DataTypeFloat && u.size == 4:
case u.typ == shader.DataTypeFloat && u.size == 4:
data := data[:16]
v := *(*[4]float32)(unsafe.Pointer(&data[0]))
funcs.Uniform4f(u.uniform, v[0], v[1], v[2], v[3])
@@ -1108,9 +1109,9 @@ func (b *Backend) setupVertexArrays() {
l := layout.layout[i]
var gltyp gl.Enum
switch l.Type {
case driver.DataTypeFloat:
case shader.DataTypeFloat:
gltyp = gl.FLOAT
case driver.DataTypeShort:
case shader.DataTypeShort:
gltyp = gl.SHORT
default:
panic("unsupported data type")
+14 -12
View File
@@ -15,6 +15,8 @@ import (
"gioui.org/gpu/internal/driver"
"gioui.org/internal/byteslice"
"gioui.org/internal/f32color"
"gioui.org/shader"
"gioui.org/shader/gio"
)
type pather struct {
@@ -161,7 +163,7 @@ func newCoverer(ctx driver.Device) *coverer {
c.colUniforms = new(coverColUniforms)
c.texUniforms = new(coverTexUniforms)
c.linearGradientUniforms = new(coverLinearGradientUniforms)
prog, layout, err := createColorPrograms(ctx, shader_cover_vert, shader_cover_frag,
prog, layout, err := createColorPrograms(ctx, gio.Shader_cover_vert, gio.Shader_cover_frag,
[3]interface{}{&c.colUniforms.vert, &c.linearGradientUniforms.vert, &c.texUniforms.vert},
[3]interface{}{&c.colUniforms.frag, &c.linearGradientUniforms.frag, nil},
)
@@ -189,19 +191,19 @@ func newStenciler(ctx driver.Device) *stenciler {
if err != nil {
panic(err)
}
progLayout, err := ctx.NewInputLayout(shader_stencil_vert, []driver.InputDesc{
{Type: driver.DataTypeFloat, Size: 1, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).Corner))},
{Type: driver.DataTypeFloat, Size: 1, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).MaxY))},
{Type: driver.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).FromX))},
{Type: driver.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).CtrlX))},
{Type: driver.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).ToX))},
progLayout, err := ctx.NewInputLayout(gio.Shader_stencil_vert, []shader.InputDesc{
{Type: shader.DataTypeFloat, Size: 1, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).Corner))},
{Type: shader.DataTypeFloat, Size: 1, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).MaxY))},
{Type: shader.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).FromX))},
{Type: shader.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).CtrlX))},
{Type: shader.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).ToX))},
})
if err != nil {
panic(err)
}
iprogLayout, err := ctx.NewInputLayout(shader_intersect_vert, []driver.InputDesc{
{Type: driver.DataTypeFloat, Size: 2, Offset: 0},
{Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2},
iprogLayout, err := ctx.NewInputLayout(gio.Shader_intersect_vert, []shader.InputDesc{
{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
})
if err != nil {
panic(err)
@@ -210,7 +212,7 @@ func newStenciler(ctx driver.Device) *stenciler {
ctx: ctx,
indexBuf: indexBuf,
}
prog, err := ctx.NewProgram(shader_stencil_vert, shader_stencil_frag)
prog, err := ctx.NewProgram(gio.Shader_stencil_vert, gio.Shader_stencil_frag)
if err != nil {
panic(err)
}
@@ -218,7 +220,7 @@ func newStenciler(ctx driver.Device) *stenciler {
vertUniforms := newUniformBuffer(ctx, &st.prog.uniforms.vert)
st.prog.prog = newProgram(prog, vertUniforms, nil)
st.prog.layout = progLayout
iprog, err := ctx.NewProgram(shader_intersect_vert, shader_intersect_frag)
iprog, err := ctx.NewProgram(gio.Shader_intersect_vert, gio.Shader_intersect_frag)
if err != nil {
panic(err)
}
-6684
View File
File diff suppressed because one or more lines are too long
-225
View File
@@ -1,225 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Code auto-generated by piet-gpu-derive
// Typed references to encoded annotated elements. Each wraps the offset of
// one element; the readers and writers shift it right by 2 before indexing
// memory, so it is presumably a byte offset into a buffer of 32-bit words.
struct AnnoImageRef {
uint offset;
};
struct AnnoColorRef {
uint offset;
};
struct AnnoBeginClipRef {
uint offset;
};
struct AnnoEndClipRef {
uint offset;
};
struct AnnotatedRef {
uint offset;
};
// Decoded image annotation.
struct AnnoImage {
vec4 bbox;
float linewidth;
uint index;
// Encoded as two signed 16-bit values packed into one word.
ivec2 offset;
};
// Encoded size: seven 32-bit words.
#define AnnoImage_size 28
// Returns the reference to the index'th AnnoImage after ref.
AnnoImageRef AnnoImage_index(AnnoImageRef ref, uint index) {
return AnnoImageRef(ref.offset + index * AnnoImage_size);
}
// Decoded solid color annotation.
struct AnnoColor {
vec4 bbox;
float linewidth;
uint rgba_color;
};
// Encoded size: six 32-bit words.
#define AnnoColor_size 24
// Returns the reference to the index'th AnnoColor after ref.
AnnoColorRef AnnoColor_index(AnnoColorRef ref, uint index) {
return AnnoColorRef(ref.offset + index * AnnoColor_size);
}
// Decoded begin-clip annotation.
struct AnnoBeginClip {
vec4 bbox;
float linewidth;
};
// Encoded size: five 32-bit words.
#define AnnoBeginClip_size 20
// Returns the reference to the index'th AnnoBeginClip after ref.
AnnoBeginClipRef AnnoBeginClip_index(AnnoBeginClipRef ref, uint index) {
return AnnoBeginClipRef(ref.offset + index * AnnoBeginClip_size);
}
// Decoded end-clip annotation.
struct AnnoEndClip {
vec4 bbox;
};
// Encoded size: four 32-bit words.
#define AnnoEndClip_size 16
// Returns the reference to the index'th AnnoEndClip after ref.
AnnoEndClipRef AnnoEndClip_index(AnnoEndClipRef ref, uint index) {
return AnnoEndClipRef(ref.offset + index * AnnoEndClip_size);
}
// Tag values identifying the variant stored in an Annotated element.
#define Annotated_Nop 0
#define Annotated_Color 1
#define Annotated_Image 2
#define Annotated_BeginClip 3
#define Annotated_EndClip 4
// Encoded size of an Annotated element: a 4 byte tag word followed by room
// for the largest payload (AnnoImage, 28 bytes).
#define Annotated_size 32
// Returns the reference to the index'th Annotated element after ref.
AnnotatedRef Annotated_index(AnnotatedRef ref, uint index) {
return AnnotatedRef(ref.offset + index * Annotated_size);
}
// Decoded first word of an Annotated element: the variant tag and the
// flags stored alongside it.
struct AnnotatedTag {
uint tag;
uint flags;
};
// Reads the AnnoImage stored at ref from allocation a.
AnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref) {
// Convert the offset to an index of 32-bit words.
uint ix = ref.offset >> 2;
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
uint raw5 = read_mem(a, ix + 5);
uint raw6 = read_mem(a, ix + 6);
AnnoImage s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.linewidth = uintBitsToFloat(raw4);
s.index = raw5;
// Unpack two signed 16-bit values: x from the low half (sign-extended by
// the shift pair), y from the high half.
s.offset = ivec2(int(raw6 << 16) >> 16, int(raw6) >> 16);
return s;
}
// Writes the AnnoImage s at ref into allocation a.
void AnnoImage_write(Alloc a, AnnoImageRef ref, AnnoImage s) {
// Convert the offset to an index of 32-bit words.
uint ix = ref.offset >> 2;
write_mem(a, ix + 0, floatBitsToUint(s.bbox.x));
write_mem(a, ix + 1, floatBitsToUint(s.bbox.y));
write_mem(a, ix + 2, floatBitsToUint(s.bbox.z));
write_mem(a, ix + 3, floatBitsToUint(s.bbox.w));
write_mem(a, ix + 4, floatBitsToUint(s.linewidth));
write_mem(a, ix + 5, s.index);
// Pack offset.x into the low 16 bits and offset.y into the high 16 bits.
write_mem(a, ix + 6, (uint(s.offset.x) & 0xffff) | (uint(s.offset.y) << 16));
}
// Reads the AnnoColor stored at ref from allocation a.
AnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref) {
// Convert the offset to an index of 32-bit words.
uint ix = ref.offset >> 2;
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
uint raw5 = read_mem(a, ix + 5);
AnnoColor s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.linewidth = uintBitsToFloat(raw4);
s.rgba_color = raw5;
return s;
}
// Writes the AnnoColor s at ref into allocation a.
void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s) {
// Convert the offset to an index of 32-bit words.
uint ix = ref.offset >> 2;
write_mem(a, ix + 0, floatBitsToUint(s.bbox.x));
write_mem(a, ix + 1, floatBitsToUint(s.bbox.y));
write_mem(a, ix + 2, floatBitsToUint(s.bbox.z));
write_mem(a, ix + 3, floatBitsToUint(s.bbox.w));
write_mem(a, ix + 4, floatBitsToUint(s.linewidth));
write_mem(a, ix + 5, s.rgba_color);
}
// Reads the AnnoBeginClip stored at ref from allocation a.
AnnoBeginClip AnnoBeginClip_read(Alloc a, AnnoBeginClipRef ref) {
// Convert the offset to an index of 32-bit words.
uint ix = ref.offset >> 2;
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
AnnoBeginClip s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.linewidth = uintBitsToFloat(raw4);
return s;
}
// Writes the AnnoBeginClip s at ref into allocation a.
void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s) {
// Convert the offset to an index of 32-bit words.
uint ix = ref.offset >> 2;
write_mem(a, ix + 0, floatBitsToUint(s.bbox.x));
write_mem(a, ix + 1, floatBitsToUint(s.bbox.y));
write_mem(a, ix + 2, floatBitsToUint(s.bbox.z));
write_mem(a, ix + 3, floatBitsToUint(s.bbox.w));
write_mem(a, ix + 4, floatBitsToUint(s.linewidth));
}
// Reads the AnnoEndClip stored at ref from allocation a.
AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) {
// Convert the offset to an index of 32-bit words.
uint ix = ref.offset >> 2;
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
AnnoEndClip s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
return s;
}
// Writes the AnnoEndClip s at ref into allocation a.
void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s) {
// Convert the offset to an index of 32-bit words.
uint ix = ref.offset >> 2;
write_mem(a, ix + 0, floatBitsToUint(s.bbox.x));
write_mem(a, ix + 1, floatBitsToUint(s.bbox.y));
write_mem(a, ix + 2, floatBitsToUint(s.bbox.z));
write_mem(a, ix + 3, floatBitsToUint(s.bbox.w));
}
// Reads the first word of the Annotated element at ref and splits it into
// the variant tag (low 16 bits) and the flags (high 16 bits).
AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) {
uint tag_and_flags = read_mem(a, ref.offset >> 2);
return AnnotatedTag(tag_and_flags & 0xffff, tag_and_flags >> 16);
}
// Variant readers. Each decodes the payload that starts 4 bytes after ref,
// directly behind the tag word. They do not check the tag; the caller is
// expected to have selected the matching variant.
AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref) {
return AnnoColor_read(a, AnnoColorRef(ref.offset + 4));
}
AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref) {
return AnnoImage_read(a, AnnoImageRef(ref.offset + 4));
}
AnnoBeginClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref) {
return AnnoBeginClip_read(a, AnnoBeginClipRef(ref.offset + 4));
}
AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref) {
return AnnoEndClip_read(a, AnnoEndClipRef(ref.offset + 4));
}
// Variant writers. Each stores the tag word at ref and, where the variant
// has one, the payload 4 bytes after it. The Color, Image and BeginClip
// variants store flags in the high 16 bits of the tag word; Nop and EndClip
// store the bare tag.
void Annotated_Nop_write(Alloc a, AnnotatedRef ref) {
write_mem(a, ref.offset >> 2, Annotated_Nop);
}
void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s) {
write_mem(a, ref.offset >> 2, (flags << 16) | Annotated_Color);
AnnoColor_write(a, AnnoColorRef(ref.offset + 4), s);
}
void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s) {
write_mem(a, ref.offset >> 2, (flags << 16) | Annotated_Image);
AnnoImage_write(a, AnnoImageRef(ref.offset + 4), s);
}
void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginClip s) {
write_mem(a, ref.offset >> 2, (flags << 16) | Annotated_BeginClip);
AnnoBeginClip_write(a, AnnoBeginClipRef(ref.offset + 4), s);
}
void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s) {
write_mem(a, ref.offset >> 2, Annotated_EndClip);
AnnoEndClip_write(a, AnnoEndClipRef(ref.offset + 4), s);
}
-109
View File
@@ -1,109 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Propagation of tile backdrop for filling.
//
// Each thread reads one path element and calculates the number of spanned tiles
// based on the bounding box.
// In a further compaction step, the workgroup loops over the corresponding tile rows per element in parallel.
// For each row the per tile backdrop will be read, as calculated in the previous coarse path segment kernel,
// and propagated from the left to the right (prefix summed).
//
// Output state:
// - Each path element has an array of tiles covering the whole path based on bounding box
// - Each tile per path element contains the 'backdrop' and a list of subdivided path segments
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
#define BACKDROP_WG (1 << LG_BACKDROP_WG)
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
#include "tile.h"
// Per-workgroup scratch, one slot per thread.
// sh_row_count first holds each element's row count and is then turned
// into an inclusive prefix sum over the workgroup.
shared uint sh_row_count[BACKDROP_WG];
// Tile allocation of the path handled by each thread.
shared Alloc sh_row_alloc[BACKDROP_WG];
// Width (bbox.z - bbox.x) of the path handled by each thread.
shared uint sh_row_width[BACKDROP_WG];
// Entry point of the backdrop kernel.
//
// Phase 1: each thread inspects one annotated element and records the
// width, row count and tile allocation of its path in shared memory.
// Phase 2: the row counts are prefix summed across the workgroup.
// Phase 3: the threads are redistributed over the rows of all paths; each
// thread prefix sums the backdrop values of one tile row from left to
// right.
void main() {
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
// Work assignment: 1 thread : 1 path element
uint row_count = 0;
// Sampled once up front; a false value disables the row processing below.
bool mem_ok = mem_error == NO_ERROR;
if (element_ix < conf.n_elements) {
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
switch (tag.tag) {
case Annotated_Image:
case Annotated_BeginClip:
case Annotated_Color:
// Elements whose fill mode is not MODE_NONZERO are skipped and keep a
// row count of 0.
if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) {
break;
}
// Fall through.
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
Path path = Path_read(conf.tile_alloc, path_ref);
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
row_count = path.bbox.w - path.bbox.y;
// Paths that don't cross tile top edges don't have backdrops.
// Don't apply the optimization to paths that may cross the y = 0
// top edge, but clipped to 1 row.
if (row_count == 1 && path.bbox.y > 0) {
// Note: this can probably be expanded to width = 2 as
// long as it doesn't cross the left edge.
row_count = 0;
}
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
sh_row_alloc[th_ix] = path_alloc;
}
}
sh_row_count[th_ix] = row_count;
// Prefix sum of sh_row_count
for (uint i = 0; i < LG_BACKDROP_WG; i++) {
barrier();
if (th_ix >= (1 << i)) {
row_count += sh_row_count[th_ix - (1 << i)];
}
barrier();
sh_row_count[th_ix] = row_count;
}
barrier();
// Work assignment: 1 thread : 1 path element row
// The last slot of the inclusive prefix sum is the total number of rows.
uint total_rows = sh_row_count[BACKDROP_WG - 1];
for (uint row = th_ix; row < total_rows; row += BACKDROP_WG) {
// Binary search to find element
uint el_ix = 0;
for (uint i = 0; i < LG_BACKDROP_WG; i++) {
uint probe = el_ix + ((BACKDROP_WG / 2) >> i);
if (row >= sh_row_count[probe - 1]) {
el_ix = probe;
}
}
uint width = sh_row_width[el_ix];
if (width > 0 && mem_ok) {
// Process one row sequentially
// Read backdrop value per tile and prefix sum it
Alloc tiles_alloc = sh_row_alloc[el_ix];
// Index of this row within its element.
uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
// Tiles are addressed as two words each; the summed value is the second
// word of a tile (the leading "+ 1").
uint tile_el_ix = (tiles_alloc.offset >> 2) + 1 + seq_ix * 2 * width;
uint sum = read_mem(tiles_alloc, tile_el_ix);
for (uint x = 1; x < width; x++) {
tile_el_ix += 2;
sum += read_mem(tiles_alloc, tile_el_ix);
write_mem(tiles_alloc, tile_el_ix, sum);
}
}
}
}
-147
View File
@@ -1,147 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The binning stage of the pipeline.
//
// Each workgroup processes N_TILE paths.
// Each thread processes one path and calculates a N_TILE_X x N_TILE_Y coverage mask
// based on the path bounding box to bin the paths.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
layout(local_size_x = N_TILE, local_size_y = 1) in;
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
#include "bins.h"
// scale factors useful for converting coordinates to bins
#define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX))
#define SY (1.0 / float(N_TILE_Y * TILE_HEIGHT_PX))
// Constant not available in GLSL. Also consider uintBitsToFloat(0x7f800000)
#define INFINITY (1.0 / 0.0)
// Note: cudaraster has N_TILE + 1 to cut down on bank conflicts.
// Bitmaps are sliced (256bit into 8 (N_SLICE) 32bit submaps)
// Per-bin coverage bitmaps: bit b of bitmaps[s][bin] is set when the
// element handled by thread s * 32 + b covers the bin.
shared uint bitmaps[N_SLICE][N_TILE];
// count[s][bin] is the number of elements covering the bin in slices 0..s.
shared uint count[N_SLICE][N_TILE];
// Output chunk allocated for each bin by the thread with the same index.
shared Alloc sh_chunk_alloc[N_TILE];
// Set when any allocation in the workgroup failed.
shared bool sh_alloc_failed;
// Entry point of the binning kernel.
//
// Each thread plays two roles. As an element it computes the range of bins
// covered by its bounding box and marks them in the shared bitmaps. As a bin
// (same thread index) it counts the elements that marked it, allocates an
// output chunk and writes the count and chunk offset to the bin header.
// Finally each element writes its index into the chunk of every bin it
// covers, at the position given by its rank in the bitmap.
void main() {
uint my_n_elements = conf.n_elements;
uint my_partition = gl_WorkGroupID.x;
for (uint i = 0; i < N_SLICE; i++) {
bitmaps[i][gl_LocalInvocationID.x] = 0;
}
if (gl_LocalInvocationID.x == 0) {
sh_alloc_failed = false;
}
barrier();
// Read inputs and determine coverage of bins
uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
// Threads past the end of the element list act on a Nop and cover nothing.
uint tag = Annotated_Nop;
if (element_ix < my_n_elements) {
tag = Annotated_tag(conf.anno_alloc, ref).tag;
}
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) {
case Annotated_Color:
case Annotated_Image:
case Annotated_BeginClip:
case Annotated_EndClip:
// Note: we take advantage of the fact that these drawing elements
// have the bbox at the same place in their layout.
AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
x0 = int(floor(clip.bbox.x * SX));
y0 = int(floor(clip.bbox.y * SY));
x1 = int(ceil(clip.bbox.z * SX));
y1 = int(ceil(clip.bbox.w * SY));
break;
}
// At this point, we run an iterator over the coverage area,
// trying to keep divergence low.
// Right now, it's just a bbox, but we'll get finer with
// segments.
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1)/N_TILE_X;
uint height_in_bins = (conf.height_in_tiles + N_TILE_Y - 1)/N_TILE_Y;
x0 = clamp(x0, 0, int(width_in_bins));
x1 = clamp(x1, x0, int(width_in_bins));
y0 = clamp(y0, 0, int(height_in_bins));
y1 = clamp(y1, y0, int(height_in_bins));
// An empty horizontal range means no coverage; collapse the vertical range
// too so the loops below do not run.
if (x0 == x1) y1 = y0;
int x = x0, y = y0;
uint my_slice = gl_LocalInvocationID.x / 32;
uint my_mask = 1 << (gl_LocalInvocationID.x & 31);
while (y < y1) {
atomicOr(bitmaps[my_slice][y * width_in_bins + x], my_mask);
x++;
if (x == x1) {
x = x0;
y++;
}
}
barrier();
// Allocate output segments.
uint element_count = 0;
for (uint i = 0; i < N_SLICE; i++) {
element_count += bitCount(bitmaps[i][gl_LocalInvocationID.x]);
count[i][gl_LocalInvocationID.x] = element_count;
}
// element_count is number of elements covering bin for this invocation.
Alloc chunk_alloc = new_alloc(0, 0, true);
if (element_count != 0) {
// TODO: aggregate atomic adds (subgroup is probably fastest)
MallocResult chunk = malloc(element_count * BinInstance_size);
chunk_alloc = chunk.alloc;
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
if (chunk.failed) {
sh_alloc_failed = true;
}
}
// Note: it might be more efficient for reading to do this in the
// other order (each bin is a contiguous sequence of partitions)
// Bin header: two words per (partition, bin) pair, count then chunk offset.
uint out_ix = (conf.bin_alloc.offset >> 2) + (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
write_mem(conf.bin_alloc, out_ix, element_count);
write_mem(conf.bin_alloc, out_ix + 1, chunk_alloc.offset);
barrier();
if (sh_alloc_failed || mem_error != NO_ERROR) {
return;
}
// Use similar strategy as Laine & Karras paper; loop over bbox of bins
// touched by this element
x = x0;
y = y0;
while (y < y1) {
uint bin_ix = y * width_in_bins + x;
uint out_mask = bitmaps[my_slice][bin_ix];
if ((out_mask & my_mask) != 0) {
// Rank of this element within the bin: set bits below ours in this slice
// plus the totals of all lower slices.
uint idx = bitCount(out_mask & (my_mask - 1));
if (my_slice > 0) {
idx += count[my_slice - 1][bin_ix];
}
Alloc out_alloc = sh_chunk_alloc[bin_ix];
uint out_offset = out_alloc.offset + idx * BinInstance_size;
BinInstance_write(out_alloc, BinInstanceRef(out_offset), BinInstance(element_ix));
}
x++;
if (x == x1) {
x = x0;
y++;
}
}
}
-31
View File
@@ -1,31 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Code auto-generated by piet-gpu-derive
// Reference to an encoded BinInstance; BinInstance_read and
// BinInstance_write divide offset by four before indexing memory.
struct BinInstanceRef {
uint offset;
};
// Decoded form of a bin entry; it carries a single element index.
struct BinInstance {
uint element_ix;
};
// Stride between consecutive encoded BinInstances, in the same unit as
// BinInstanceRef.offset.
#define BinInstance_size 4
// Returns a reference to the index'th BinInstance counted from ref.
BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) {
    uint target_offset = ref.offset + index * BinInstance_size;
    return BinInstanceRef(target_offset);
}
// Decodes the BinInstance that ref points to, reading through allocation a.
BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) {
    // The offset is divided by four to obtain the index passed to read_mem.
    uint word_ix = ref.offset >> 2;
    BinInstance result;
    result.element_ix = read_mem(a, word_ix + 0);
    return result;
}
// Encodes s at the location ref points to, writing through allocation a.
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s) {
    // The offset is divided by four to obtain the index passed to write_mem.
    uint word_ix = ref.offset >> 2;
    write_mem(a, word_ix + 0, s.element_ix);
}
-15
View File
@@ -1,15 +0,0 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
// Shader template: {{.Header}} and {{.FetchColorExpr}} are placeholders,
// presumably substituted by the host program before compilation.
precision mediump float;
layout(location=0) in vec2 vUV;
{{.Header}}
layout(location = 0) out vec4 fragColor;
void main() {
// The output color is whatever the substituted fetch expression evaluates to.
fragColor = {{.FetchColorExpr}};
}
-28
View File
@@ -1,28 +0,0 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
#extension GL_GOOGLE_include_directive : enable
precision highp float;
#include "common.h"
layout(binding = 0) uniform Block {
    // xy scales the input position, zw is added to it.
    vec4 transform;
    // xyz hold the two rows of the 3x2 UV transform.
    vec4 uvTransformR1;
    vec4 uvTransformR2;
    // z coordinate of the emitted position.
    float z;
} _block;
layout(location = 0) in vec2 pos;
layout(location = 1) in vec2 uv;
layout(location = 0) out vec2 vUV;
void main() {
    // Scale and offset the position, then convert it with toClipSpace.
    vec2 scale = _block.transform.xy;
    vec2 offset = _block.transform.zw;
    vec2 p = pos*scale + offset;
    gl_Position = toClipSpace(vec4(p, _block.z, 1));
    // Run the texture coordinate through the 3x2 UV transform.
    m3x2 uvTransform = m3x2(_block.uvTransformR1.xyz, _block.uvTransformR2.xyz);
    vUV = transform3x2(uvTransform, vec3(uv,1)).xy;
}
-426
View File
@@ -1,426 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The coarse rasterizer stage of the pipeline.
//
// As input we have the ordered partitions of paths from the binning phase and
// the annotated tile list of segments and backdrop per path.
//
// Each workgroup operating on one bin by stream compacting
// the elements corresponding to the bin.
//
// As output we have an ordered command stream per tile. Every tile from a path (backdrop + segment list) will be encoded.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
layout(local_size_x = N_TILE, local_size_y = 1) in;
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
#include "bins.h"
#include "tile.h"
#include "ptcl.h"
// Up to N_PART_READ (= 2^LG_N_PART_READ) partitions are read per round.
#define LG_N_PART_READ (7 + LG_WG_FACTOR)
#define N_PART_READ (1 << LG_N_PART_READ)
// Element indices of the current batch, one slot per invocation.
shared uint sh_elements[N_TILE];
// Number of elements in the partition; prefix sum.
shared uint sh_part_count[N_PART_READ];
// Bin allocation (offset and size) of each partition read in the current round.
shared Alloc sh_part_elements[N_PART_READ];
// Per-tile bitmaps over the batch: element i is bit (i & 31) of slice i / 32.
shared uint sh_bitmaps[N_SLICE][N_TILE];
// Number of tiles per batch element; main turns it into an inclusive prefix sum.
shared uint sh_tile_count[N_TILE];
// The width of the tile rect for the element, intersected with this bin
shared uint sh_tile_width[N_TILE];
// Top-left corner of that tile rect, relative to the bin.
shared uint sh_tile_x0[N_TILE];
shared uint sh_tile_y0[N_TILE];
// These are set up so base + tile_y * stride + tile_x points to a Tile.
shared uint sh_tile_base[N_TILE];
shared uint sh_tile_stride[N_TILE];
#ifdef MEM_DEBUG
// Store allocs only when MEM_DEBUG to save shared memory traffic.
shared Alloc sh_tile_alloc[N_TILE];
// Records the tile allocation of batch element el_ix.
void write_tile_alloc(uint el_ix, Alloc a) {
sh_tile_alloc[el_ix] = a;
}
// Returns the allocation recorded for batch element el_ix; mem_ok is unused here.
Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
return sh_tile_alloc[el_ix];
}
#else
// Without MEM_DEBUG nothing is recorded.
void write_tile_alloc(uint el_ix, Alloc a) {
// No-op
}
// Without MEM_DEBUG every element gets an allocation spanning the whole
// memory buffer; el_ix is unused.
Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
// All memory.
return new_alloc(0, memory.length()*4, mem_ok);
}
#endif
// The maximum number of commands per annotated element.
#define ANNO_COMMANDS 2
// Perhaps cmd_alloc should be a global? This is a style question.
//
// alloc_cmd makes sure cmd_ref is below cmd_limit. When the limit has been
// reached, a fresh chunk of PTCL_INITIAL_ALLOC is allocated, a jump to it is
// written at cmd_ref, and cmd_alloc, cmd_ref and cmd_limit are moved to the
// new chunk. Returns false only when that allocation fails.
bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit) {
    if (cmd_ref.offset >= cmd_limit) {
        MallocResult chunk = malloc(PTCL_INITIAL_ALLOC);
        if (chunk.failed) {
            return false;
        }
        // Link the exhausted chunk to the new one before switching over.
        Cmd_Jump_write(cmd_alloc, cmd_ref, CmdJump(chunk.alloc.offset));
        cmd_alloc = chunk.alloc;
        cmd_ref = CmdRef(cmd_alloc.offset);
        // Reserve space for the maximum number of commands and a potential jump.
        cmd_limit = cmd_alloc.offset + PTCL_INITIAL_ALLOC - (ANNO_COMMANDS + 1) * Cmd_size;
    }
    return true;
}
// write_fill emits the coverage command for tile at cmd_ref and advances
// cmd_ref past it: a stroke command unless the fill mode is MODE_NONZERO,
// otherwise a fill command when the tile has a segment list (offset != 0)
// and a solid command when it has none.
void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float linewidth) {
    if (fill_mode_from_flags(flags) != MODE_NONZERO) {
        // The stroke command stores half of the line width.
        Cmd_Stroke_write(alloc, cmd_ref, CmdStroke(tile.tile.offset, 0.5 * linewidth));
        cmd_ref.offset += 4 + CmdStroke_size;
        return;
    }
    if (tile.tile.offset == 0) {
        Cmd_Solid_write(alloc, cmd_ref);
        cmd_ref.offset += 4;
        return;
    }
    Cmd_Fill_write(alloc, cmd_ref, CmdFill(tile.tile.offset, tile.backdrop));
    cmd_ref.offset += 4 + CmdFill_size;
}
// Entry point of the coarse rasterizer. The workgroup handles the bin selected
// by gl_WorkGroupID and each invocation owns one tile of that bin, building
// that tile's command list. Elements are consumed in batches of up to N_TILE:
// the batch is read from the per-partition bin lists, the tiles every element
// touches are marked in sh_bitmaps, and each invocation then walks the
// elements marked for its tile and writes their commands.
void main() {
// Could use either linear or 2d layouts for both dispatch and
// invocations within the workgroup. We'll use variables to abstract.
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1)/N_TILE_X;
uint bin_ix = width_in_bins * gl_WorkGroupID.y + gl_WorkGroupID.x;
uint partition_ix = 0;
uint n_partitions = (conf.n_elements + N_TILE - 1) / N_TILE;
uint th_ix = gl_LocalInvocationID.x;
// Coordinates of top left of bin, in tiles.
uint bin_tile_x = N_TILE_X * gl_WorkGroupID.x;
uint bin_tile_y = N_TILE_Y * gl_WorkGroupID.y;
// Per-tile state
uint tile_x = gl_LocalInvocationID.x % N_TILE_X;
uint tile_y = gl_LocalInvocationID.x / N_TILE_X;
uint this_tile_ix = (bin_tile_y + tile_y) * conf.width_in_tiles + bin_tile_x + tile_x;
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, this_tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
// Reserve space for the maximum number of commands and a potential jump.
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - (ANNO_COMMANDS + 1) * Cmd_size;
// The nesting depth of the clip stack
uint clip_depth = 0;
// State for the "clip zero" optimization. If it's nonzero, then we are
// currently in a clip for which the entire tile has an alpha of zero, and
// the value is the depth after the "begin clip" of that element.
uint clip_zero_depth = 0;
// State for the "clip one" optimization. If bit `i` is set, then that means
// that the clip pushed at depth `i` has an alpha of all one.
uint clip_one_mask = 0;
// I'm sure we can figure out how to do this with at least one fewer register...
// Items up to rd_ix have been read from sh_elements
uint rd_ix = 0;
// Items up to wr_ix have been written into sh_elements
uint wr_ix = 0;
// Items between part_start_ix and ready_ix are ready to be transferred from sh_part_elements
uint part_start_ix = 0;
uint ready_ix = 0;
// Leave room for the fine rasterizer scratch allocation.
Alloc scratch_alloc = slice_mem(cmd_alloc, 0, Alloc_size);
cmd_ref.offset += Alloc_size;
// begin_slot counts the currently open BeginClip commands written to this
// tile; num_begin_slots is its maximum and sizes the scratch allocation.
uint num_begin_slots = 0;
uint begin_slot = 0;
bool mem_ok = mem_error == NO_ERROR;
// Each iteration processes one batch of up to N_TILE elements.
while (true) {
for (uint i = 0; i < N_SLICE; i++) {
sh_bitmaps[i][th_ix] = 0;
}
// parallel read of input partitions
do {
if (ready_ix == wr_ix && partition_ix < n_partitions) {
part_start_ix = ready_ix;
uint count = 0;
if (th_ix < N_PART_READ && partition_ix + th_ix < n_partitions) {
uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
count = read_mem(conf.bin_alloc, in_ix);
uint offset = read_mem(conf.bin_alloc, in_ix + 1);
sh_part_elements[th_ix] = new_alloc(offset, count*BinInstance_size, mem_ok);
}
// prefix sum of counts
for (uint i = 0; i < LG_N_PART_READ; i++) {
if (th_ix < N_PART_READ) {
sh_part_count[th_ix] = count;
}
barrier();
if (th_ix < N_PART_READ) {
if (th_ix >= (1 << i)) {
count += sh_part_count[th_ix - (1 << i)];
}
}
barrier();
}
if (th_ix < N_PART_READ) {
sh_part_count[th_ix] = part_start_ix + count;
}
barrier();
ready_ix = sh_part_count[N_PART_READ - 1];
partition_ix += N_PART_READ;
}
// use binary search to find element to read
uint ix = rd_ix + th_ix;
if (ix >= wr_ix && ix < ready_ix && mem_ok) {
uint part_ix = 0;
for (uint i = 0; i < LG_N_PART_READ; i++) {
uint probe = part_ix + ((N_PART_READ / 2) >> i);
if (ix >= sh_part_count[probe - 1]) {
part_ix = probe;
}
}
// Make ix relative to the start of the partition that contains it.
ix -= part_ix > 0 ? sh_part_count[part_ix - 1] : part_start_ix;
Alloc bin_alloc = sh_part_elements[part_ix];
BinInstanceRef inst_ref = BinInstanceRef(bin_alloc.offset);
BinInstance inst = BinInstance_read(bin_alloc, BinInstance_index(inst_ref, ix));
sh_elements[th_ix] = inst.element_ix;
}
barrier();
wr_ix = min(rd_ix + N_TILE, ready_ix);
} while (wr_ix - rd_ix < N_TILE && (wr_ix < ready_ix || partition_ix < n_partitions));
// We've done the merge and filled the buffer.
// Read one element, compute coverage.
uint tag = Annotated_Nop;
uint element_ix;
AnnotatedRef ref;
if (th_ix + rd_ix < wr_ix) {
element_ix = sh_elements[th_ix];
ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
tag = Annotated_tag(conf.anno_alloc, ref).tag;
}
// Bounding box of element in pixel coordinates.
uint tile_count;
switch (tag) {
case Annotated_Color:
case Annotated_Image:
case Annotated_BeginClip:
case Annotated_EndClip:
// We have one "path" for each element, even if the element isn't
// actually a path (currently EndClip, but images etc in the future).
uint path_ix = element_ix;
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
uint stride = path.bbox.z - path.bbox.x;
sh_tile_stride[th_ix] = stride;
int dx = int(path.bbox.x) - int(bin_tile_x);
int dy = int(path.bbox.y) - int(bin_tile_y);
// Clamp the path's tile rect to this bin.
int x0 = clamp(dx, 0, N_TILE_X);
int y0 = clamp(dy, 0, N_TILE_Y);
int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, N_TILE_X);
int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, N_TILE_Y);
sh_tile_width[th_ix] = uint(x1 - x0);
sh_tile_x0[th_ix] = x0;
sh_tile_y0[th_ix] = y0;
tile_count = uint(x1 - x0) * uint(y1 - y0);
// base relative to bin
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
sh_tile_base[th_ix] = base;
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
write_tile_alloc(th_ix, path_alloc);
break;
default:
tile_count = 0;
break;
}
// Prefix sum of sh_tile_count
sh_tile_count[th_ix] = tile_count;
for (uint i = 0; i < LG_N_TILE; i++) {
barrier();
if (th_ix >= (1 << i)) {
tile_count += sh_tile_count[th_ix - (1 << i)];
}
barrier();
sh_tile_count[th_ix] = tile_count;
}
barrier();
uint total_tile_count = sh_tile_count[N_TILE - 1];
// Spread the (element, tile) pairs of the batch over the invocations and
// set the element's bit in the bitmap of every tile it must be emitted to.
for (uint ix = th_ix; ix < total_tile_count; ix += N_TILE) {
// Binary search to find element
uint el_ix = 0;
for (uint i = 0; i < LG_N_TILE; i++) {
uint probe = el_ix + ((N_TILE / 2) >> i);
if (ix >= sh_tile_count[probe - 1]) {
el_ix = probe;
}
}
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + sh_elements[el_ix] * Annotated_size);
uint tag = Annotated_tag(conf.anno_alloc, ref).tag;
// seq_ix is the index of the tile within the element's clamped tile rect.
uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0);
uint width = sh_tile_width[el_ix];
uint x = sh_tile_x0[el_ix] + seq_ix % width;
uint y = sh_tile_y0[el_ix] + seq_ix / width;
bool include_tile = false;
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
include_tile = true;
} else if (mem_ok) {
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
// Include the path in the tile if
// - the tile contains at least a segment (tile offset non-zero)
// - the tile is completely covered (backdrop non-zero)
include_tile = tile.tile.offset != 0 || tile.backdrop != 0;
}
if (include_tile) {
uint el_slice = el_ix / 32;
uint el_mask = 1 << (el_ix & 31);
atomicOr(sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask);
}
}
barrier();
// Output non-segment elements for this tile. The thread does a sequential walk
// through the non-segment elements.
uint slice_ix = 0;
uint bitmap = sh_bitmaps[0][th_ix];
while (mem_ok) {
if (bitmap == 0) {
slice_ix++;
if (slice_ix == N_SLICE) {
break;
}
bitmap = sh_bitmaps[slice_ix][th_ix];
if (bitmap == 0) {
continue;
}
}
uint element_ref_ix = slice_ix * 32 + findLSB(bitmap);
uint element_ix = sh_elements[element_ref_ix];
// Clear LSB
bitmap &= bitmap - 1;
// At this point, we read the element again from global memory.
// If that turns out to be expensive, maybe we can pack it into
// shared memory (or perhaps just the tag).
ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
if (clip_zero_depth == 0) {
// NOTE(review): in the cases below, the `break` taken when alloc_cmd
// fails leaves only the switch; the enclosing while loop continues.
switch (tag.tag) {
case Annotated_Color:
Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill.linewidth);
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(fill.rgba_color));
cmd_ref.offset += 4 + CmdColor_size;
break;
case Annotated_Image:
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill_img.linewidth);
Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(fill_img.index, fill_img.offset));
cmd_ref.offset += 4 + CmdImage_size;
break;
case Annotated_BeginClip:
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
if (tile.tile.offset == 0 && tile.backdrop == 0) {
// No segments and no backdrop: enter the "clip zero" state.
clip_zero_depth = clip_depth + 1;
} else if (tile.tile.offset == 0 && clip_depth < 32) {
// No segments but a backdrop: record a "clip one" and emit nothing.
clip_one_mask |= (1 << clip_depth);
} else {
AnnoBeginClip begin_clip = Annotated_BeginClip_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, begin_clip.linewidth);
Cmd_BeginClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
if (clip_depth < 32) {
clip_one_mask &= ~(1 << clip_depth);
}
begin_slot++;
num_begin_slots = max(num_begin_slots, begin_slot);
}
clip_depth++;
break;
case Annotated_EndClip:
clip_depth--;
// Emit an EndClip only when the matching BeginClip was not a "clip one".
if (clip_depth >= 32 || (clip_one_mask & (1 << clip_depth)) == 0) {
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
Cmd_Solid_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
begin_slot--;
Cmd_EndClip_write(cmd_alloc, cmd_ref);
cmd_ref.offset += 4;
}
break;
}
} else {
// In "clip zero" state, suppress all drawing
switch (tag.tag) {
case Annotated_BeginClip:
clip_depth++;
break;
case Annotated_EndClip:
if (clip_depth == clip_zero_depth) {
clip_zero_depth = 0;
}
clip_depth--;
break;
}
}
}
barrier();
rd_ix += N_TILE;
if (rd_ix >= ready_ix && partition_ix >= n_partitions) break;
}
// Only tiles that lie inside the tile grid get their command list terminated.
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
Cmd_End_write(cmd_alloc, cmd_ref);
if (num_begin_slots > 0) {
// Write scratch allocation: one state per BeginClip per rasterizer chunk.
uint scratch_size = num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4;
MallocResult scratch = malloc(scratch_size);
// Ignore scratch.failed; we don't use the allocation and kernel4
// checks for memory overflow before using it.
alloc_write(scratch_alloc, scratch_alloc.offset, scratch.alloc);
}
}
}
-51
View File
@@ -1,51 +0,0 @@
// SPDX-License-Identifier: Unlicense OR MIT
// m3x2 holds the first two rows of a 3x3 transformation matrix;
// transform3x2 supplies [0, 0, 1] as the third row.
struct m3x2 {
vec3 r0;
vec3 r1;
};
// fboTextureTransform is the transformation
// that cancels the implied transformation between
// the framebuffer and its texture.
// Only two rows are returned. The last is implied
// to be [0, 0, 1].
const m3x2 fboTextureTransform = m3x2(
#ifdef HLSL
// Maps y to 1 - y.
vec3(1.0, 0.0, 0.0),
vec3(0.0, -1.0, 1.0)
#else
// Identity.
vec3(1.0, 0.0, 0.0),
vec3(0.0, 1.0, 0.0)
#endif
);
// fboTransform is the transformation
// that cancels the implied transformation between
// the clip space and the framebuffer.
// Only two rows are returned. The last is implied
// to be [0, 0, 1].
const m3x2 fboTransform = m3x2(
#ifdef HLSL
// Identity.
vec3(1.0, 0.0, 0.0),
vec3(0.0, 1.0, 0.0)
#else
// Maps y to -y.
vec3(1.0, 0.0, 0.0),
vec3(0.0, -1.0, 0.0)
#endif
);
// toClipSpace takes a position expressed with OpenGL gl_Position
// conventions and returns the equivalent position for the native GPU API.
// Outside of HLSL builds the position is returned unchanged.
vec4 toClipSpace(vec4 pos) {
#ifdef HLSL
    // Map depths to the Direct3D [0; 1] range.
    pos.z = (pos.z + pos.w)*.5;
#endif
    return pos;
}
// transform3x2 multiplies v by the 3x3 matrix whose first two rows are
// t.r0 and t.r1 and whose third row is [0, 0, 1].
vec3 transform3x2(m3x2 t, vec3 v) {
    float x = dot(t.r0, v);
    float y = dot(t.r1, v);
    float z = dot(vec3(0.0, 0.0, 1.0), v);
    return vec3(x, y, z);
}
-24
View File
@@ -1,24 +0,0 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
precision mediump float;
layout(binding = 0) uniform sampler2D tex;
layout(location = 0) in vec2 vUV;
layout(location = 0) out vec4 fragColor;
// sRGBtoRGB applies the piecewise sRGB decoding curve to each component:
// values below 0.04045 are divided by 12.92, the rest go through the
// ((c + 0.055) / 1.055) ^ 2.4 power segment.
vec3 sRGBtoRGB(vec3 rgb) {
    vec3 linearSegment = rgb/vec3(12.92);
    vec3 powerSegment = pow((rgb + vec3(0.055))/vec3(1.055), vec3(2.4));
    bvec3 usePower = greaterThanEqual(rgb, vec3(0.04045));
    return mix(linearSegment, powerSegment, usePower);
}
void main() {
    // Decode the color channels of the sampled texel; alpha passes through.
    vec4 sampled = texture(tex, vUV);
    fragColor = vec4(sRGBtoRGB(sampled.rgb), sampled.a);
}
-21
View File
@@ -1,21 +0,0 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
precision highp float;
layout(binding = 0) uniform Block {
    // Scale and offset applied to the input position.
    vec2 scale;
    vec2 pos;
    // Scale applied to the input texture coordinate.
    vec2 uvScale;
} _block;
layout(location = 0) in vec2 pos;
layout(location = 1) in vec2 uv;
layout(location = 0) out vec2 vUV;
void main() {
    vec2 placed = pos*_block.scale + _block.pos;
    gl_Position = vec4(placed, 0, 1);
    vUV = uv*_block.uvScale;
}
-22
View File
@@ -1,22 +0,0 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
precision mediump float;
{{.Header}}
// Use high precision to be pixel accurate for
// large cover atlases.
layout(location = 0) in highp vec2 vCoverUV;
layout(location = 1) in vec2 vUV;
layout(binding = 1) uniform sampler2D cover;
layout(location = 0) out vec4 fragColor;
void main() {
    fragColor = {{.FetchColorExpr}};
    // Coverage is the magnitude of the red channel of the cover texture,
    // capped at 1; the fetched color is scaled by it.
    float coverage = min(abs(texture(cover, vCoverUV).r), 1.0);
    fragColor *= coverage;
}
-31
View File
@@ -1,31 +0,0 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
#extension GL_GOOGLE_include_directive : enable
precision highp float;
#include "common.h"
layout(binding = 0) uniform Block {
    // xy scales the input position, zw is added to it.
    vec4 transform;
    // xy scales the cover texture coordinate, zw is added to it.
    vec4 uvCoverTransform;
    // xyz hold the two rows of the 3x2 UV transform.
    vec4 uvTransformR1;
    vec4 uvTransformR2;
    // z coordinate of the emitted position.
    float z;
} _block;
layout(location = 0) in vec2 pos;
layout(location = 0) out vec2 vCoverUV;
layout(location = 1) in vec2 uv;
layout(location = 1) out vec2 vUV;
void main() {
    vec2 p = pos*_block.transform.xy + _block.transform.zw;
    gl_Position = toClipSpace(vec4(p, _block.z, 1));
    // Texture coordinate for the color lookup.
    m3x2 uvTransform = m3x2(_block.uvTransformR1.xyz, _block.uvTransformR2.xyz);
    vUV = transform3x2(uvTransform, vec3(uv,1)).xy;
    // Texture coordinate into the cover texture.
    vec3 uv3 = transform3x2(fboTextureTransform, vec3(uv, 1.0));
    vec3 coverScale = vec3(_block.uvCoverTransform.xy, 1.0);
    vec3 coverOffset = vec3(_block.uvCoverTransform.zw, 0.0);
    vCoverUV = (uv3*coverScale+coverOffset).xy;
}
-410
View File
@@ -1,410 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// The element processing stage, first in the pipeline.
//
// This stage is primarily about applying transforms and computing bounding
// boxes. It is organized as a scan over the input elements, producing
// annotated output elements.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
#define N_ROWS 4
#define WG_SIZE 32
#define LG_WG_SIZE 5
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
layout(set = 0, binding = 2) readonly buffer SceneBuf {
uint[] scene;
};
// It would be better to use the Vulkan memory model than
// "volatile" but shooting for compatibility here rather
// than doing things right.
layout(set = 0, binding = 3) volatile buffer StateBuf {
uint part_counter;
uint[] state;
};
#include "scene.h"
#include "state.h"
#include "annotated.h"
#include "pathseg.h"
#include "tile.h"
// Distance between the entries of consecutive partitions: 4 plus room for
// two State values.
#define StateBuf_stride (4 + 2 * State_size)
// Returns the reference of the aggregate State of partition partition_ix,
// located 4 past the start of the partition's entry.
StateRef state_aggregate_ref(uint partition_ix) {
    uint entry_start = partition_ix * StateBuf_stride;
    return StateRef(4 + entry_start);
}
// Returns the reference of the prefix State of partition partition_ix,
// located one State_size past the offset state_aggregate_ref computes.
StateRef state_prefix_ref(uint partition_ix) {
    uint aggregate_offset = 4 + partition_ix * StateBuf_stride;
    return StateRef(aggregate_offset + State_size);
}
// Returns the index into the state array of the flag word of partition
// partition_ix.
uint state_flag_index(uint partition_ix) {
    uint words_per_partition = StateBuf_stride / 4;
    return partition_ix * words_per_partition;
}
// Per-partition progress flags stored in the state buffer.
// These correspond to X, A, P respectively in the prefix sum paper.
#define FLAG_NOT_READY 0
#define FLAG_AGGREGATE_READY 1
#define FLAG_PREFIX_READY 2
// Bits of State.flags, combined by combine_state.
#define FLAG_SET_LINEWIDTH 1
#define FLAG_SET_BBOX 2
// FLAG_RESET_BBOX >> 1 equals FLAG_SET_BBOX; combine_state relies on this.
#define FLAG_RESET_BBOX 4
#define FLAG_SET_FILL_MODE 8
// Fill modes take up the next bit. Non-zero fill is 0, stroke is 1.
#define LG_FILL_MODE 4
#define FILL_MODE_BITS 1
#define FILL_MODE_MASK (FILL_MODE_BITS << LG_FILL_MODE)
// This is almost like a monoid (the interaction between transformation and
// bounding boxes is approximate)
//
// combine_state returns the State that results from applying a followed by b:
// transforms are composed, counts are added, and the bounding box, line width
// and flags of b are merged into those of a.
State combine_state(State a, State b) {
State c;
// Bounding box of b's bbox after applying a's matrix and translation,
// computed per axis from the min/max of the transformed extents.
c.bbox.x = min(a.mat.x * b.bbox.x, a.mat.x * b.bbox.z) + min(a.mat.z * b.bbox.y, a.mat.z * b.bbox.w) + a.translate.x;
c.bbox.y = min(a.mat.y * b.bbox.x, a.mat.y * b.bbox.z) + min(a.mat.w * b.bbox.y, a.mat.w * b.bbox.w) + a.translate.y;
c.bbox.z = max(a.mat.x * b.bbox.x, a.mat.x * b.bbox.z) + max(a.mat.z * b.bbox.y, a.mat.z * b.bbox.w) + a.translate.x;
c.bbox.w = max(a.mat.y * b.bbox.x, a.mat.y * b.bbox.z) + max(a.mat.w * b.bbox.y, a.mat.w * b.bbox.w) + a.translate.y;
if ((a.flags & FLAG_RESET_BBOX) == 0 && b.bbox.z <= b.bbox.x && b.bbox.w <= b.bbox.y) {
// a does not reset the bbox and b's bbox is empty: keep a's bbox.
c.bbox = a.bbox;
} else if ((a.flags & FLAG_RESET_BBOX) == 0 && (b.flags & FLAG_SET_BBOX) == 0 &&
(a.bbox.z > a.bbox.x || a.bbox.w > a.bbox.y))
{
// Neither a reset nor a set is involved and a's bbox is non-empty: take the union.
c.bbox.xy = min(a.bbox.xy, c.bbox.xy);
c.bbox.zw = max(a.bbox.zw, c.bbox.zw);
}
// It would be more concise to cast to matrix types; ah well.
c.mat.x = a.mat.x * b.mat.x + a.mat.z * b.mat.y;
c.mat.y = a.mat.y * b.mat.x + a.mat.w * b.mat.y;
c.mat.z = a.mat.x * b.mat.z + a.mat.z * b.mat.w;
c.mat.w = a.mat.y * b.mat.z + a.mat.w * b.mat.w;
c.translate.x = a.mat.x * b.translate.x + a.mat.z * b.translate.y + a.translate.x;
c.translate.y = a.mat.y * b.translate.x + a.mat.w * b.translate.y + a.translate.y;
// b's line width wins only when b explicitly sets one.
c.linewidth = (b.flags & FLAG_SET_LINEWIDTH) == 0 ? a.linewidth : b.linewidth;
c.flags = (a.flags & (FLAG_SET_LINEWIDTH | FLAG_SET_BBOX | FLAG_SET_FILL_MODE)) | b.flags;
// A reset in a becomes FLAG_SET_BBOX in the result (FLAG_RESET_BBOX >> 1).
c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1;
// The fill mode bit comes from b when b sets the fill mode, otherwise from a.
uint fill_mode = (b.flags & FLAG_SET_FILL_MODE) == 0 ? a.flags : b.flags;
fill_mode &= FILL_MODE_MASK;
c.flags = (c.flags & ~FILL_MODE_MASK) | fill_mode;
c.path_count = a.path_count + b.path_count;
c.pathseg_count = a.pathseg_count + b.pathseg_count;
c.trans_count = a.trans_count + b.trans_count;
return c;
}
// map_element returns the State contribution of the single scene element at
// ref. It starts from a neutral state (empty bbox, identity matrix, zero
// counts) and fills in the fields that the element's tag affects.
State map_element(ElementRef ref) {
// TODO: it would *probably* be more efficient to make the memory read patterns less
// divergent, though it would be more wasted memory.
uint tag = Element_tag(ref).tag;
State c;
c.bbox = vec4(0.0, 0.0, 0.0, 0.0);
c.mat = vec4(1.0, 0.0, 0.0, 1.0);
c.translate = vec2(0.0, 0.0);
c.linewidth = 1.0; // TODO should be 0.0
c.flags = 0;
c.path_count = 0;
c.pathseg_count = 0;
c.trans_count = 0;
switch (tag) {
// Path segments: bbox of the control points, one path segment.
case Element_Line:
LineSeg line = Element_Line_read(ref);
c.bbox.xy = min(line.p0, line.p1);
c.bbox.zw = max(line.p0, line.p1);
c.pathseg_count = 1;
break;
case Element_Quad:
QuadSeg quad = Element_Quad_read(ref);
c.bbox.xy = min(min(quad.p0, quad.p1), quad.p2);
c.bbox.zw = max(max(quad.p0, quad.p1), quad.p2);
c.pathseg_count = 1;
break;
case Element_Cubic:
CubicSeg cubic = Element_Cubic_read(ref);
c.bbox.xy = min(min(cubic.p0, cubic.p1), min(cubic.p2, cubic.p3));
c.bbox.zw = max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3));
c.pathseg_count = 1;
break;
// Fills and BeginClip count as one path and reset the bbox for what follows.
case Element_FillColor:
case Element_FillImage:
case Element_BeginClip:
c.flags = FLAG_RESET_BBOX;
c.path_count = 1;
break;
// EndClip counts as one path but does not reset the bbox.
case Element_EndClip:
c.path_count = 1;
break;
case Element_SetLineWidth:
SetLineWidth lw = Element_SetLineWidth_read(ref);
c.linewidth = lw.width;
c.flags = FLAG_SET_LINEWIDTH;
break;
case Element_Transform:
Transform t = Element_Transform_read(ref);
c.mat = t.mat;
c.translate = t.translate;
c.trans_count = 1;
break;
case Element_SetFillMode:
SetFillMode fm = Element_SetFillMode_read(ref);
// The fill mode is stored in the flags above the FLAG_* bits.
c.flags = FLAG_SET_FILL_MODE | (fm.fill_mode << LG_FILL_MODE);
break;
}
return c;
}
// Returns the half-extents of the bounding box of a circle of diameter
// st.linewidth after st.mat has transformed it into an ellipse.
vec2 get_linewidth(State st) {
    // See https://www.iquilezles.org/www/articles/ellipses/ellipses.htm
    float radius = 0.5 * st.linewidth;
    vec2 row_lengths = vec2(length(st.mat.xz), length(st.mat.yw));
    return radius * row_lengths;
}
// Per-invocation aggregates, scanned in place across the workgroup by main.
shared State sh_state[WG_SIZE];
// Index of the partition this workgroup processes, fetched by invocation 0.
shared uint sh_part_ix;
// Exclusive prefix of this partition, published by the last invocation.
shared State sh_prefix;
// Entry point of the element processing stage. Each workgroup claims one
// partition of PARTITION_SIZE scene elements, computes the running State
// (transform, bounding box, line width, flags and counts) of every element
// with a workgroup scan combined with a decoupled look-back over earlier
// partitions, and then writes the annotated elements, path segments and
// transforms derived from those states.
void main() {
    State th_state[N_ROWS];
    // Determine partition to process by atomic counter (described in Section
    // 4.4 of prefix sum paper).
    if (gl_LocalInvocationID.x == 0) {
        sh_part_ix = atomicAdd(part_counter, 1);
    }
    barrier();
    uint part_ix = sh_part_ix;

    uint ix = part_ix * PARTITION_SIZE + gl_LocalInvocationID.x * N_ROWS;
    ElementRef ref = ElementRef(ix * Element_size);

    // Sequential scan over the N_ROWS elements owned by this invocation.
    th_state[0] = map_element(ref);
    for (uint i = 1; i < N_ROWS; i++) {
        // discussion question: would it be faster to load using more coherent patterns
        // into thread memory? This is kinda strided.
        th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i)));
    }
    // Inclusive scan of the per-invocation aggregates across the workgroup.
    State agg = th_state[N_ROWS - 1];
    sh_state[gl_LocalInvocationID.x] = agg;
    for (uint i = 0; i < LG_WG_SIZE; i++) {
        barrier();
        if (gl_LocalInvocationID.x >= (1 << i)) {
            State other = sh_state[gl_LocalInvocationID.x - (1 << i)];
            agg = combine_state(other, agg);
        }
        barrier();
        sh_state[gl_LocalInvocationID.x] = agg;
    }

    // Neutral state; replaced by the look-back result for partitions > 0.
    State exclusive;
    exclusive.bbox = vec4(0.0, 0.0, 0.0, 0.0);
    exclusive.mat = vec4(1.0, 0.0, 0.0, 1.0);
    exclusive.translate = vec2(0.0, 0.0);
    exclusive.linewidth = 1.0; //TODO should be 0.0
    exclusive.flags = 0;
    exclusive.path_count = 0;
    exclusive.pathseg_count = 0;
    exclusive.trans_count = 0;

    // Publish aggregate for this partition
    if (gl_LocalInvocationID.x == WG_SIZE - 1) {
        // Note: with memory model, we'd want to generate the atomic store version of this.
        State_write(state_aggregate_ref(part_ix), agg);
        uint flag = FLAG_AGGREGATE_READY;
        memoryBarrierBuffer();
        if (part_ix == 0) {
            // The first partition's aggregate is also its inclusive prefix.
            State_write(state_prefix_ref(part_ix), agg);
            flag = FLAG_PREFIX_READY;
        }
        state[state_flag_index(part_ix)] = flag;
        if (part_ix != 0) {
            // step 4 of paper: decoupled lookback
            uint look_back_ix = part_ix - 1;

            State their_agg;
            uint their_ix = 0;
            while (true) {
                flag = state[state_flag_index(look_back_ix)];
                if (flag == FLAG_PREFIX_READY) {
                    State their_prefix = State_read(state_prefix_ref(look_back_ix));
                    exclusive = combine_state(their_prefix, exclusive);
                    break;
                } else if (flag == FLAG_AGGREGATE_READY) {
                    their_agg = State_read(state_aggregate_ref(look_back_ix));
                    exclusive = combine_state(their_agg, exclusive);
                    look_back_ix--;
                    their_ix = 0;
                    continue;
                }
                // else spin

                // Unfortunately there's no guarantee of forward progress of other
                // workgroups, so compute a bit of the aggregate before trying again.
                // In the worst case, spinning stops when the aggregate is complete.
                ElementRef ref = ElementRef((look_back_ix * PARTITION_SIZE + their_ix) * Element_size);
                State s = map_element(ref);
                if (their_ix == 0) {
                    their_agg = s;
                } else {
                    their_agg = combine_state(their_agg, s);
                }
                their_ix++;
                if (their_ix == PARTITION_SIZE) {
                    exclusive = combine_state(their_agg, exclusive);
                    if (look_back_ix == 0) {
                        break;
                    }
                    look_back_ix--;
                    their_ix = 0;
                }
            }

            // step 5 of paper: compute inclusive prefix
            State inclusive_prefix = combine_state(exclusive, agg);
            sh_prefix = exclusive;
            State_write(state_prefix_ref(part_ix), inclusive_prefix);
            memoryBarrierBuffer();
            flag = FLAG_PREFIX_READY;
            state[state_flag_index(part_ix)] = flag;
        }
    }
    barrier();
    if (part_ix != 0) {
        exclusive = sh_prefix;
    }

    // row is the state just before this invocation's first element.
    State row = exclusive;
    if (gl_LocalInvocationID.x > 0) {
        State other = sh_state[gl_LocalInvocationID.x - 1];
        row = combine_state(row, other);
    }
    for (uint i = 0; i < N_ROWS; i++) {
        State st = combine_state(row, th_state[i]);

        // Here we read again from the original scene. There may be
        // gains to be had from stashing in shared memory or possibly
        // registers (though register pressure is an issue).
        ElementRef this_ref = Element_index(ref, i);
        ElementTag tag = Element_tag(this_ref);
        uint fill_mode = fill_mode_from_flags(st.flags >> LG_FILL_MODE);
        bool is_stroke = fill_mode == MODE_STROKE;
        switch (tag.tag) {
        case Element_Line:
            // Lines are emitted as cubics with control points at 1/3 and 2/3.
            LineSeg line = Element_Line_read(this_ref);
            PathCubic path_cubic;
            path_cubic.p0 = line.p0;
            path_cubic.p1 = mix(line.p0, line.p1, 1.0 / 3.0);
            path_cubic.p2 = mix(line.p1, line.p0, 1.0 / 3.0);
            path_cubic.p3 = line.p1;
            path_cubic.path_ix = st.path_count;
            path_cubic.trans_ix = st.trans_count;
            if (is_stroke) {
                path_cubic.stroke = get_linewidth(st);
            } else {
                path_cubic.stroke = vec2(0.0);
            }
            PathSegRef path_out_ref = PathSegRef(conf.pathseg_alloc.offset + (st.pathseg_count - 1) * PathSeg_size);
            PathSeg_Cubic_write(conf.pathseg_alloc, path_out_ref, fill_mode, path_cubic);
            break;
        case Element_Quad:
            // Quadratics are raised to cubics.
            QuadSeg quad = Element_Quad_read(this_ref);
            path_cubic.p0 = quad.p0;
            path_cubic.p1 = mix(quad.p1, quad.p0, 1.0 / 3.0);
            path_cubic.p2 = mix(quad.p1, quad.p2, 1.0 / 3.0);
            path_cubic.p3 = quad.p2;
            path_cubic.path_ix = st.path_count;
            path_cubic.trans_ix = st.trans_count;
            if (is_stroke) {
                path_cubic.stroke = get_linewidth(st);
            } else {
                path_cubic.stroke = vec2(0.0);
            }
            path_out_ref = PathSegRef(conf.pathseg_alloc.offset + (st.pathseg_count - 1) * PathSeg_size);
            PathSeg_Cubic_write(conf.pathseg_alloc, path_out_ref, fill_mode, path_cubic);
            break;
        case Element_Cubic:
            CubicSeg cubic = Element_Cubic_read(this_ref);
            path_cubic.p0 = cubic.p0;
            path_cubic.p1 = cubic.p1;
            path_cubic.p2 = cubic.p2;
            path_cubic.p3 = cubic.p3;
            path_cubic.path_ix = st.path_count;
            path_cubic.trans_ix = st.trans_count;
            if (is_stroke) {
                path_cubic.stroke = get_linewidth(st);
            } else {
                path_cubic.stroke = vec2(0.0);
            }
            path_out_ref = PathSegRef(conf.pathseg_alloc.offset + (st.pathseg_count - 1) * PathSeg_size);
            PathSeg_Cubic_write(conf.pathseg_alloc, path_out_ref, fill_mode, path_cubic);
            break;
        case Element_FillColor:
            FillColor fill = Element_FillColor_read(this_ref);
            AnnoColor anno_fill;
            anno_fill.rgba_color = fill.rgba_color;
            if (is_stroke) {
                // Grow the bbox by the stroke extents; the stored line width is
                // scaled by the square root of the transform's determinant.
                vec2 lw = get_linewidth(st);
                anno_fill.bbox = st.bbox + vec4(-lw, lw);
                anno_fill.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
            } else {
                anno_fill.bbox = st.bbox;
                anno_fill.linewidth = 0.0;
            }
            AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
            Annotated_Color_write(conf.anno_alloc, out_ref, fill_mode, anno_fill);
            break;
        case Element_FillImage:
            FillImage fill_img = Element_FillImage_read(this_ref);
            AnnoImage anno_img;
            anno_img.index = fill_img.index;
            anno_img.offset = fill_img.offset;
            if (is_stroke) {
                vec2 lw = get_linewidth(st);
                anno_img.bbox = st.bbox + vec4(-lw, lw);
                anno_img.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
            } else {
                anno_img.bbox = st.bbox;
                anno_img.linewidth = 0.0;
            }
            out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
            Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img);
            break;
        case Element_BeginClip:
            Clip begin_clip = Element_BeginClip_read(this_ref);
            AnnoBeginClip anno_begin_clip;
            // This is the absolute bbox, it's been transformed during encoding.
            anno_begin_clip.bbox = begin_clip.bbox;
            if (is_stroke) {
                anno_begin_clip.linewidth = st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
            } else {
                // Initialize the clip's own line width; assigning to another
                // annotation here would leave this field unset when written.
                anno_begin_clip.linewidth = 0.0;
            }
            out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
            Annotated_BeginClip_write(conf.anno_alloc, out_ref, fill_mode, anno_begin_clip);
            break;
        case Element_EndClip:
            Clip end_clip = Element_EndClip_read(this_ref);
            // This bbox is expected to be the same as the begin one.
            AnnoEndClip anno_end_clip = AnnoEndClip(end_clip.bbox);
            out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
            Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
            break;
        case Element_Transform:
            // Store the accumulated transform, not the element's own.
            TransformSeg transform = TransformSeg(st.mat, st.translate);
            TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (st.trans_count - 1) * TransformSeg_size);
            TransformSeg_write(conf.trans_alloc, trans_ref, transform);
            break;
        }
    }
}
-18
View File
@@ -1,18 +0,0 @@
#version 310 es

// SPDX-License-Identifier: Unlicense OR MIT

precision mediump float;

// Use high precision to be pixel accurate for
// large cover atlases.
layout(location = 0) in highp vec2 vUV;

// Source texture; only its red channel is read.
layout(binding = 0) uniform sampler2D cover;

layout(location = 0) out vec4 fragColor;

// main stores the absolute value of the sampled red channel in the red
// channel of the output. No other output channel is written.
void main() {
    fragColor.r = abs(texture(cover, vUV).r);
}
-28
View File
@@ -1,28 +0,0 @@
#version 310 es

// SPDX-License-Identifier: Unlicense OR MIT

#extension GL_GOOGLE_include_directive : enable

precision highp float;

#include "common.h"

layout(location = 0) in vec2 pos;
layout(location = 1) in vec2 uv;

// Scale (xy) and offset (zw) pairs applied to the texture coordinate.
layout(binding = 0) uniform Block {
    vec4 uvTransform;
    vec4 subUVTransform;
} _block;

layout(location = 0) out vec2 vUV;

// main transforms the vertex position with fboTransform and derives the
// output texture coordinate in four steps: fboTextureTransform, the
// subUVTransform scale/offset, fboTextureTransform again, and finally the
// uvTransform scale/offset.
void main() {
    gl_Position = vec4(transform3x2(fboTransform, vec3(pos, 1.0)), 1);

    vec2 st = transform3x2(fboTextureTransform, vec3(uv, 1.0)).xy;
    st = st*_block.subUVTransform.xy + _block.subUVTransform.zw;
    st = transform3x2(fboTextureTransform, vec3(st, 1.0)).xy;
    vUV = st*_block.uvTransform.xy + _block.uvTransform.zw;
}
-248
View File
@@ -1,248 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// This is "kernel 4" in a 4-kernel pipeline. It renders the commands
// in the per-tile command list to an image.
// Right now, this kernel stores the image in a buffer, but a better
// plan is to use a texture. This is because of limited support.
#version 450
#extension GL_GOOGLE_include_directive : enable
#ifdef ENABLE_IMAGE_INDICES
#extension GL_EXT_nonuniform_qualifier : enable
#endif
#include "mem.h"
#include "setup.h"
// Each invocation handles a CHUNK_X by CHUNK_Y grid of pixels.
#define CHUNK_X 2
#define CHUNK_Y 4
// Number of pixels per invocation. Parenthesized so the macro expands
// safely inside larger expressions (e.g. `n / CHUNK`).
#define CHUNK (CHUNK_X * CHUNK_Y)
// Pixel distance between the chunks handled by one invocation; also used
// as the workgroup size.
#define CHUNK_DX (TILE_WIDTH_PX / CHUNK_X)
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y)
// Workgroup size in invocations.
layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in;
// Read-only configuration for the render pass.
layout(set = 0, binding = 1) restrict readonly buffer ConfigBuf {
Config conf;
};
// Destination image, written by main after the command list has been processed.
layout(rgba8, set = 0, binding = 2) uniform restrict writeonly image2D image;
// Source images for image commands. Without ENABLE_IMAGE_INDICES a single
// image is declared and the command's image index is ignored by fillImage.
#ifdef ENABLE_IMAGE_INDICES
layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images[];
#else
layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images[1];
#endif
#include "ptcl.h"
#include "tile.h"
// tosRGB converts a linear color to its sRGB encoding, per component:
// a linear segment below the 0.0031308 threshold, a power curve at or above it.
mediump vec3 tosRGB(mediump vec3 rgb) {
    mediump vec3 curve = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055);
    mediump vec3 linear_part = vec3(12.92)*rgb;
    bvec3 use_curve = greaterThanEqual(rgb, vec3(0.0031308));
    return mix(linear_part, curve, use_curve);
}
// fromsRGB converts an sRGB encoded color to linear, per component.
mediump vec3 fromsRGB(mediump vec3 srgb) {
    // Formula from EXT_sRGB.
    mediump vec3 curve = pow((srgb + vec3(0.055))/vec3(1.055), vec3(2.4));
    mediump vec3 linear_part = srgb/vec3(12.92);
    bvec3 use_curve = greaterThanEqual(srgb, vec3(0.04045));
    return mix(linear_part, curve, use_curve);
}
// unpacksRGB decodes a packed 8-bit-per-channel sRGB color into a vec4 in the
// linear color space. The unpacked components are reversed (wzyx) before use;
// alpha is passed through unchanged.
mediump vec4 unpacksRGB(uint srgba) {
    mediump vec4 encoded = unpackUnorm4x8(srgba).wzyx;
    mediump vec3 linear_rgb = fromsRGB(encoded.rgb);
    return vec4(linear_rgb, encoded.a);
}
// packsRGB encodes a linear color as sRGB and packs it into 8 bits per
// channel, with the components reversed (wzyx). Alpha is not converted.
uint packsRGB(mediump vec4 rgba) {
    mediump vec3 encoded_rgb = tosRGB(rgba.rgb);
    rgba = vec4(encoded_rgb, rgba.a);
    return packUnorm4x8(rgba.wzyx);
}
// chunk_offset returns the pixel offset of the i'th chunk handled by an
// invocation: chunks are laid out CHUNK_X per row, spaced CHUNK_DX
// horizontally and CHUNK_DY vertically.
uvec2 chunk_offset(uint i) {
    uint column = i % CHUNK_X;
    uint row = i / CHUNK_X;
    return uvec2(column * CHUNK_DX, row * CHUNK_DY);
}
// fillImage loads, for each chunk of the invocation, the source texel at the
// pixel position plus cmd_img.offset and converts its color from sRGB to
// linear. Without ENABLE_IMAGE_INDICES, image 0 is always sampled.
mediump vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
    mediump vec4 texels[CHUNK];
    for (uint k = 0; k < CHUNK; k++) {
        ivec2 coord = ivec2(xy + chunk_offset(k)) + cmd_img.offset;
#ifdef ENABLE_IMAGE_INDICES
        mediump vec4 texel = imageLoad(images[cmd_img.index], coord);
#else
        mediump vec4 texel = imageLoad(images[0], coord);
#endif
        texel.rgb = fromsRGB(texel.rgb);
        texels[k] = texel;
    }
    return texels;
}
// main walks the command list of the tile selected by gl_WorkGroupID and
// renders it to the output image. Each invocation accumulates color for
// CHUNK pixels (see chunk_offset) and stores them, sRGB encoded, at the end.
//
// Processing is skipped entirely if mem_error is already set on entry.
void main() {
    uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
    Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
    CmdRef cmd_ref = CmdRef(cmd_alloc.offset);

    // Read scratch space allocation, written first in the command list.
    Alloc scratch_alloc = alloc_read(cmd_alloc, cmd_ref.offset);
    cmd_ref.offset += Alloc_size;

    uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
    vec2 xy = vec2(xy_uint);
    mediump vec4 rgba[CHUNK];
    for (uint i = 0; i < CHUNK; i++) {
        rgba[i] = vec4(0.0);
        // TODO: remove this debug image support when the actual image method is plumbed.
#ifdef DEBUG_IMAGES
#ifdef ENABLE_IMAGE_INDICES
        if (xy_uint.x < 1024 && xy_uint.y < 1024) {
            rgba[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint + chunk_offset(i))/4);
        }
#else
        if (xy_uint.x < 1024 && xy_uint.y < 1024) {
            // Store the full texel in rgba, as the indexed variant above does.
            rgba[i] = imageLoad(images[0], ivec2(xy_uint + chunk_offset(i))/4);
        }
#endif
#endif
    }

    // Coverage of the most recent stroke/fill/solid/alpha command, per chunk.
    mediump float area[CHUNK];
    uint clip_depth = 0;
    bool mem_ok = mem_error == NO_ERROR;
    while (mem_ok) {
        uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
        if (tag == Cmd_End) {
            break;
        }
        switch (tag) {
        case Cmd_Stroke:
            // Calculate distance field from all the line segments in this tile.
            CmdStroke stroke = Cmd_Stroke_read(cmd_alloc, cmd_ref);
            mediump float df[CHUNK];
            for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
            TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
            do {
                TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
                vec2 line_vec = seg.vector;
                for (uint k = 0; k < CHUNK; k++) {
                    vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
                    dpos += vec2(chunk_offset(k));
                    float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
                    df[k] = min(df[k], length(line_vec * t - dpos));
                }
                tile_seg_ref = seg.next;
            } while (tile_seg_ref.offset != 0);
            for (uint k = 0; k < CHUNK; k++) {
                area[k] = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0);
            }
            // Skip the 4-byte tag plus the payload.
            cmd_ref.offset += 4 + CmdStroke_size;
            break;
        case Cmd_Fill:
            CmdFill fill = Cmd_Fill_read(cmd_alloc, cmd_ref);
            for (uint k = 0; k < CHUNK; k++) area[k] = float(fill.backdrop);
            tile_seg_ref = TileSegRef(fill.tile_ref);
            // Calculate coverage based on backdrop + coverage of each line segment
            do {
                TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
                for (uint k = 0; k < CHUNK; k++) {
                    vec2 my_xy = xy + vec2(chunk_offset(k));
                    vec2 start = seg.origin - my_xy;
                    vec2 end = start + seg.vector;
                    vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
                    if (window.x != window.y) {
                        vec2 t = (window - start.y) / seg.vector.y;
                        vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
                        float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
                        float xmax = max(xs.x, xs.y);
                        float b = min(xmax, 1.0);
                        float c = max(b, 0.0);
                        float d = max(xmin, 0.0);
                        float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
                        area[k] += a * (window.x - window.y);
                    }
                    area[k] += sign(seg.vector.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
                }
                tile_seg_ref = seg.next;
            } while (tile_seg_ref.offset != 0);
            for (uint k = 0; k < CHUNK; k++) {
                area[k] = min(abs(area[k]), 1.0);
            }
            cmd_ref.offset += 4 + CmdFill_size;
            break;
        case Cmd_Solid:
            for (uint k = 0; k < CHUNK; k++) {
                area[k] = 1.0;
            }
            cmd_ref.offset += 4;
            break;
        case Cmd_Alpha:
            CmdAlpha alpha = Cmd_Alpha_read(cmd_alloc, cmd_ref);
            for (uint k = 0; k < CHUNK; k++) {
                area[k] = alpha.alpha;
            }
            cmd_ref.offset += 4 + CmdAlpha_size;
            break;
        case Cmd_Color:
            CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref);
            mediump vec4 fg = unpacksRGB(color.rgba_color);
            for (uint k = 0; k < CHUNK; k++) {
                // Blend the coverage-scaled color over the accumulated color.
                mediump vec4 fg_k = fg * area[k];
                rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
            }
            cmd_ref.offset += 4 + CmdColor_size;
            break;
        case Cmd_Image:
            CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref);
            mediump vec4 img[CHUNK] = fillImage(xy_uint, fill_img);
            for (uint k = 0; k < CHUNK; k++) {
                mediump vec4 fg_k = img[k] * area[k];
                rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
            }
            cmd_ref.offset += 4 + CmdImage_size;
            break;
        case Cmd_BeginClip:
            // Save the accumulated color and the current coverage to scratch
            // memory, then start the clipped content from transparent.
            uint base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
                gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
            for (uint k = 0; k < CHUNK; k++) {
                uvec2 offset = chunk_offset(k);
                uint srgb = packsRGB(vec4(rgba[k]));
                mediump float alpha = clamp(abs(area[k]), 0.0, 1.0);
                write_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb);
                write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha));
                rgba[k] = vec4(0.0);
            }
            clip_depth++;
            cmd_ref.offset += 4;
            break;
        case Cmd_EndClip:
            // Restore the state saved by the matching Cmd_BeginClip and blend
            // the clipped content over it.
            clip_depth--;
            base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
                gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
            for (uint k = 0; k < CHUNK; k++) {
                uvec2 offset = chunk_offset(k);
                uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
                uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
                mediump vec4 bg = unpacksRGB(srgb);
                mediump vec4 fg = rgba[k] * area[k] * uintBitsToFloat(alpha);
                rgba[k] = bg * (1.0 - fg.a) + fg;
            }
            cmd_ref.offset += 4;
            break;
        case Cmd_Jump:
            // Continue reading commands at the jump target.
            cmd_ref = CmdRef(Cmd_Jump_read(cmd_alloc, cmd_ref).new_ref);
            cmd_alloc.offset = cmd_ref.offset;
            break;
        }
    }

    for (uint i = 0; i < CHUNK; i++) {
        imageStore(image, ivec2(xy_uint + chunk_offset(i)), vec4(tosRGB(rgba[i].rgb), rgba[i].a));
    }
}
-32
View File
@@ -1,32 +0,0 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
precision mediump float;
// Source texture sampled by main.
layout(binding = 0) uniform sampler2D tex;
// Texture coordinate supplied by the vertex stage.
layout(location = 0) in vec2 vUV;
layout(location = 0) out vec4 fragColor;
layout(binding=0) uniform Color {
// If emulateSRGB is set (!= 0), the input texels are sRGB encoded. We save the
// conversion step below, at the cost of texture filtering in sRGB space.
float emulateSRGB;
};
// RGBtosRGB converts a linear color to its sRGB encoding, per component:
// a linear segment below the 0.0031308 threshold, a power curve at or above it.
vec3 RGBtosRGB(vec3 rgb) {
    vec3 curve = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055);
    vec3 linear_part = vec3(12.92)*rgb;
    bvec3 use_curve = greaterThanEqual(rgb, vec3(0.0031308));
    return mix(linear_part, curve, use_curve);
}
// main copies the sampled texel to the output, sRGB encoding its color
// channels first unless emulateSRGB is set.
void main() {
    vec4 color = texture(tex, vUV);
    bool needs_encoding = emulateSRGB == 0.0;
    if (needs_encoding) {
        color.rgb = RGBtosRGB(color.rgb);
    }
    fragColor = color;
}
-20
View File
@@ -1,20 +0,0 @@
#version 310 es

// SPDX-License-Identifier: Unlicense OR MIT

precision highp float;

// Scale and offset applied to the incoming vertex position.
layout(binding = 0) uniform Block {
    vec2 scale;
    vec2 pos;
} _block;

layout(location = 0) in vec2 pos;
layout(location = 1) in vec2 uv;

layout(location = 0) out vec2 vUV;

// main scales and offsets the vertex position and forwards the texture
// coordinate unchanged.
void main() {
    vec2 placed = pos*_block.scale + _block.pos;
    gl_Position = vec4(placed, 0, 1);
    vUV = uv;
}
-147
View File
@@ -1,147 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Memory is the shared buffer that malloc, read_mem and write_mem operate on.
layout(set = 0, binding = 0) buffer Memory {
// offset into memory of the next allocation, initialized by the user.
uint mem_offset;
// mem_error tracks the status of memory accesses, initialized to NO_ERROR
// by the user. ERR_MALLOC_FAILED is reported for insufficient memory.
// If MEM_DEBUG is defined the following errors are reported:
// - ERR_OUT_OF_BOUNDS is reported for out of bounds writes.
// - ERR_UNALIGNED_ACCESS for memory access not aligned to 32-bit words.
uint mem_error;
// The backing store, addressed in 32-bit words.
uint[] memory;
};
// Uncomment this line to add the size field to Alloc and enable memory checks.
// Note that the Config struct in setup.h grows size fields as well.
//#define MEM_DEBUG
// Values stored in mem_error. Errors are recorded with atomicMax, so the
// numerically largest error reported wins.
#define NO_ERROR 0
#define ERR_MALLOC_FAILED 1
#define ERR_OUT_OF_BOUNDS 2
#define ERR_UNALIGNED_ACCESS 3
// Size in bytes that callers reserve for an Alloc stored in memory.
#ifdef MEM_DEBUG
#define Alloc_size 16
#else
#define Alloc_size 8
#endif
// Alloc represents a memory allocation.
// The size field exists only when MEM_DEBUG is defined, where it is used
// for bounds checking.
struct Alloc {
// offset in bytes into memory.
uint offset;
#ifdef MEM_DEBUG
// size in bytes of the allocation.
uint size;
#endif
};
// MallocResult is the result of malloc.
struct MallocResult {
// The allocation; filled in even when failed is set.
Alloc alloc;
// failed is true if the allocation overflowed memory.
bool failed;
};
// new_alloc builds an Alloc for the given byte offset and size.
// With MEM_DEBUG, the recorded size is zero when mem_ok is false, so that
// every checked access through the result is rejected. Without MEM_DEBUG,
// size and mem_ok are unused.
Alloc new_alloc(uint offset, uint size, bool mem_ok) {
    Alloc result;
    result.offset = offset;
#ifdef MEM_DEBUG
    result.size = mem_ok ? size : 0u;
#endif
    return result;
}
// malloc reserves size bytes by atomically advancing mem_offset.
// If the reservation extends past the end of memory, the result is marked
// failed and ERR_MALLOC_FAILED is recorded in mem_error. With MEM_DEBUG, a
// size that is not a multiple of 4 also fails, with ERR_UNALIGNED_ACCESS.
MallocResult malloc(uint size) {
    uint start = atomicAdd(mem_offset, size);
    bool overflow = start + size > memory.length() * 4;

    MallocResult res;
    res.failed = overflow;
    res.alloc = new_alloc(start, size, !overflow);
    if (overflow) {
        atomicMax(mem_error, ERR_MALLOC_FAILED);
        return res;
    }
#ifdef MEM_DEBUG
    bool unaligned = (size & 3) != 0;
    if (unaligned) {
        res.failed = true;
        atomicMax(mem_error, ERR_UNALIGNED_ACCESS);
    }
#endif
    return res;
}
// touch_mem reports whether the memory word at offset (in words) may be
// accessed through alloc. Without MEM_DEBUG it always returns true. With
// MEM_DEBUG, an offset outside the allocation records ERR_OUT_OF_BOUNDS
// and returns false.
bool touch_mem(Alloc alloc, uint offset) {
#ifdef MEM_DEBUG
    uint first_word = alloc.offset/4;
    uint end_word = (alloc.offset + alloc.size)/4;
    bool in_bounds = offset >= first_word && offset < end_word;
    if (!in_bounds) {
        atomicMax(mem_error, ERR_OUT_OF_BOUNDS);
        return false;
    }
#endif
    return true;
}
// write_mem stores val in the memory word at offset (in words).
// The write is dropped if touch_mem rejects the access.
void write_mem(Alloc alloc, uint offset, uint val) {
    if (touch_mem(alloc, offset)) {
        memory[offset] = val;
    }
}
// read_mem returns the memory word at offset (in words), or 0 if touch_mem
// rejects the access.
uint read_mem(Alloc alloc, uint offset) {
    return touch_mem(alloc, offset) ? memory[offset] : 0u;
}
// slice_mem returns the sub-allocation of a that starts offset bytes into it
// and spans size bytes. With MEM_DEBUG, an offset or size that is not a
// multiple of 4 records ERR_UNALIGNED_ACCESS and yields an empty allocation;
// a slice extending past a yields an empty allocation without an error.
Alloc slice_mem(Alloc a, uint offset, uint size) {
#ifdef MEM_DEBUG
    bool unaligned = ((offset | size) & 3) != 0;
    if (unaligned) {
        atomicMax(mem_error, ERR_UNALIGNED_ACCESS);
        return Alloc(0, 0);
    }
    bool fits = offset + size <= a.size;
    if (!fits) {
        // slice_mem is sometimes used for slices outside bounds,
        // but never written.
        return Alloc(0, 0);
    }
    return Alloc(a.offset + offset, size);
#else
    return Alloc(a.offset + offset);
#endif
}
// alloc_write stores alloc in memory at the byte offset given, accessed
// through a. The size word is written only when MEM_DEBUG is defined.
void alloc_write(Alloc a, uint offset, Alloc alloc) {
    uint word = offset >> 2;
    write_mem(a, word, alloc.offset);
#ifdef MEM_DEBUG
    write_mem(a, word + 1, alloc.size);
#endif
}
// alloc_read loads an Alloc from memory at the byte offset given, accessed
// through a. The size word is read only when MEM_DEBUG is defined.
Alloc alloc_read(Alloc a, uint offset) {
    uint word = offset >> 2;
    Alloc result;
    result.offset = read_mem(a, word);
#ifdef MEM_DEBUG
    result.size = read_mem(a, word + 1);
#endif
    return result;
}
-294
View File
@@ -1,294 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Coarse rasterization of path segments.
// Allocation and initialization of tiles for paths.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
// Workgroup size is 1 << LG_COARSE_WG invocations.
#define LG_COARSE_WG 5
#define COARSE_WG (1 << LG_COARSE_WG)
layout(local_size_x = COARSE_WG, local_size_y = 1) in;
// Read-only configuration for the render pass.
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "pathseg.h"
#include "tile.h"
// scale factors useful for converting coordinates to tiles
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))
// Flattening tolerance. Q_ACCURACY is the share used when choosing the
// number of quadratics per cubic (through MAX_HYPOT2); REM_ACCURACY is the
// remainder, used when subdividing the quadratics into lines.
#define ACCURACY 0.25
#define Q_ACCURACY (ACCURACY * 0.1)
#define REM_ACCURACY (ACCURACY - Q_ACCURACY)
#define MAX_HYPOT2 (432.0 * Q_ACCURACY * Q_ACCURACY)
// Upper bound on the number of quadratics a cubic is split into.
#define MAX_QUADS 16
// eval_quad evaluates the quadratic Bézier curve with control points
// p0, p1, p2 at parameter t.
vec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t) {
    float mt = 1.0 - t;
    vec2 tail = p1 * (mt * 2.0) + p2 * t;
    return p0 * (mt * mt) + tail * t;
}
// eval_cubic evaluates the cubic Bézier curve with control points
// p0, p1, p2, p3 at parameter t.
vec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t) {
    float mt = 1.0 - t;
    vec2 acc = p2 * (mt * 3.0) + p3 * t;
    acc = p1 * (mt * mt * 3.0) + acc * t;
    return p0 * (mt * mt * mt) + acc * t;
}
// SubdivResult is the output of estimate_subdiv for one quadratic.
struct SubdivResult {
// Subdivision estimate; callers sum it over quadratics to pick a segment count.
float val;
// approx_parabola_integral evaluated at the start and end of the quadratic.
float a0;
float a2;
};
/// An approximation to $\int (1 + 4x^2) ^ -0.25 dx$
///
/// This is used for flattening curves.
#define D 0.67
float approx_parabola_integral(float x) {
    float d4 = D * D * D * D;
    float radicand = 1.0 - D + (d4 + 0.25 * x * x);
    return x * inversesqrt(sqrt(radicand));
}
/// An approximation to the inverse parabola integral.
#define B 0.39
float approx_parabola_inv_integral(float x) {
    float radicand = 1.0 - B + (B * B + 0.25 * x * x);
    return x * sqrt(radicand);
}
// estimate_subdiv estimates how finely the quadratic p0, p1, p2 must be
// subdivided. It returns the estimate together with the parabola integral
// values at both ends of the curve. sqrt_tol is the square root of the
// tolerance. The estimate is 0 unless the computed scale is below 1e9.
SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
    vec2 leg0 = p1 - p0;
    vec2 leg1 = p2 - p1;
    vec2 bend = leg0 - leg1;
    float cross = (p2.x - p0.x) * bend.y - (p2.y - p0.y) * bend.x;
    float x0 = (leg0.x * bend.x + leg0.y * bend.y) / cross;
    float x2 = (leg1.x * bend.x + leg1.y * bend.y) / cross;
    float scale = abs(cross / (length(bend) * (x2 - x0)));

    float a0 = approx_parabola_integral(x0);
    float a2 = approx_parabola_integral(x2);

    float estimate = 0.0;
    if (scale < 1e9) {
        float da = abs(a2 - a0);
        float sqrt_scale = sqrt(scale);
        bool same_side = sign(x0) == sign(x2);
        if (same_side) {
            estimate = da * sqrt_scale;
        } else {
            float xmin = sqrt_tol / sqrt_scale;
            estimate = sqrt_tol * da / approx_parabola_integral(xmin);
        }
    }
    return SubdivResult(estimate, a0, a2);
}
// main processes one path segment per invocation. A cubic segment is
// transformed, approximated by up to MAX_QUADS quadratics, flattened into
// line segments, and each line segment is linked into the segment list of
// the tiles it may touch. For fills, tile backdrops are updated as well.
void main() {
uint element_ix = gl_GlobalInvocationID.x;
PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);
// Invocations past the last segment keep the Nop tag and do nothing below.
PathSegTag tag = PathSegTag(PathSeg_Nop, 0);
if (element_ix < conf.n_pathseg) {
tag = PathSeg_tag(conf.pathseg_alloc, ref);
}
bool mem_ok = mem_error == NO_ERROR;
switch (tag.tag) {
case PathSeg_Cubic:
PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);
// trans_ix is 1-based; 0 leaves the control points untransformed.
uint trans_ix = cubic.trans_ix;
if (trans_ix > 0) {
TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (trans_ix - 1) * TransformSeg_size);
TransformSeg trans = TransformSeg_read(conf.trans_alloc, trans_ref);
cubic.p0 = trans.mat.xy * cubic.p0.x + trans.mat.zw * cubic.p0.y + trans.translate;
cubic.p1 = trans.mat.xy * cubic.p1.x + trans.mat.zw * cubic.p1.y + trans.translate;
cubic.p2 = trans.mat.xy * cubic.p2.x + trans.mat.zw * cubic.p2.y + trans.translate;
cubic.p3 = trans.mat.xy * cubic.p3.x + trans.mat.zw * cubic.p3.y + trans.translate;
}
vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
float err = err_v.x * err_v.x + err_v.y * err_v.y;
// The number of quadratics.
uint n_quads = max(uint(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1);
n_quads = min(n_quads, MAX_QUADS);
SubdivResult keep_params[MAX_QUADS];
// Iterate over quadratics and tote up the estimated number of segments.
float val = 0.0;
vec2 qp0 = cubic.p0;
float step = 1.0 / float(n_quads);
for (uint i = 0; i < n_quads; i++) {
float t = float(i + 1) * step;
vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t);
vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step);
qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
SubdivResult params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY));
keep_params[i] = params;
val += params.val;
qp0 = qp2;
}
// n is the total number of line segments to output; at least one.
uint n = max(uint(ceil(val * 0.5 / sqrt(REM_ACCURACY))), 1);
bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE;
uint path_ix = cubic.path_ix;
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
ivec4 bbox = ivec4(path.bbox);
vec2 p0 = cubic.p0;
qp0 = cubic.p0;
float v_step = val / float(n);
int n_out = 1;
float val_sum = 0.0;
// Second pass over the quadratics: emit the line segments whose end points
// fall on each quadratic, spaced v_step apart in the estimate metric.
for (uint i = 0; i < n_quads; i++) {
float t = float(i + 1) * step;
vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t);
vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step);
qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
SubdivResult params = keep_params[i];
float u0 = approx_parabola_inv_integral(params.a0);
float u2 = approx_parabola_inv_integral(params.a2);
float uscale = 1.0 / (u2 - u0);
float target = float(n_out) * v_step;
while (n_out == n || target < val_sum + params.val) {
vec2 p1;
if (n_out == n) {
// The last segment always ends exactly at the end of the cubic.
p1 = cubic.p3;
} else {
float u = (target - val_sum) / params.val;
float a = mix(params.a0, params.a2, u);
float au = approx_parabola_inv_integral(a);
float t = (au - u0) * uscale;
p1 = eval_quad(qp0, qp1, qp2, t);
}
// Output line segment
// Bounding box of element in pixel coordinates.
float xmin = min(p0.x, p1.x) - cubic.stroke.x;
float xmax = max(p0.x, p1.x) + cubic.stroke.x;
float ymin = min(p0.y, p1.y) - cubic.stroke.y;
float ymax = max(p0.y, p1.y) + cubic.stroke.y;
float dx = p1.x - p0.x;
float dy = p1.y - p0.y;
// Set up for per-scanline coverage formula, below.
float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
float c = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX;
float b = invslope; // Note: assumes square tiles, otherwise scale.
float a = (p0.x - (p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;
// Tile range covered by the segment, clamped to the path's bounding box.
int x0 = int(floor(xmin * SX));
int x1 = int(floor(xmax * SX) + 1);
int y0 = int(floor(ymin * SY));
int y1 = int(floor(ymax * SY) + 1);
x0 = clamp(x0, bbox.x, bbox.z);
y0 = clamp(y0, bbox.y, bbox.w);
x1 = clamp(x1, bbox.x, bbox.z);
y1 = clamp(y1, bbox.y, bbox.w);
float xc = a + b * float(y0);
int stride = bbox.z - bbox.x;
int base = (y0 - bbox.y) * stride - bbox.x;
// TODO: can be tighter, use c to bound width
uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
// Consider using subgroups to aggregate atomic add.
MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
// Stop if this allocation failed or memory was already in an error state on entry.
if (tile_alloc.failed || !mem_ok) {
return;
}
uint tile_offset = tile_alloc.alloc.offset;
TileSeg tile_seg;
// Order the tile columns of the end points so that xray belongs to the
// end point with the smaller y.
int xray = int(floor(p0.x*SX));
int last_xray = int(floor(p1.x*SX));
if (p0.y > p1.y) {
int tmp = xray;
xray = last_xray;
last_xray = tmp;
}
for (int y = y0; y < y1; y++) {
float tile_y0 = float(y * TILE_HEIGHT_PX);
int xbackdrop = max(xray + 1, bbox.x);
if (!is_stroke && min(p0.y, p1.y) < tile_y0 && xbackdrop < bbox.z) {
int backdrop = p1.y < p0.y ? 1 : -1;
TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop));
uint tile_el = tile_ref.offset >> 2;
// The backdrop is the word following the tile's first word.
if (touch_mem(path_alloc, tile_el + 1)) {
atomicAdd(memory[tile_el + 1], backdrop);
}
}
// next_xray is the xray for the next scanline; the line segment intersects
// all tiles between xray and next_xray.
int next_xray = last_xray;
if (y < y1 - 1) {
float tile_y1 = float((y + 1) * TILE_HEIGHT_PX);
float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy);
next_xray = int(floor(x_edge*SX));
}
int min_xray = min(xray, next_xray);
int max_xray = max(xray, next_xray);
int xx0 = min(int(floor(xc - c)), min_xray);
int xx1 = max(int(ceil(xc + c)), max_xray + 1);
xx0 = clamp(xx0, x0, x1);
xx1 = clamp(xx1, x0, x1);
for (int x = xx0; x < xx1; x++) {
float tile_x0 = float(x * TILE_WIDTH_PX);
TileRef tile_ref = Tile_index(TileRef(path.tiles.offset), uint(base + x));
uint tile_el = tile_ref.offset >> 2;
// Push the new segment onto the tile's list: the tile's first word now
// refers to this segment and the previous value becomes its next link.
uint old = 0;
if (touch_mem(path_alloc, tile_el)) {
old = atomicExchange(memory[tile_el], tile_offset);
}
tile_seg.origin = p0;
tile_seg.vector = p1 - p0;
float y_edge = 0.0;
if (!is_stroke) {
y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);
if (min(p0.x, p1.x) < tile_x0) {
// Clip the segment at the tile's left edge.
vec2 p = vec2(tile_x0, y_edge);
if (p0.x > p1.x) {
tile_seg.vector = p - p0;
} else {
tile_seg.origin = p;
tile_seg.vector = p1 - p;
}
// kernel4 uses sign(vector.x) for the sign of the intersection backdrop.
// Nudge zeroes towards the intended sign.
if (tile_seg.vector.x == 0) {
tile_seg.vector.x = sign(p1.x - p0.x)*1e-9;
}
}
if (x <= min_xray || max_xray < x) {
// Reject inconsistent intersections.
y_edge = 1e9;
}
}
tile_seg.y_edge = y_edge;
tile_seg.next.offset = old;
TileSeg_write(tile_alloc.alloc, TileSegRef(tile_offset), tile_seg);
tile_offset += TileSeg_size;
}
xc += b;
base += stride;
xray = next_xray;
}
n_out += 1;
target += v_step;
p0 = p1;
}
val_sum += params.val;
qp0 = qp2;
}
break;
}
}
-100
View File
@@ -1,100 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Code auto-generated by piet-gpu-derive
// Typed references. offset is a byte offset; the read and write helpers
// convert it to a word index with `>> 2`.
struct PathCubicRef {
uint offset;
};
struct PathSegRef {
uint offset;
};
// PathCubic is the payload of a PathSeg_Cubic segment: four control points,
// the indices of its path and transform, and a stroke extent.
struct PathCubic {
    vec2 p0;
    vec2 p1;
    vec2 p2;
    vec2 p3;
    uint path_ix;
    uint trans_ix;
    vec2 stroke;
};

// Encoded size of PathCubic in bytes (twelve 32-bit words).
#define PathCubic_size 48

// PathCubic_index returns a reference advanced by index PathCubic elements.
PathCubicRef PathCubic_index(PathCubicRef ref, uint index) {
    uint byte_offset = ref.offset + index * PathCubic_size;
    return PathCubicRef(byte_offset);
}
// PathSeg tags.
#define PathSeg_Nop 0
#define PathSeg_Cubic 1

// Encoded size of a PathSeg in bytes: a 4-byte tag word followed by the payload.
#define PathSeg_size 52

// PathSeg_index returns a reference advanced by index PathSeg elements.
PathSegRef PathSeg_index(PathSegRef ref, uint index) {
    uint byte_offset = ref.offset + index * PathSeg_size;
    return PathSegRef(byte_offset);
}
// PathSegTag is the decoded first word of a PathSeg: the tag is stored in
// the low 16 bits and the flags in the high 16 bits.
struct PathSegTag {
uint tag;
uint flags;
};
// PathCubic_read decodes the PathCubic stored at ref: twelve consecutive
// words, read in ascending order.
PathCubic PathCubic_read(Alloc a, PathCubicRef ref) {
    uint base = ref.offset >> 2;
    uint w[12];
    for (uint i = 0; i < 12; i++) {
        w[i] = read_mem(a, base + i);
    }

    PathCubic cubic;
    cubic.p0 = vec2(uintBitsToFloat(w[0]), uintBitsToFloat(w[1]));
    cubic.p1 = vec2(uintBitsToFloat(w[2]), uintBitsToFloat(w[3]));
    cubic.p2 = vec2(uintBitsToFloat(w[4]), uintBitsToFloat(w[5]));
    cubic.p3 = vec2(uintBitsToFloat(w[6]), uintBitsToFloat(w[7]));
    cubic.path_ix = w[8];
    cubic.trans_ix = w[9];
    cubic.stroke = vec2(uintBitsToFloat(w[10]), uintBitsToFloat(w[11]));
    return cubic;
}
// PathCubic_write encodes s at ref as twelve consecutive words, written in
// ascending order.
void PathCubic_write(Alloc a, PathCubicRef ref, PathCubic s) {
    uint base = ref.offset >> 2;
    vec2 points[4] = vec2[4](s.p0, s.p1, s.p2, s.p3);
    for (uint i = 0; i < 4; i++) {
        write_mem(a, base + 2 * i, floatBitsToUint(points[i].x));
        write_mem(a, base + 2 * i + 1, floatBitsToUint(points[i].y));
    }
    write_mem(a, base + 8, s.path_ix);
    write_mem(a, base + 9, s.trans_ix);
    write_mem(a, base + 10, floatBitsToUint(s.stroke.x));
    write_mem(a, base + 11, floatBitsToUint(s.stroke.y));
}
// PathSeg_tag reads the first word of the segment at ref and splits it into
// tag (low 16 bits) and flags (high 16 bits).
PathSegTag PathSeg_tag(Alloc a, PathSegRef ref) {
    uint word = read_mem(a, ref.offset >> 2);
    uint tag = word & 0xffff;
    uint flags = word >> 16;
    return PathSegTag(tag, flags);
}
// PathSeg_Cubic_read decodes the PathCubic payload that follows the 4-byte
// tag word of the segment at ref.
PathCubic PathSeg_Cubic_read(Alloc a, PathSegRef ref) {
    PathCubicRef payload = PathCubicRef(ref.offset + 4);
    return PathCubic_read(a, payload);
}
// PathSeg_Nop_write stores the PathSeg_Nop tag in the first word of the
// segment at ref.
void PathSeg_Nop_write(Alloc a, PathSegRef ref) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, PathSeg_Nop);
}
// PathSeg_Cubic_write stores a cubic segment at ref: first the tag word
// (flags in the high 16 bits, PathSeg_Cubic in the low bits), then the
// PathCubic payload 4 bytes further on.
void PathSeg_Cubic_write(Alloc a, PathSegRef ref, uint flags, PathCubic s) {
    uint tag_word = (flags << 16) | PathSeg_Cubic;
    write_mem(a, ref.offset >> 2, tag_word);
    PathCubicRef payload = PathCubicRef(ref.offset + 4);
    PathCubic_write(a, payload, s);
}
-278
View File
@@ -1,278 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Code auto-generated by piet-gpu-derive
// Typed references. offset is a byte offset; the read and write helpers
// convert it to a word index with `>> 2`.
struct CmdStrokeRef {
uint offset;
};
struct CmdFillRef {
uint offset;
};
struct CmdColorRef {
uint offset;
};
struct CmdImageRef {
uint offset;
};
struct CmdAlphaRef {
uint offset;
};
struct CmdJumpRef {
uint offset;
};
struct CmdRef {
uint offset;
};
// CmdStroke is the payload of a Cmd_Stroke command.
struct CmdStroke {
    uint tile_ref;
    float half_width;
};

// Encoded size of CmdStroke in bytes.
#define CmdStroke_size 8

// CmdStroke_index returns a reference advanced by index CmdStroke elements.
CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
    uint byte_offset = ref.offset + index * CmdStroke_size;
    return CmdStrokeRef(byte_offset);
}
// CmdFill is the payload of a Cmd_Fill command.
struct CmdFill {
    uint tile_ref;
    int backdrop;
};

// Encoded size of CmdFill in bytes.
#define CmdFill_size 8

// CmdFill_index returns a reference advanced by index CmdFill elements.
CmdFillRef CmdFill_index(CmdFillRef ref, uint index) {
    uint byte_offset = ref.offset + index * CmdFill_size;
    return CmdFillRef(byte_offset);
}
// CmdColor is the payload of a Cmd_Color command.
struct CmdColor {
    uint rgba_color;
};

// Encoded size of CmdColor in bytes.
#define CmdColor_size 4

// CmdColor_index returns a reference advanced by index CmdColor elements.
CmdColorRef CmdColor_index(CmdColorRef ref, uint index) {
    uint byte_offset = ref.offset + index * CmdColor_size;
    return CmdColorRef(byte_offset);
}
// CmdImage is the payload of a Cmd_Image command. In memory, offset is
// packed into a single word as two signed 16-bit halves.
struct CmdImage {
    uint index;
    ivec2 offset;
};

// Encoded size of CmdImage in bytes.
#define CmdImage_size 8

// CmdImage_index returns a reference advanced by index CmdImage elements.
CmdImageRef CmdImage_index(CmdImageRef ref, uint index) {
    uint byte_offset = ref.offset + index * CmdImage_size;
    return CmdImageRef(byte_offset);
}
// CmdAlpha is the payload of a Cmd_Alpha command.
struct CmdAlpha {
    float alpha;
};

// Encoded size of CmdAlpha in bytes.
#define CmdAlpha_size 4

// CmdAlpha_index returns a reference advanced by index CmdAlpha elements.
CmdAlphaRef CmdAlpha_index(CmdAlphaRef ref, uint index) {
    uint byte_offset = ref.offset + index * CmdAlpha_size;
    return CmdAlphaRef(byte_offset);
}
// CmdJump is the payload of a Cmd_Jump command.
struct CmdJump {
    uint new_ref;
};

// Encoded size of CmdJump in bytes.
#define CmdJump_size 4

// CmdJump_index returns a reference advanced by index CmdJump elements.
CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) {
    uint byte_offset = ref.offset + index * CmdJump_size;
    return CmdJumpRef(byte_offset);
}
// Command tags, stored in the first word of each command.
#define Cmd_End 0
#define Cmd_Fill 1
#define Cmd_Stroke 2
#define Cmd_Solid 3
#define Cmd_Alpha 4
#define Cmd_Color 5
#define Cmd_Image 6
#define Cmd_BeginClip 7
#define Cmd_EndClip 8
#define Cmd_Jump 9

// Size in bytes used by Cmd_index to step between commands.
#define Cmd_size 12

// Cmd_index returns a reference advanced by index Cmd_size-byte steps.
CmdRef Cmd_index(CmdRef ref, uint index) {
    uint byte_offset = ref.offset + index * Cmd_size;
    return CmdRef(byte_offset);
}
// CmdTag is the decoded first word of a command: the tag is stored in the
// low 16 bits and the flags in the high 16 bits.
struct CmdTag {
uint tag;
uint flags;
};
// CmdStroke_read decodes the two-word CmdStroke stored at ref.
CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) {
    uint base = ref.offset >> 2;
    CmdStroke cmd;
    cmd.tile_ref = read_mem(a, base + 0);
    cmd.half_width = uintBitsToFloat(read_mem(a, base + 1));
    return cmd;
}
// CmdStroke_write encodes s as two words at ref.
void CmdStroke_write(Alloc a, CmdStrokeRef ref, CmdStroke s) {
    uint base = ref.offset >> 2;
    uint width_bits = floatBitsToUint(s.half_width);
    write_mem(a, base + 0, s.tile_ref);
    write_mem(a, base + 1, width_bits);
}
// CmdFill_read decodes the two-word CmdFill stored at ref.
CmdFill CmdFill_read(Alloc a, CmdFillRef ref) {
    uint base = ref.offset >> 2;
    CmdFill cmd;
    cmd.tile_ref = read_mem(a, base + 0);
    cmd.backdrop = int(read_mem(a, base + 1));
    return cmd;
}
// CmdFill_write encodes s as two words at ref.
void CmdFill_write(Alloc a, CmdFillRef ref, CmdFill s) {
    uint base = ref.offset >> 2;
    uint backdrop_bits = uint(s.backdrop);
    write_mem(a, base + 0, s.tile_ref);
    write_mem(a, base + 1, backdrop_bits);
}
// CmdColor_read decodes the single-word CmdColor stored at ref.
CmdColor CmdColor_read(Alloc a, CmdColorRef ref) {
    uint base = ref.offset >> 2;
    CmdColor cmd;
    cmd.rgba_color = read_mem(a, base + 0);
    return cmd;
}
// CmdColor_write encodes s as a single word at ref.
void CmdColor_write(Alloc a, CmdColorRef ref, CmdColor s) {
    uint base = ref.offset >> 2;
    write_mem(a, base + 0, s.rgba_color);
}
// CmdImage_read decodes the two-word CmdImage stored at ref. The second
// word holds the offset as two sign-extended 16-bit halves: x in the low
// half, y in the high half.
CmdImage CmdImage_read(Alloc a, CmdImageRef ref) {
    uint base = ref.offset >> 2;
    uint index_word = read_mem(a, base + 0);
    uint packed_offset = read_mem(a, base + 1);

    int off_x = int(packed_offset << 16) >> 16;
    int off_y = int(packed_offset) >> 16;

    CmdImage cmd;
    cmd.index = index_word;
    cmd.offset = ivec2(off_x, off_y);
    return cmd;
}
// CmdImage_write encodes s as two words at ref. The offset is packed into
// the second word: x in the low 16 bits, y in the high 16 bits.
void CmdImage_write(Alloc a, CmdImageRef ref, CmdImage s) {
    uint base = ref.offset >> 2;
    uint low_half = uint(s.offset.x) & 0xffff;
    uint high_half = uint(s.offset.y) << 16;
    write_mem(a, base + 0, s.index);
    write_mem(a, base + 1, low_half | high_half);
}
// CmdAlpha_read decodes the single-word CmdAlpha stored at ref.
CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) {
    uint base = ref.offset >> 2;
    CmdAlpha cmd;
    cmd.alpha = uintBitsToFloat(read_mem(a, base + 0));
    return cmd;
}
// CmdAlpha_write encodes s as a single word at ref.
void CmdAlpha_write(Alloc a, CmdAlphaRef ref, CmdAlpha s) {
    uint base = ref.offset >> 2;
    uint alpha_bits = floatBitsToUint(s.alpha);
    write_mem(a, base + 0, alpha_bits);
}
// CmdJump_read decodes the single-word CmdJump stored at ref.
CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) {
    uint base = ref.offset >> 2;
    CmdJump cmd;
    cmd.new_ref = read_mem(a, base + 0);
    return cmd;
}
// CmdJump_write encodes s as a single word at ref.
void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) {
    uint base = ref.offset >> 2;
    write_mem(a, base + 0, s.new_ref);
}
// Cmd_tag reads the first word of the command at ref and splits it into
// tag (low 16 bits) and flags (high 16 bits).
CmdTag Cmd_tag(Alloc a, CmdRef ref) {
    uint word = read_mem(a, ref.offset >> 2);
    uint tag = word & 0xffff;
    uint flags = word >> 16;
    return CmdTag(tag, flags);
}
// The Cmd_*_read helpers decode the payload of the command at ref. The
// payload starts 4 bytes after ref, following the tag word.

CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) {
    CmdFillRef payload = CmdFillRef(ref.offset + 4);
    return CmdFill_read(a, payload);
}

CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) {
    CmdStrokeRef payload = CmdStrokeRef(ref.offset + 4);
    return CmdStroke_read(a, payload);
}

CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) {
    CmdAlphaRef payload = CmdAlphaRef(ref.offset + 4);
    return CmdAlpha_read(a, payload);
}

CmdColor Cmd_Color_read(Alloc a, CmdRef ref) {
    CmdColorRef payload = CmdColorRef(ref.offset + 4);
    return CmdColor_read(a, payload);
}

CmdImage Cmd_Image_read(Alloc a, CmdRef ref) {
    CmdImageRef payload = CmdImageRef(ref.offset + 4);
    return CmdImage_read(a, payload);
}

CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) {
    CmdJumpRef payload = CmdJumpRef(ref.offset + 4);
    return CmdJump_read(a, payload);
}
// The Cmd_*_write helpers store a command at ref: the tag goes into the
// first word and, for commands that carry one, the payload is written
// 4 bytes after ref.

void Cmd_End_write(Alloc a, CmdRef ref) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_End);
}

void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_Fill);
    CmdFillRef payload = CmdFillRef(ref.offset + 4);
    CmdFill_write(a, payload, s);
}

void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_Stroke);
    CmdStrokeRef payload = CmdStrokeRef(ref.offset + 4);
    CmdStroke_write(a, payload, s);
}

void Cmd_Solid_write(Alloc a, CmdRef ref) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_Solid);
}

void Cmd_Alpha_write(Alloc a, CmdRef ref, CmdAlpha s) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_Alpha);
    CmdAlphaRef payload = CmdAlphaRef(ref.offset + 4);
    CmdAlpha_write(a, payload, s);
}

void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_Color);
    CmdColorRef payload = CmdColorRef(ref.offset + 4);
    CmdColor_write(a, payload, s);
}

void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_Image);
    CmdImageRef payload = CmdImageRef(ref.offset + 4);
    CmdImage_write(a, payload, s);
}

void Cmd_BeginClip_write(Alloc a, CmdRef ref) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_BeginClip);
}

void Cmd_EndClip_write(Alloc a, CmdRef ref) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_EndClip);
}

void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_Jump);
    CmdJumpRef payload = CmdJumpRef(ref.offset + 4);
    CmdJump_write(a, payload, s);
}
-313
View File
@@ -1,313 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Code auto-generated by piet-gpu-derive
// Typed references to scene elements. Each wraps a single offset; the
// *_index helpers advance it by multiples of the element's *_size, so it
// is presumably a byte offset.
struct LineSegRef {
uint offset;
};
struct QuadSegRef {
uint offset;
};
struct CubicSegRef {
uint offset;
};
struct FillColorRef {
uint offset;
};
struct FillImageRef {
uint offset;
};
struct SetLineWidthRef {
uint offset;
};
struct TransformRef {
uint offset;
};
struct ClipRef {
uint offset;
};
struct SetFillModeRef {
uint offset;
};
struct ElementRef {
uint offset;
};
// Two points, serialized as four consecutive 32-bit floats.
struct LineSeg {
    vec2 p0;
    vec2 p1;
};

// Serialized size of a LineSeg (4 words of 4 bytes).
#define LineSeg_size 16

// Returns a reference to element number index of the LineSeg array that
// starts at ref.
LineSegRef LineSeg_index(LineSegRef ref, uint index) {
    uint skip = index * LineSeg_size;
    return LineSegRef(ref.offset + skip);
}
// Three points, serialized as six consecutive 32-bit floats.
struct QuadSeg {
    vec2 p0;
    vec2 p1;
    vec2 p2;
};

// Serialized size of a QuadSeg (6 words of 4 bytes).
#define QuadSeg_size 24

// Returns a reference to element number index of the QuadSeg array that
// starts at ref.
QuadSegRef QuadSeg_index(QuadSegRef ref, uint index) {
    uint skip = index * QuadSeg_size;
    return QuadSegRef(ref.offset + skip);
}
// Four points, serialized as eight consecutive 32-bit floats.
struct CubicSeg {
    vec2 p0;
    vec2 p1;
    vec2 p2;
    vec2 p3;
};

// Serialized size of a CubicSeg (8 words of 4 bytes).
#define CubicSeg_size 32

// Returns a reference to element number index of the CubicSeg array that
// starts at ref.
CubicSegRef CubicSeg_index(CubicSegRef ref, uint index) {
    uint skip = index * CubicSeg_size;
    return CubicSegRef(ref.offset + skip);
}
// A fill color, stored as a single 32-bit word.
struct FillColor {
    uint rgba_color;
};

// Serialized size of a FillColor (1 word of 4 bytes).
#define FillColor_size 4

// Returns a reference to element number index of the FillColor array that
// starts at ref.
FillColorRef FillColor_index(FillColorRef ref, uint index) {
    uint skip = index * FillColor_size;
    return FillColorRef(ref.offset + skip);
}
// An image fill. Serialized as two words: the index, then the offset packed
// as two signed 16-bit halves (x in the low half, y in the high half).
struct FillImage {
    uint index;
    ivec2 offset;
};

// Serialized size of a FillImage (2 words of 4 bytes).
#define FillImage_size 8

// Returns a reference to element number index of the FillImage array that
// starts at ref.
FillImageRef FillImage_index(FillImageRef ref, uint index) {
    uint skip = index * FillImage_size;
    return FillImageRef(ref.offset + skip);
}
// A line width, serialized as one 32-bit float.
struct SetLineWidth {
    float width;
};

// Serialized size of a SetLineWidth (1 word of 4 bytes).
#define SetLineWidth_size 4

// Returns a reference to element number index of the SetLineWidth array that
// starts at ref.
SetLineWidthRef SetLineWidth_index(SetLineWidthRef ref, uint index) {
    uint skip = index * SetLineWidth_size;
    return SetLineWidthRef(ref.offset + skip);
}
// Four matrix coefficients plus a translation, serialized as six
// consecutive 32-bit floats.
struct Transform {
    vec4 mat;
    vec2 translate;
};

// Serialized size of a Transform (6 words of 4 bytes).
#define Transform_size 24

// Returns a reference to element number index of the Transform array that
// starts at ref.
TransformRef Transform_index(TransformRef ref, uint index) {
    uint skip = index * Transform_size;
    return TransformRef(ref.offset + skip);
}
// A clip bounding box, serialized as four consecutive 32-bit floats.
struct Clip {
    vec4 bbox;
};

// Serialized size of a Clip (4 words of 4 bytes).
#define Clip_size 16

// Returns a reference to element number index of the Clip array that starts
// at ref.
ClipRef Clip_index(ClipRef ref, uint index) {
    uint skip = index * Clip_size;
    return ClipRef(ref.offset + skip);
}
// A fill mode selector, stored as a single 32-bit word.
struct SetFillMode {
    uint fill_mode;
};

// Serialized size of a SetFillMode (1 word of 4 bytes).
#define SetFillMode_size 4

// Returns a reference to element number index of the SetFillMode array that
// starts at ref.
SetFillModeRef SetFillMode_index(SetFillModeRef ref, uint index) {
    uint skip = index * SetFillMode_size;
    return SetFillModeRef(ref.offset + skip);
}
// Element tag values. Element_tag extracts the tag from the low 16 bits of
// an element's first word; the Element_*_read functions read the payload
// that follows that word.
#define Element_Nop 0
#define Element_Line 1
#define Element_Quad 2
#define Element_Cubic 3
#define Element_FillColor 4
#define Element_SetLineWidth 5
#define Element_Transform 6
#define Element_BeginClip 7
#define Element_EndClip 8
#define Element_FillImage 9
#define Element_SetFillMode 10
// Serialized size of an Element: a 4-byte tag word plus room for the largest
// payload (CubicSeg, 32 bytes).
#define Element_size 36

// Returns a reference to element number index of the Element array that
// starts at ref.
ElementRef Element_index(ElementRef ref, uint index) {
    uint skip = index * Element_size;
    return ElementRef(ref.offset + skip);
}
// The decoded first word of an element, as produced by Element_tag.
struct ElementTag {
// Low 16 bits of the word.
uint tag;
// High 16 bits of the word.
uint flags;
};
// Decodes the LineSeg stored at ref from four consecutive scene words,
// reinterpreting each word's bits as a float.
LineSeg LineSeg_read(LineSegRef ref) {
    uint base = ref.offset >> 2;
    LineSeg seg;
    seg.p0 = vec2(uintBitsToFloat(scene[base + 0]), uintBitsToFloat(scene[base + 1]));
    seg.p1 = vec2(uintBitsToFloat(scene[base + 2]), uintBitsToFloat(scene[base + 3]));
    return seg;
}
// Decodes the QuadSeg stored at ref from six consecutive scene words,
// reinterpreting each word's bits as a float.
QuadSeg QuadSeg_read(QuadSegRef ref) {
    uint base = ref.offset >> 2;
    QuadSeg seg;
    seg.p0 = vec2(uintBitsToFloat(scene[base + 0]), uintBitsToFloat(scene[base + 1]));
    seg.p1 = vec2(uintBitsToFloat(scene[base + 2]), uintBitsToFloat(scene[base + 3]));
    seg.p2 = vec2(uintBitsToFloat(scene[base + 4]), uintBitsToFloat(scene[base + 5]));
    return seg;
}
// Decodes the CubicSeg stored at ref from eight consecutive scene words,
// reinterpreting each word's bits as a float.
CubicSeg CubicSeg_read(CubicSegRef ref) {
    uint base = ref.offset >> 2;
    CubicSeg seg;
    seg.p0 = vec2(uintBitsToFloat(scene[base + 0]), uintBitsToFloat(scene[base + 1]));
    seg.p1 = vec2(uintBitsToFloat(scene[base + 2]), uintBitsToFloat(scene[base + 3]));
    seg.p2 = vec2(uintBitsToFloat(scene[base + 4]), uintBitsToFloat(scene[base + 5]));
    seg.p3 = vec2(uintBitsToFloat(scene[base + 6]), uintBitsToFloat(scene[base + 7]));
    return seg;
}
// Decodes the FillColor stored at ref; the color is the scene word as-is.
FillColor FillColor_read(FillColorRef ref) {
    FillColor fill;
    fill.rgba_color = scene[(ref.offset >> 2) + 0];
    return fill;
}
// Decodes the FillImage stored at ref from two consecutive scene words.
FillImage FillImage_read(FillImageRef ref) {
    uint base = ref.offset >> 2;
    uint image_ix = scene[base + 0];
    uint xy_bits = scene[base + 1];
    // The second word holds two signed 16-bit values. Shifting the low half
    // up and arithmetically back down sign-extends it; the high half only
    // needs the arithmetic shift down.
    int off_x = int(xy_bits << 16) >> 16;
    int off_y = int(xy_bits) >> 16;
    FillImage fill;
    fill.index = image_ix;
    fill.offset = ivec2(off_x, off_y);
    return fill;
}
// Decodes the SetLineWidth stored at ref, reinterpreting the scene word's
// bits as a float.
SetLineWidth SetLineWidth_read(SetLineWidthRef ref) {
    SetLineWidth lw;
    lw.width = uintBitsToFloat(scene[(ref.offset >> 2) + 0]);
    return lw;
}
// Decodes the Transform stored at ref from six consecutive scene words:
// four matrix coefficients followed by the translation.
Transform Transform_read(TransformRef ref) {
    uint base = ref.offset >> 2;
    Transform t;
    t.mat = vec4(
        uintBitsToFloat(scene[base + 0]),
        uintBitsToFloat(scene[base + 1]),
        uintBitsToFloat(scene[base + 2]),
        uintBitsToFloat(scene[base + 3]));
    t.translate = vec2(
        uintBitsToFloat(scene[base + 4]),
        uintBitsToFloat(scene[base + 5]));
    return t;
}
// Decodes the Clip stored at ref: a bounding box held in four consecutive
// scene words.
Clip Clip_read(ClipRef ref) {
    uint base = ref.offset >> 2;
    Clip c;
    c.bbox = vec4(
        uintBitsToFloat(scene[base + 0]),
        uintBitsToFloat(scene[base + 1]),
        uintBitsToFloat(scene[base + 2]),
        uintBitsToFloat(scene[base + 3]));
    return c;
}
// Decodes the SetFillMode stored at ref; the mode is the scene word as-is.
SetFillMode SetFillMode_read(SetFillModeRef ref) {
    SetFillMode mode;
    mode.fill_mode = scene[(ref.offset >> 2) + 0];
    return mode;
}
// Splits the first word of the element at ref into its tag (low 16 bits)
// and flags (high 16 bits).
ElementTag Element_tag(ElementRef ref) {
    uint word = scene[ref.offset >> 2];
    uint tag = word & 0xffff;
    uint flags = word >> 16;
    return ElementTag(tag, flags);
}
// Reads the LineSeg payload of the element at ref, located 4 past the
// element's offset (after its tag word).
LineSeg Element_Line_read(ElementRef ref) {
    LineSegRef payload = LineSegRef(ref.offset + 4);
    return LineSeg_read(payload);
}
// Reads the QuadSeg payload of the element at ref, located 4 past the
// element's offset (after its tag word).
QuadSeg Element_Quad_read(ElementRef ref) {
    QuadSegRef payload = QuadSegRef(ref.offset + 4);
    return QuadSeg_read(payload);
}
// Reads the CubicSeg payload of the element at ref, located 4 past the
// element's offset (after its tag word).
CubicSeg Element_Cubic_read(ElementRef ref) {
    CubicSegRef payload = CubicSegRef(ref.offset + 4);
    return CubicSeg_read(payload);
}
// Reads the FillColor payload of the element at ref, located 4 past the
// element's offset (after its tag word).
FillColor Element_FillColor_read(ElementRef ref) {
    FillColorRef payload = FillColorRef(ref.offset + 4);
    return FillColor_read(payload);
}
// Reads the SetLineWidth payload of the element at ref, located 4 past the
// element's offset (after its tag word).
SetLineWidth Element_SetLineWidth_read(ElementRef ref) {
    SetLineWidthRef payload = SetLineWidthRef(ref.offset + 4);
    return SetLineWidth_read(payload);
}
// Reads the Transform payload of the element at ref, located 4 past the
// element's offset (after its tag word).
Transform Element_Transform_read(ElementRef ref) {
    TransformRef payload = TransformRef(ref.offset + 4);
    return Transform_read(payload);
}
// Reads the Clip payload of the BeginClip element at ref, located 4 past
// the element's offset (after its tag word).
Clip Element_BeginClip_read(ElementRef ref) {
    ClipRef payload = ClipRef(ref.offset + 4);
    return Clip_read(payload);
}
// Reads the Clip payload of the EndClip element at ref, located 4 past the
// element's offset (after its tag word).
Clip Element_EndClip_read(ElementRef ref) {
    ClipRef payload = ClipRef(ref.offset + 4);
    return Clip_read(payload);
}
// Reads the FillImage payload of the element at ref, located 4 past the
// element's offset (after its tag word).
FillImage Element_FillImage_read(ElementRef ref) {
    FillImageRef payload = FillImageRef(ref.offset + 4);
    return FillImage_read(payload);
}
// Reads the SetFillMode payload of the element at ref, located 4 past the
// element's offset (after its tag word).
SetFillMode Element_SetFillMode_read(ElementRef ref) {
    SetFillModeRef payload = SetFillModeRef(ref.offset + 4);
    return SetFillMode_read(payload);
}
-51
View File
@@ -1,51 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Various constants for the sizes of groups and tiles.
// Much of this will be made dynamic in various ways, but for now it's easiest
// to hardcode and keep all in one place.
// A LG_WG_FACTOR of n scales workgroup sizes by 2^n. Use 0 for a
// maximum workgroup size of 128, or 1 for a maximum size of 256.
#define LG_WG_FACTOR 0
#define WG_FACTOR (1<<LG_WG_FACTOR)
// Tile dimensions in pixels.
#define TILE_WIDTH_PX 32
#define TILE_HEIGHT_PX 32
// NOTE(review): presumably the initial per-tile command list allocation
// size — confirm against the coarse kernel.
#define PTCL_INITIAL_ALLOC 1024
// These should probably be renamed and/or reworked. In the binning
// kernel, they represent the number of bins. Also, the workgroup size
// of that kernel is equal to the number of bins, but should probably
// be more flexible (it's 512 in the K&L paper).
#define N_TILE_X 16
#define N_TILE_Y (8 * WG_FACTOR)
// 16 * 8 * 2^LG_WG_FACTOR, i.e. 128 when LG_WG_FACTOR is 0.
#define N_TILE (N_TILE_X * N_TILE_Y)
// log2(N_TILE): 16 * 8 = 2^7, scaled by 2^LG_WG_FACTOR.
#define LG_N_TILE (7 + LG_WG_FACTOR)
// N_TILE divided by 32.
// NOTE(review): presumably the number of 32-bit words needed for one bit
// per bin — confirm against the binning kernel.
#define N_SLICE (N_TILE / 32)
// Per-frame configuration shared by the compute kernels: element counts,
// the size of the tile grid and the allocations holding intermediate data.
struct Config {
// Number of elements (paths). Kernels that run one invocation per element
// use it to ignore invocations past the end.
uint n_elements; // paths
uint n_pathseg;
// Size of the tile grid; tile coordinates are clamped to [0, width] and
// [0, height].
uint width_in_tiles;
uint height_in_tiles;
// Holds one Path per element (indexed by element * Path_size).
Alloc tile_alloc;
Alloc bin_alloc;
Alloc ptcl_alloc;
Alloc pathseg_alloc;
// Holds one Annotated per element (indexed by element * Annotated_size).
Alloc anno_alloc;
Alloc trans_alloc;
};
// Fill modes.
#define MODE_NONZERO 0
#define MODE_STROKE 1

// Size of kernel4 clip state, in words.
#define CLIP_STATE_SIZE 2

// fill_mode_from_flags returns the fill mode encoded in the lowest bit of
// the tag flags: MODE_NONZERO (0) or MODE_STROKE (1).
uint fill_mode_from_flags(uint flags) {
    uint mode = flags & 0x1;
    return mode;
}
-73
View File
@@ -1,73 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Code auto-generated by piet-gpu-derive
// Reference to a serialized State. State_read and State_write shift the
// offset right by 2 to index the state word array.
struct StateRef {
uint offset;
};
// State record, serialized as 15 consecutive 32-bit words in the order the
// fields are declared (vectors expand to one word per component).
struct State {
    vec4 mat;
    vec2 translate;
    vec4 bbox;
    float linewidth;
    uint flags;
    uint path_count;
    uint pathseg_count;
    uint trans_count;
};

// Serialized size of a State (15 words of 4 bytes).
#define State_size 60

// Returns a reference to element number index of the State array that
// starts at ref.
StateRef State_index(StateRef ref, uint index) {
    uint skip = index * State_size;
    return StateRef(ref.offset + skip);
}
// Decodes the State stored at ref from 15 consecutive words of the state
// array. Words 0-10 are reinterpreted as floats; words 11-14 are used as
// unsigned integers.
State State_read(StateRef ref) {
    uint base = ref.offset >> 2;

    // Load every word first, in ascending order.
    uint mat_x = state[base + 0];
    uint mat_y = state[base + 1];
    uint mat_z = state[base + 2];
    uint mat_w = state[base + 3];
    uint translate_x = state[base + 4];
    uint translate_y = state[base + 5];
    uint bbox_x = state[base + 6];
    uint bbox_y = state[base + 7];
    uint bbox_z = state[base + 8];
    uint bbox_w = state[base + 9];
    uint linewidth = state[base + 10];
    uint flags = state[base + 11];
    uint path_count = state[base + 12];
    uint pathseg_count = state[base + 13];
    uint trans_count = state[base + 14];

    State st;
    st.mat = vec4(uintBitsToFloat(mat_x), uintBitsToFloat(mat_y), uintBitsToFloat(mat_z), uintBitsToFloat(mat_w));
    st.translate = vec2(uintBitsToFloat(translate_x), uintBitsToFloat(translate_y));
    st.bbox = vec4(uintBitsToFloat(bbox_x), uintBitsToFloat(bbox_y), uintBitsToFloat(bbox_z), uintBitsToFloat(bbox_w));
    st.linewidth = uintBitsToFloat(linewidth);
    st.flags = flags;
    st.path_count = path_count;
    st.pathseg_count = pathseg_count;
    st.trans_count = trans_count;
    return st;
}
// Encodes s into 15 consecutive words of the state array starting at ref.
// Float fields are stored as their raw bits; integer fields as-is.
void State_write(StateRef ref, State s) {
    uint base = ref.offset >> 2;

    // mat
    state[base + 0] = floatBitsToUint(s.mat.x);
    state[base + 1] = floatBitsToUint(s.mat.y);
    state[base + 2] = floatBitsToUint(s.mat.z);
    state[base + 3] = floatBitsToUint(s.mat.w);

    // translate
    state[base + 4] = floatBitsToUint(s.translate.x);
    state[base + 5] = floatBitsToUint(s.translate.y);

    // bbox
    state[base + 6] = floatBitsToUint(s.bbox.x);
    state[base + 7] = floatBitsToUint(s.bbox.y);
    state[base + 8] = floatBitsToUint(s.bbox.z);
    state[base + 9] = floatBitsToUint(s.bbox.w);

    // scalars
    state[base + 10] = floatBitsToUint(s.linewidth);
    state[base + 11] = s.flags;
    state[base + 12] = s.path_count;
    state[base + 13] = s.pathseg_count;
    state[base + 14] = s.trans_count;
}
-81
View File
@@ -1,81 +0,0 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
// Fragment shader computing the coverage contribution of one quadratic
// Bézier curve to the current fragment.
precision mediump float;
// Start point, control point and end point of the curve.
// NOTE(review): presumably relative to the fragment centre, since main()
// clamps x to [-0.5, 0.5] — confirm against the vertex shader.
layout(location=0) in vec2 vFrom;
layout(location=1) in vec2 vCtrl;
layout(location=2) in vec2 vTo;
// Coverage output; main() writes only the red channel.
layout(location = 0) out vec4 fragCover;
// Computes the signed area covered by the curve (vFrom, vCtrl, vTo) within
// the fragment and writes it to fragCover.r. The curve is approximated by a
// line through its point at the middle of the fragment's horizontal extent.
void main() {
    // Sort from and to in increasing order so the root below
    // is always the positive square root, if any.
    // We need the direction of the curve below, so this can't be
    // done from the vertex shader.
    bool increasing = vTo.x >= vFrom.x;
    vec2 left = increasing ? vFrom : vTo;
    vec2 right = increasing ? vTo : vFrom;

    // The signed horizontal extent of the fragment.
    vec2 extent = clamp(vec2(vFrom.x, vTo.x), -0.5, 0.5);
    // Find the t where the curve crosses the middle of the
    // extent, x₀.
    // Given the Bézier curve with x coordinates P₀, P₁, P₂
    // where P₀ is at the origin, its x coordinate in t
    // is given by:
    //
    // x(t) = 2(1-t)tP₁ + t²P₂
    //
    // Rearranging:
    //
    // x(t) = (P₂ - 2P₁)t² + 2P₁t
    //
    // Setting x(t) = x₀ and using Muller's quadratic formula ("Citardauq")
    // for robustness,
    //
    // t = 2x₀/(2P₁±√(4P₁²+4(P₂-2P₁)x₀))
    //
    // which simplifies to
    //
    // t = x₀/(P₁±√(P₁²+(P₂-2P₁)x₀))
    //
    // Setting v = P₂-P₁,
    //
    // t = x₀/(P₁±√(P₁²+(v-P₁)x₀))
    //
    // t lies in [0; 1]; P₂ ≥ P₁ and P₁ ≥ 0 since we split curves where
    // the control point lies before the start point or after the end point.
    // It can then be shown that only the positive square root is valid.
    float midx = mix(extent.x, extent.y, 0.5);
    float x0 = midx - left.x;
    vec2 p1 = vCtrl - left;
    vec2 v = right - vCtrl;
    float t = x0/(p1.x+sqrt(p1.x*p1.x+(v.x-p1.x)*x0));
    // Find y(t) on the curve.
    float y = mix(mix(left.y, vCtrl.y, t), mix(vCtrl.y, right.y, t), t);
    // And the slope.
    vec2 d_half = mix(p1, v, t);
    float dy = d_half.y/d_half.x;
    // Together, y and dy form a line approximation.

    // Compute the fragment area above the line.
    // The area is symmetric around dy = 0. Scale slope with extent width.
    float width = extent.y - extent.x;
    dy = abs(dy*width);

    vec4 sides = vec4(dy*+0.5 + y, dy*-0.5 + y, (+0.5-y)/dy, (-0.5-y)/dy);
    sides = clamp(sides+0.5, 0.0, 1.0);

    float area = 0.5*(sides.z - sides.z*sides.y + 1.0 - sides.x+sides.x*sides.w);
    // width is signed, so the area carries the direction of the curve.
    area *= width;

    // Work around issue #13.
    if (width == 0.0)
        area = 0.0;

    fragCover.r = area;
}
-53
View File
@@ -1,53 +0,0 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
// Vertex shader producing one corner of the quad that covers a curve.
precision highp float;
layout(binding = 0) uniform Block {
// Applied to the final position: xy is the scale, zw the offset.
vec4 transform;
// Added to every curve point (and to maxy) before anything else.
vec2 pathOffset;
} _block;
// Selects the quad corner: main() treats values >= 0.375 as north and,
// after subtracting 0.5 for north corners, values >= 0.125 as east.
layout(location=0) in float corner;
// Y coordinate used for the north edge of the quad.
layout(location=1) in float maxy;
// Start point, control point and end point of the curve.
layout(location=2) in vec2 from;
layout(location=3) in vec2 ctrl;
layout(location=4) in vec2 to;
// The curve points relative to the emitted corner position.
layout(location=0) out vec2 vFrom;
layout(location=1) out vec2 vCtrl;
layout(location=2) out vec2 vTo;
// Emits the quad corner selected by `corner` and passes the curve points on
// to the fragment stage, expressed relative to that corner.
void main() {
    // Move the curve (and its maximum y) by the per-path offset.
    vec2 p_from = from + _block.pathOffset;
    vec2 p_ctrl = ctrl + _block.pathOffset;
    vec2 p_to = to + _block.pathOffset;
    float top = maxy + _block.pathOffset.y;

    // Component-wise bounds of the three curve points.
    vec2 lo = min(min(p_from, p_ctrl), p_to);
    vec2 hi = max(max(p_from, p_ctrl), p_to);

    // Decode the corner: north corners are offset by 0.5, east corners by
    // a further 0.25.
    float c = corner;
    bool north = c >= 0.375;
    if (north) {
        c -= 0.5;
    }
    bool east = c >= 0.125;

    // Add a one pixel overlap so curve quads cover their
    // entire curves. Could use conservative rasterization
    // if available.
    vec2 pos;
    pos.y = north ? top + 1.0 : lo.y - 1.0;
    pos.x = east ? hi.x + 1.0 : lo.x - 1.0;

    vFrom = p_from - pos;
    vCtrl = p_ctrl - pos;
    vTo = p_to - pos;

    vec2 clip_pos = pos*_block.transform.xy + _block.transform.zw;
    gl_Position = vec4(clip_pos, 1, 1);
}
-150
View File
@@ -1,150 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Code auto-generated by piet-gpu-derive
// Typed references to serialized tile data. The matching *_read and *_write
// functions shift the offset right by 2 to obtain the index passed to
// read_mem/write_mem.

// Reference to a serialized Path.
struct PathRef {
uint offset;
};
// Reference to a serialized Tile.
struct TileRef {
uint offset;
};
// Reference to a serialized TileSeg.
struct TileSegRef {
uint offset;
};
// Reference to a serialized TransformSeg.
struct TransformSegRef {
uint offset;
};
// A path's bounding box and a reference to its tiles. Serialized as three
// words: the bbox components packed as 16-bit pairs (x|y, z|w), then the
// tiles offset.
struct Path {
    uvec4 bbox;
    TileRef tiles;
};

// Serialized size of a Path (3 words of 4 bytes).
#define Path_size 12

// Returns a reference to element number index of the Path array that starts
// at ref.
PathRef Path_index(PathRef ref, uint index) {
    uint skip = index * Path_size;
    return PathRef(ref.offset + skip);
}
// A tile: a reference to a TileSeg and a signed backdrop value, serialized
// as two words.
struct Tile {
    TileSegRef tile;
    int backdrop;
};

// Serialized size of a Tile (2 words of 4 bytes).
#define Tile_size 8

// Returns a reference to element number index of the Tile array that starts
// at ref.
TileRef Tile_index(TileRef ref, uint index) {
    uint skip = index * Tile_size;
    return TileRef(ref.offset + skip);
}
// A segment within a tile, chained to another TileSeg through next.
// Serialized as six words: five floats followed by the next offset.
struct TileSeg {
    vec2 origin;
    vec2 vector;
    float y_edge;
    TileSegRef next;
};

// Serialized size of a TileSeg (6 words of 4 bytes).
#define TileSeg_size 24

// Returns a reference to element number index of the TileSeg array that
// starts at ref.
TileSegRef TileSeg_index(TileSegRef ref, uint index) {
    uint skip = index * TileSeg_size;
    return TileSegRef(ref.offset + skip);
}
// Four matrix coefficients plus a translation, serialized as six
// consecutive 32-bit floats.
struct TransformSeg {
    vec4 mat;
    vec2 translate;
};

// Serialized size of a TransformSeg (6 words of 4 bytes).
#define TransformSeg_size 24

// Returns a reference to element number index of the TransformSeg array
// that starts at ref.
TransformSegRef TransformSeg_index(TransformSegRef ref, uint index) {
    uint skip = index * TransformSeg_size;
    return TransformSegRef(ref.offset + skip);
}
// Decodes the Path stored at ref within allocation a.
Path Path_read(Alloc a, PathRef ref) {
    uint base = ref.offset >> 2;
    uint xy_bits = read_mem(a, base + 0);
    uint zw_bits = read_mem(a, base + 1);
    uint tiles_offset = read_mem(a, base + 2);

    Path path;
    // Each bbox word packs two 16-bit values: low half first, high half
    // second.
    path.bbox = uvec4(xy_bits & 0xffff, xy_bits >> 16, zw_bits & 0xffff, zw_bits >> 16);
    path.tiles = TileRef(tiles_offset);
    return path;
}
// Encodes s at ref within allocation a. The bbox components are packed in
// pairs, the second of each pair shifted into the high 16 bits; values are
// not masked, so they are expected to fit in 16 bits.
void Path_write(Alloc a, PathRef ref, Path s) {
    uint base = ref.offset >> 2;
    uint xy_bits = s.bbox.x | (s.bbox.y << 16);
    uint zw_bits = s.bbox.z | (s.bbox.w << 16);
    write_mem(a, base + 0, xy_bits);
    write_mem(a, base + 1, zw_bits);
    write_mem(a, base + 2, s.tiles.offset);
}
// Decodes the Tile stored at ref within allocation a: the TileSeg offset
// followed by the backdrop, reinterpreted as a signed integer.
Tile Tile_read(Alloc a, TileRef ref) {
    uint base = ref.offset >> 2;
    uint seg_offset = read_mem(a, base + 0);
    uint backdrop_bits = read_mem(a, base + 1);

    Tile tile;
    tile.tile = TileSegRef(seg_offset);
    tile.backdrop = int(backdrop_bits);
    return tile;
}
// Encodes s at ref within allocation a: the TileSeg offset followed by the
// backdrop converted to an unsigned word.
void Tile_write(Alloc a, TileRef ref, Tile s) {
    uint base = ref.offset >> 2;
    uint backdrop_bits = uint(s.backdrop);
    write_mem(a, base + 0, s.tile.offset);
    write_mem(a, base + 1, backdrop_bits);
}
// Decodes the TileSeg stored at ref within allocation a: five words
// reinterpreted as floats followed by the offset of the next TileSeg.
TileSeg TileSeg_read(Alloc a, TileSegRef ref) {
    uint base = ref.offset >> 2;
    uint origin_x = read_mem(a, base + 0);
    uint origin_y = read_mem(a, base + 1);
    uint vector_x = read_mem(a, base + 2);
    uint vector_y = read_mem(a, base + 3);
    uint y_edge = read_mem(a, base + 4);
    uint next_offset = read_mem(a, base + 5);

    TileSeg seg;
    seg.origin = vec2(uintBitsToFloat(origin_x), uintBitsToFloat(origin_y));
    seg.vector = vec2(uintBitsToFloat(vector_x), uintBitsToFloat(vector_y));
    seg.y_edge = uintBitsToFloat(y_edge);
    seg.next = TileSegRef(next_offset);
    return seg;
}
// Encodes s at ref within allocation a: five floats stored as their raw
// bits followed by the offset of the next TileSeg.
void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s) {
    uint base = ref.offset >> 2;

    write_mem(a, base + 0, floatBitsToUint(s.origin.x));
    write_mem(a, base + 1, floatBitsToUint(s.origin.y));

    write_mem(a, base + 2, floatBitsToUint(s.vector.x));
    write_mem(a, base + 3, floatBitsToUint(s.vector.y));

    write_mem(a, base + 4, floatBitsToUint(s.y_edge));
    write_mem(a, base + 5, s.next.offset);
}
// Decodes the TransformSeg stored at ref within allocation a from six
// words reinterpreted as floats: four matrix coefficients, then the
// translation.
TransformSeg TransformSeg_read(Alloc a, TransformSegRef ref) {
    uint base = ref.offset >> 2;
    uint mat_x = read_mem(a, base + 0);
    uint mat_y = read_mem(a, base + 1);
    uint mat_z = read_mem(a, base + 2);
    uint mat_w = read_mem(a, base + 3);
    uint translate_x = read_mem(a, base + 4);
    uint translate_y = read_mem(a, base + 5);

    TransformSeg t;
    t.mat = vec4(uintBitsToFloat(mat_x), uintBitsToFloat(mat_y), uintBitsToFloat(mat_z), uintBitsToFloat(mat_w));
    t.translate = vec2(uintBitsToFloat(translate_x), uintBitsToFloat(translate_y));
    return t;
}
// Encodes s at ref within allocation a as six floats stored as their raw
// bits: four matrix coefficients, then the translation.
void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) {
    uint base = ref.offset >> 2;

    // mat
    write_mem(a, base + 0, floatBitsToUint(s.mat.x));
    write_mem(a, base + 1, floatBitsToUint(s.mat.y));
    write_mem(a, base + 2, floatBitsToUint(s.mat.z));
    write_mem(a, base + 3, floatBitsToUint(s.mat.w));

    // translate
    write_mem(a, base + 4, floatBitsToUint(s.translate.x));
    write_mem(a, base + 5, floatBitsToUint(s.translate.y));
}
-104
View File
@@ -1,104 +0,0 @@
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
// Allocation and initialization of tiles for paths.
#version 450
#extension GL_GOOGLE_include_directive : enable
#include "mem.h"
#include "setup.h"
// log2 of the workgroup size; main() runs this many prefix-sum passes.
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
// Invocations per workgroup; each invocation handles one element.
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
// Read-only configuration: element count, tile grid size and allocations.
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
Config conf;
};
#include "annotated.h"
#include "tile.h"
// scale factors useful for converting coordinates to tiles
#define SX (1.0 / float(TILE_WIDTH_PX))
#define SY (1.0 / float(TILE_HEIGHT_PX))
// Per-invocation tile counts; main() turns them into a running sum.
shared uint sh_tile_count[TILE_ALLOC_WG];
// Result of the workgroup's single malloc, written by the last invocation
// and read by all invocations after a barrier.
shared MallocResult sh_tile_alloc;
// Computes the tile bounding box of one element per invocation, allocates
// the tiles of the whole workgroup with a single malloc, records each
// element's Path (bbox and first tile) and zeroes the allocated tiles.
void main() {
uint th_ix = gl_LocalInvocationID.x;
uint element_ix = gl_GlobalInvocationID.x;
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
// Invocations past the last element keep the Nop tag, which leaves their
// bounding box empty and their tile count at zero.
uint tag = Annotated_Nop;
if (element_ix < conf.n_elements) {
tag = Annotated_tag(conf.anno_alloc, ref).tag;
}
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) {
case Annotated_Color:
case Annotated_Image:
case Annotated_BeginClip:
case Annotated_EndClip:
// Note: we take advantage of the fact that fills, strokes, and
// clips have compatible layout.
AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
// Convert the pixel bounding box to tile coordinates, rounding outwards.
x0 = int(floor(clip.bbox.x * SX));
y0 = int(floor(clip.bbox.y * SY));
x1 = int(ceil(clip.bbox.z * SX));
y1 = int(ceil(clip.bbox.w * SY));
break;
}
// Restrict the bounding box to the tile grid.
x0 = clamp(x0, 0, int(conf.width_in_tiles));
y0 = clamp(y0, 0, int(conf.height_in_tiles));
x1 = clamp(x1, 0, int(conf.width_in_tiles));
y1 = clamp(y1, 0, int(conf.height_in_tiles));
Path path;
path.bbox = uvec4(x0, y0, x1, y1);
uint tile_count = (x1 - x0) * (y1 - y0);
if (tag == Annotated_EndClip) {
// Don't actually allocate tiles for an end clip, but we do want
// the path structure (especially bbox) allocated for it.
tile_count = 0;
}
sh_tile_count[th_ix] = tile_count;
uint total_tile_count = tile_count;
// Prefix sum of sh_tile_count
// Each pass adds the value 2^i slots to the left. The first barrier makes
// the previous pass's writes visible; the second keeps any invocation from
// overwriting its slot before its neighbour has read it. Afterwards slot
// th_ix holds the sum of the tile counts of invocations 0..th_ix.
for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
barrier();
if (th_ix >= (1 << i)) {
total_tile_count += sh_tile_count[th_ix - (1 << i)];
}
barrier();
sh_tile_count[th_ix] = total_tile_count;
}
// The last invocation holds the workgroup total and allocates for everyone.
if (th_ix == TILE_ALLOC_WG - 1) {
sh_tile_alloc = malloc(total_tile_count * Tile_size);
}
barrier();
MallocResult alloc_start = sh_tile_alloc;
// Every invocation reads the same shared result, so they all return
// together when the allocation failed.
if (alloc_start.failed || mem_error != NO_ERROR) {
return;
}
if (element_ix < conf.n_elements) {
// This element's tiles start after those of all lower-numbered
// invocations, i.e. at the running sum of the previous invocation.
uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count);
path.tiles = TileRef(tiles_alloc.offset);
Path_write(conf.tile_alloc, path_ref, path);
}
// Zero out allocated tiles efficiently
// total_count is the size of the workgroup's allocation in 32-bit words;
// the invocations share the work by striding over it in steps of
// TILE_ALLOC_WG.
uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
uint start_ix = alloc_start.alloc.offset >> 2;
for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
// Note: this interleaving is faster than using Tile_write
// by a significant amount.
write_mem(alloc_start.alloc, start_ix + i, 0);
}
}