forked from joejulian/gio
all: switch to external shaders in the gioui.org/shader module
Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
@@ -8,4 +8,7 @@ require (
|
||||
golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c
|
||||
)
|
||||
|
||||
require gioui.org/cpu v0.0.0-20210727122813-41509bcd3462
|
||||
require (
|
||||
gioui.org/cpu v0.0.0-20210808092351-bfe733dd3334
|
||||
gioui.org/shader v0.0.0-20210808092941-55e18336189e
|
||||
)
|
||||
|
||||
@@ -1,8 +1,10 @@
|
||||
cloud.google.com/go v0.26.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
||||
cloud.google.com/go v0.34.0/go.mod h1:aQUYkXzVsufM+DwF1aE+0xfcU+56JwCaLick0ClmMTw=
|
||||
dmitri.shuralyov.com/gpu/mtl v0.0.0-20201218220906-28db891af037/go.mod h1:H6x//7gZCb22OMCxBHrMx7a5I7Hp++hsVxbQ4BYO7hU=
|
||||
gioui.org/cpu v0.0.0-20210727122813-41509bcd3462 h1:JZyB+d8tPExZHNZwMiGKeeAVd0mkFTc3Zsmegdn178M=
|
||||
gioui.org/cpu v0.0.0-20210727122813-41509bcd3462/go.mod h1:DkhBDuHokSMOUxX5LZQ7IcxyJJzs3OON8Z5ojaXUXxo=
|
||||
gioui.org/cpu v0.0.0-20210808092351-bfe733dd3334 h1:1xK224B5DnjlPKCfVDTl7+olrzgAXn4ym6dum3l34rs=
|
||||
gioui.org/cpu v0.0.0-20210808092351-bfe733dd3334/go.mod h1:A8M0Cn5o+vY5LTMlnRoK3O5kG+rH0kWfJjeKd9QpBmQ=
|
||||
gioui.org/shader v0.0.0-20210808092941-55e18336189e h1:JD4FUQ/appkr/58YHvdKfvHT6BHiGJ2yUDBEAnq0Ugw=
|
||||
gioui.org/shader v0.0.0-20210808092941-55e18336189e/go.mod h1:mWdiME581d/kV7/iEhLmUgUK5iZ09XR5XpduXzbePVM=
|
||||
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
|
||||
github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo=
|
||||
github.com/Knetic/govaluate v3.0.1-0.20171022003610-9aa49832a739+incompatible/go.mod h1:r7JcOSlj0wfOMncg0iLm8Leh48TZaKVeNIfJntJ2wa0=
|
||||
|
||||
+22
-28
@@ -19,6 +19,7 @@ import (
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
"gioui.org/cpu"
|
||||
"gioui.org/f32"
|
||||
"gioui.org/gpu/internal/driver"
|
||||
"gioui.org/internal/byteslice"
|
||||
@@ -29,9 +30,9 @@ import (
|
||||
"gioui.org/layout"
|
||||
"gioui.org/op"
|
||||
"gioui.org/op/clip"
|
||||
|
||||
"gioui.org/cpu"
|
||||
"gioui.org/cpu/piet"
|
||||
"gioui.org/shader"
|
||||
"gioui.org/shader/gio"
|
||||
"gioui.org/shader/piet"
|
||||
)
|
||||
|
||||
type compute struct {
|
||||
@@ -390,29 +391,22 @@ func newCompute(ctx driver.Device) (*compute, error) {
|
||||
}
|
||||
shaders := []struct {
|
||||
prog *computeProgram
|
||||
src driver.ShaderSources
|
||||
src shader.Sources
|
||||
info *cpu.ProgramInfo
|
||||
hash string
|
||||
}{
|
||||
{&g.programs.elements, shader_elements_comp, piet.ElementsProgramInfo, piet.ElementsHash},
|
||||
{&g.programs.tileAlloc, shader_tile_alloc_comp, piet.Tile_allocProgramInfo, piet.Tile_allocHash},
|
||||
{&g.programs.pathCoarse, shader_path_coarse_comp, piet.Path_coarseProgramInfo, piet.Path_coarseHash},
|
||||
{&g.programs.backdrop, shader_backdrop_comp, piet.BackdropProgramInfo, piet.BackdropHash},
|
||||
{&g.programs.binning, shader_binning_comp, piet.BinningProgramInfo, piet.BinningHash},
|
||||
{&g.programs.coarse, shader_coarse_comp, piet.CoarseProgramInfo, piet.CoarseHash},
|
||||
{&g.programs.kernel4, shader_kernel4_comp, piet.Kernel4ProgramInfo, piet.Kernel4Hash},
|
||||
{&g.programs.elements, piet.Shader_elements_comp, piet.ElementsProgramInfo},
|
||||
{&g.programs.tileAlloc, piet.Shader_tile_alloc_comp, piet.Tile_allocProgramInfo},
|
||||
{&g.programs.pathCoarse, piet.Shader_path_coarse_comp, piet.Path_coarseProgramInfo},
|
||||
{&g.programs.backdrop, piet.Shader_backdrop_comp, piet.BackdropProgramInfo},
|
||||
{&g.programs.binning, piet.Shader_binning_comp, piet.BinningProgramInfo},
|
||||
{&g.programs.coarse, piet.Shader_coarse_comp, piet.CoarseProgramInfo},
|
||||
{&g.programs.kernel4, piet.Shader_kernel4_comp, piet.Kernel4ProgramInfo},
|
||||
}
|
||||
if !caps.Features.Has(driver.FeatureCompute) {
|
||||
g.useCPU = supportsCPUCompute
|
||||
for _, s := range shaders {
|
||||
if s.src.Hash != s.hash {
|
||||
g.useCPU = false
|
||||
break
|
||||
}
|
||||
}
|
||||
if !g.useCPU {
|
||||
if !supportsCPUCompute {
|
||||
return nil, errors.New("gpu: missing support for compute programs")
|
||||
}
|
||||
g.useCPU = true
|
||||
}
|
||||
if g.useCPU {
|
||||
g.dispatcher = newDispatcher(runtime.NumCPU())
|
||||
@@ -420,15 +414,15 @@ func newCompute(ctx driver.Device) (*compute, error) {
|
||||
|
||||
// Large enough for reasonable fill sizes, yet still spannable by the compute programs.
|
||||
g.output.packer.maxDim = 4096
|
||||
blitProg, err := ctx.NewProgram(shader_copy_vert, shader_copy_frag)
|
||||
blitProg, err := ctx.NewProgram(gio.Shader_copy_vert, gio.Shader_copy_frag)
|
||||
if err != nil {
|
||||
g.Release()
|
||||
return nil, err
|
||||
}
|
||||
g.output.blitProg = blitProg
|
||||
progLayout, err := ctx.NewInputLayout(shader_copy_vert, []driver.InputDesc{
|
||||
{Type: driver.DataTypeFloat, Size: 2, Offset: 0},
|
||||
{Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2},
|
||||
progLayout, err := ctx.NewInputLayout(gio.Shader_copy_vert, []shader.InputDesc{
|
||||
{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
|
||||
{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
|
||||
})
|
||||
if err != nil {
|
||||
g.Release()
|
||||
@@ -445,15 +439,15 @@ func newCompute(ctx driver.Device) (*compute, error) {
|
||||
g.output.uniBuf = buf
|
||||
g.output.blitProg.SetVertexUniforms(buf)
|
||||
|
||||
materialProg, err := ctx.NewProgram(shader_material_vert, shader_material_frag)
|
||||
materialProg, err := ctx.NewProgram(gio.Shader_material_vert, gio.Shader_material_frag)
|
||||
if err != nil {
|
||||
g.Release()
|
||||
return nil, err
|
||||
}
|
||||
g.materials.prog = materialProg
|
||||
progLayout, err = ctx.NewInputLayout(shader_material_vert, []driver.InputDesc{
|
||||
{Type: driver.DataTypeFloat, Size: 2, Offset: 0},
|
||||
{Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2},
|
||||
progLayout, err = ctx.NewInputLayout(gio.Shader_material_vert, []shader.InputDesc{
|
||||
{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
|
||||
{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
|
||||
})
|
||||
if err != nil {
|
||||
g.Release()
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
package gpu
|
||||
|
||||
//go:generate go run ./internal/convertshaders -package gpu
|
||||
+33
-5
@@ -9,12 +9,14 @@ package gpu
|
||||
|
||||
import (
|
||||
"encoding/binary"
|
||||
"errors"
|
||||
"fmt"
|
||||
"image"
|
||||
"image/color"
|
||||
"math"
|
||||
"os"
|
||||
"reflect"
|
||||
"runtime/debug"
|
||||
"time"
|
||||
"unsafe"
|
||||
|
||||
@@ -29,6 +31,8 @@ import (
|
||||
"gioui.org/layout"
|
||||
"gioui.org/op"
|
||||
"gioui.org/op/clip"
|
||||
"gioui.org/shader"
|
||||
"gioui.org/shader/gio"
|
||||
|
||||
// Register backends.
|
||||
_ "gioui.org/gpu/internal/d3d11"
|
||||
@@ -129,6 +133,10 @@ type imageOp struct {
|
||||
place placement
|
||||
}
|
||||
|
||||
// shaderModuleVersion is the exact version of gioui.org/shader expected by
|
||||
// this package. Shader programs are not backwards or forwards compatible.
|
||||
const shaderModuleVersion = "v0.0.0-20210808092941-55e18336189e"
|
||||
|
||||
func decodeStrokeOp(data []byte) clip.StrokeStyle {
|
||||
_ = data[4]
|
||||
if opconst.OpType(data[0]) != opconst.TypeStroke {
|
||||
@@ -350,6 +358,9 @@ const (
|
||||
)
|
||||
|
||||
func New(api API) (GPU, error) {
|
||||
if err := verifyShaderModule(); err != nil {
|
||||
return nil, err
|
||||
}
|
||||
d, err := driver.NewDevice(api)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
@@ -376,6 +387,23 @@ func newGPU(ctx driver.Device) (*gpu, error) {
|
||||
return g, nil
|
||||
}
|
||||
|
||||
func verifyShaderModule() error {
|
||||
mod, ok := debug.ReadBuildInfo()
|
||||
if !ok {
|
||||
// No module support; hopefully the version matches.
|
||||
return nil
|
||||
}
|
||||
for _, m := range mod.Deps {
|
||||
if m.Path == "gioui.org/shader" {
|
||||
if got := m.Version; got != shaderModuleVersion {
|
||||
return fmt.Errorf("gpu: module gioui.org/shader is version %q, expected %q", got, shaderModuleVersion)
|
||||
}
|
||||
return nil
|
||||
}
|
||||
}
|
||||
return errors.New("gpu: module version for gioui.org/shader not found")
|
||||
}
|
||||
|
||||
func (g *gpu) init(ctx driver.Device) error {
|
||||
g.ctx = ctx
|
||||
g.renderer = newRenderer(ctx)
|
||||
@@ -530,7 +558,7 @@ func newBlitter(ctx driver.Device) *blitter {
|
||||
b.colUniforms = new(blitColUniforms)
|
||||
b.texUniforms = new(blitTexUniforms)
|
||||
b.linearGradientUniforms = new(blitLinearGradientUniforms)
|
||||
prog, layout, err := createColorPrograms(ctx, shader_blit_vert, shader_blit_frag,
|
||||
prog, layout, err := createColorPrograms(ctx, gio.Shader_blit_vert, gio.Shader_blit_frag,
|
||||
[3]interface{}{&b.colUniforms.vert, &b.linearGradientUniforms.vert, &b.texUniforms.vert},
|
||||
[3]interface{}{&b.colUniforms.frag, &b.linearGradientUniforms.frag, nil},
|
||||
)
|
||||
@@ -550,7 +578,7 @@ func (b *blitter) release() {
|
||||
b.layout.Release()
|
||||
}
|
||||
|
||||
func createColorPrograms(b driver.Device, vsSrc driver.ShaderSources, fsSrc [3]driver.ShaderSources, vertUniforms, fragUniforms [3]interface{}) ([3]*program, driver.InputLayout, error) {
|
||||
func createColorPrograms(b driver.Device, vsSrc shader.Sources, fsSrc [3]shader.Sources, vertUniforms, fragUniforms [3]interface{}) ([3]*program, driver.InputLayout, error) {
|
||||
var progs [3]*program
|
||||
{
|
||||
prog, err := b.NewProgram(vsSrc, fsSrc[materialTexture])
|
||||
@@ -603,9 +631,9 @@ func createColorPrograms(b driver.Device, vsSrc driver.ShaderSources, fsSrc [3]d
|
||||
}
|
||||
progs[materialLinearGradient] = newProgram(prog, vertBuffer, fragBuffer)
|
||||
}
|
||||
layout, err := b.NewInputLayout(vsSrc, []driver.InputDesc{
|
||||
{Type: driver.DataTypeFloat, Size: 2, Offset: 0},
|
||||
{Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2},
|
||||
layout, err := b.NewInputLayout(vsSrc, []shader.InputDesc{
|
||||
{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
|
||||
{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
|
||||
})
|
||||
if err != nil {
|
||||
progs[materialTexture].Release()
|
||||
|
||||
@@ -15,6 +15,8 @@ import (
|
||||
"gioui.org/gpu/internal/driver"
|
||||
"gioui.org/internal/byteslice"
|
||||
"gioui.org/internal/f32color"
|
||||
"gioui.org/shader"
|
||||
"gioui.org/shader/gio"
|
||||
)
|
||||
|
||||
var dumpImages = flag.Bool("saveimages", false, "save test images")
|
||||
@@ -36,7 +38,7 @@ func TestSimpleShader(t *testing.T) {
|
||||
b := newDriver(t)
|
||||
sz := image.Point{X: 800, Y: 600}
|
||||
fbo := setupFBO(t, b, sz)
|
||||
p, err := b.NewProgram(shader_simple_vert, shader_simple_frag)
|
||||
p, err := b.NewProgram(gio.Shader_simple_vert, gio.Shader_simple_frag)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -59,7 +61,7 @@ func TestInputShader(t *testing.T) {
|
||||
b := newDriver(t)
|
||||
sz := image.Point{X: 800, Y: 600}
|
||||
fbo := setupFBO(t, b, sz)
|
||||
p, err := b.NewProgram(shader_input_vert, shader_simple_frag)
|
||||
p, err := b.NewProgram(gio.Shader_input_vert, gio.Shader_simple_frag)
|
||||
if err != nil {
|
||||
t.Fatal(err)
|
||||
}
|
||||
@@ -77,9 +79,9 @@ func TestInputShader(t *testing.T) {
|
||||
}
|
||||
defer buf.Release()
|
||||
b.BindVertexBuffer(buf, 4*4, 0)
|
||||
layout, err := b.NewInputLayout(shader_input_vert, []driver.InputDesc{
|
||||
layout, err := b.NewInputLayout(gio.Shader_input_vert, []shader.InputDesc{
|
||||
{
|
||||
Type: driver.DataTypeFloat,
|
||||
Type: shader.DataTypeFloat,
|
||||
Size: 4,
|
||||
Offset: 0,
|
||||
},
|
||||
|
||||
@@ -1,5 +0,0 @@
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
package headless
|
||||
|
||||
//go:generate go run ../internal/convertshaders -package headless
|
||||
@@ -1,233 +0,0 @@
|
||||
// Code generated by build.go. DO NOT EDIT.
|
||||
|
||||
package headless
|
||||
|
||||
import "gioui.org/gpu/internal/driver"
|
||||
|
||||
var (
|
||||
shader_input_vert = driver.ShaderSources{
|
||||
Name: "input.vert",
|
||||
Inputs: []driver.InputLocation{{Name: "position", Location: 0, Semantic: "TEXCOORD", SemanticIndex: 0, Type: 0x0, Size: 4}},
|
||||
GLSL100ES: `#version 100
|
||||
|
||||
attribute vec4 position;
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_Position = position;
|
||||
}
|
||||
|
||||
`,
|
||||
GLSL300ES: `#version 300 es
|
||||
|
||||
layout(location = 0) in vec4 position;
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_Position = position;
|
||||
}
|
||||
|
||||
`,
|
||||
GLSL130: `#version 130
|
||||
#ifdef GL_ARB_shading_language_420pack
|
||||
#extension GL_ARB_shading_language_420pack : require
|
||||
#endif
|
||||
|
||||
in vec4 position;
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_Position = position;
|
||||
}
|
||||
|
||||
`,
|
||||
GLSL150: `#version 150
|
||||
#ifdef GL_ARB_shading_language_420pack
|
||||
#extension GL_ARB_shading_language_420pack : require
|
||||
#endif
|
||||
|
||||
in vec4 position;
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_Position = position;
|
||||
}
|
||||
|
||||
`,
|
||||
HLSL: "DXBC\x1e»\x11\xd3iX7\xd4F\xb9\xa4\xf4R\xf9J\x01\x00\x00\x00\x10\x02\x00\x00\x06\x00\x00\x008\x00\x00\x00\x9c\x00\x00\x00\xe0\x00\x00\x00\\\x01\x00\x00\xa8\x01\x00\x00\xdc\x01\x00\x00Aon9\\\x00\x00\x00\\\x00\x00\x00\x00\x02\xfe\xff4\x00\x00\x00(\x00\x00\x00\x00\x00$\x00\x00\x00$\x00\x00\x00$\x00\x00\x00$\x00\x01\x00$\x00\x00\x00\x00\x00\x00\x02\xfe\xff\x1f\x00\x00\x02\x05\x00\x00\x80\x00\x00\x0f\x90\x04\x00\x00\x04\x00\x00\x03\xc0\x00\x00\xff\x90\x00\x00\xe4\xa0\x00\x00\xe4\x90\x01\x00\x00\x02\x00\x00\f\xc0\x00\x00\xe4\x90\xff\xff\x00\x00SHDR<\x00\x00\x00@\x00\x01\x00\x0f\x00\x00\x00_\x00\x00\x03\xf2\x10\x10\x00\x00\x00\x00\x00g\x00\x00\x04\xf2 \x10\x00\x00\x00\x00\x00\x01\x00\x00\x006\x00\x00\x05\xf2 \x10\x00\x00\x00\x00\x00F\x1e\x10\x00\x00\x00\x00\x00>\x00\x00\x01STATt\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00RDEFD\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1c\x00\x00\x00\x00\x04\xfe\xff\x00\x01\x00\x00\x1c\x00\x00\x00Microsoft (R) HLSL Shader Compiler 10.1\x00ISGN,\x00\x00\x00\x01\x00\x00\x00\b\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x0f\x0f\x00\x00TEXCOORD\x00\xab\xab\xabOSGN,\x00\x00\x00\x01\x00\x00\x00\b\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00SV_Position\x00",
|
||||
}
|
||||
shader_simple_frag = driver.ShaderSources{
|
||||
Name: "simple.frag",
|
||||
GLSL100ES: `#version 100
|
||||
precision mediump float;
|
||||
precision highp int;
|
||||
|
||||
void main()
|
||||
{
|
||||
gl_FragData[0] = vec4(0.25, 0.550000011920928955078125, 0.75, 1.0);
|
||||
}
|
||||
|
||||
`,
|
||||
GLSL300ES: `#version 300 es
|
||||
precision mediump float;
|
||||
precision highp int;
|
||||
|
||||
layout(location = 0) out vec4 fragColor;
|
||||
|
||||
void main()
|
||||
{
|
||||
fragColor = vec4(0.25, 0.550000011920928955078125, 0.75, 1.0);
|
||||
}
|
||||
|
||||
`,
|
||||
GLSL130: `#version 130
|
||||
#ifdef GL_ARB_shading_language_420pack
|
||||
#extension GL_ARB_shading_language_420pack : require
|
||||
#endif
|
||||
|
||||
out vec4 fragColor;
|
||||
|
||||
void main()
|
||||
{
|
||||
fragColor = vec4(0.25, 0.550000011920928955078125, 0.75, 1.0);
|
||||
}
|
||||
|
||||
`,
|
||||
GLSL150: `#version 150
|
||||
#ifdef GL_ARB_shading_language_420pack
|
||||
#extension GL_ARB_shading_language_420pack : require
|
||||
#endif
|
||||
|
||||
out vec4 fragColor;
|
||||
|
||||
void main()
|
||||
{
|
||||
fragColor = vec4(0.25, 0.550000011920928955078125, 0.75, 1.0);
|
||||
}
|
||||
|
||||
`,
|
||||
HLSL: "DXBC\xf5F\xdef$)\xa8\xbbV\xeas\xb5ks\x12r\x01\x00\x00\x00\xdc\x01\x00\x00\x06\x00\x00\x008\x00\x00\x00\x90\x00\x00\x00\xd0\x00\x00\x00L\x01\x00\x00\x98\x01\x00\x00\xa8\x01\x00\x00Aon9P\x00\x00\x00P\x00\x00\x00\x00\x02\xff\xff,\x00\x00\x00$\x00\x00\x00\x00\x00$\x00\x00\x00$\x00\x00\x00$\x00\x00\x00$\x00\x00\x00$\x00\x00\x02\xff\xffQ\x00\x00\x05\x00\x00\x0f\xa0\x00\x00\x80>\xcd\xcc\f?\x00\x00@?\x00\x00\x80?\x01\x00\x00\x02\x00\b\x0f\x80\x00\x00\xe4\xa0\xff\xff\x00\x00SHDR8\x00\x00\x00@\x00\x00\x00\x0e\x00\x00\x00e\x00\x00\x03\xf2 \x10\x00\x00\x00\x00\x006\x00\x00\b\xf2 \x10\x00\x00\x00\x00\x00\x02@\x00\x00\x00\x00\x80>\xcd\xcc\f?\x00\x00@?\x00\x00\x80?>\x00\x00\x01STATt\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00RDEFD\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1c\x00\x00\x00\x00\x04\xff\xff\x00\x01\x00\x00\x1c\x00\x00\x00Microsoft (R) HLSL Shader Compiler 10.1\x00ISGN\b\x00\x00\x00\x00\x00\x00\x00\b\x00\x00\x00OSGN,\x00\x00\x00\x01\x00\x00\x00\b\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00SV_Target\x00\xab\xab",
|
||||
}
|
||||
shader_simple_vert = driver.ShaderSources{
|
||||
Name: "simple.vert",
|
||||
GLSL100ES: `#version 100
|
||||
|
||||
void main()
|
||||
{
|
||||
float x;
|
||||
float y;
|
||||
if (gl_VertexID == 0)
|
||||
{
|
||||
x = 0.0;
|
||||
y = 0.5;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (gl_VertexID == 1)
|
||||
{
|
||||
x = 0.5;
|
||||
y = -0.5;
|
||||
}
|
||||
else
|
||||
{
|
||||
x = -0.5;
|
||||
y = -0.5;
|
||||
}
|
||||
}
|
||||
gl_Position = vec4(x, y, 0.5, 1.0);
|
||||
}
|
||||
|
||||
`,
|
||||
GLSL300ES: `#version 300 es
|
||||
|
||||
void main()
|
||||
{
|
||||
float x;
|
||||
float y;
|
||||
if (gl_VertexID == 0)
|
||||
{
|
||||
x = 0.0;
|
||||
y = 0.5;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (gl_VertexID == 1)
|
||||
{
|
||||
x = 0.5;
|
||||
y = -0.5;
|
||||
}
|
||||
else
|
||||
{
|
||||
x = -0.5;
|
||||
y = -0.5;
|
||||
}
|
||||
}
|
||||
gl_Position = vec4(x, y, 0.5, 1.0);
|
||||
}
|
||||
|
||||
`,
|
||||
GLSL130: `#version 130
|
||||
#ifdef GL_ARB_shading_language_420pack
|
||||
#extension GL_ARB_shading_language_420pack : require
|
||||
#endif
|
||||
|
||||
void main()
|
||||
{
|
||||
float x;
|
||||
float y;
|
||||
if (gl_VertexID == 0)
|
||||
{
|
||||
x = 0.0;
|
||||
y = 0.5;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (gl_VertexID == 1)
|
||||
{
|
||||
x = 0.5;
|
||||
y = -0.5;
|
||||
}
|
||||
else
|
||||
{
|
||||
x = -0.5;
|
||||
y = -0.5;
|
||||
}
|
||||
}
|
||||
gl_Position = vec4(x, y, 0.5, 1.0);
|
||||
}
|
||||
|
||||
`,
|
||||
GLSL150: `#version 150
|
||||
#ifdef GL_ARB_shading_language_420pack
|
||||
#extension GL_ARB_shading_language_420pack : require
|
||||
#endif
|
||||
|
||||
void main()
|
||||
{
|
||||
float x;
|
||||
float y;
|
||||
if (gl_VertexID == 0)
|
||||
{
|
||||
x = 0.0;
|
||||
y = 0.5;
|
||||
}
|
||||
else
|
||||
{
|
||||
if (gl_VertexID == 1)
|
||||
{
|
||||
x = 0.5;
|
||||
y = -0.5;
|
||||
}
|
||||
else
|
||||
{
|
||||
x = -0.5;
|
||||
y = -0.5;
|
||||
}
|
||||
}
|
||||
gl_Position = vec4(x, y, 0.5, 1.0);
|
||||
}
|
||||
|
||||
`,
|
||||
HLSL: "DXBC\xc8 \\\"\xec\xe9\xb2)@\xdf|Z(\xea\f\xb8\x01\x00\x00\x00H\x02\x00\x00\x05\x00\x00\x004\x00\x00\x00\x80\x00\x00\x00\xb4\x00\x00\x00\xe8\x00\x00\x00\xcc\x01\x00\x00RDEFD\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x1c\x00\x00\x00\x00\x04\xfe\xff\x00\x01\x00\x00\x1c\x00\x00\x00Microsoft (R) HLSL Shader Compiler 10.1\x00ISGN,\x00\x00\x00\x01\x00\x00\x00\b\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x06\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x01\x00\x00SV_VertexID\x00OSGN,\x00\x00\x00\x01\x00\x00\x00\b\x00\x00\x00 \x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x03\x00\x00\x00\x00\x00\x00\x00\x0f\x00\x00\x00SV_Position\x00SHDR\xdc\x00\x00\x00@\x00\x01\x007\x00\x00\x00`\x00\x00\x04\x12\x10\x10\x00\x00\x00\x00\x00\x06\x00\x00\x00g\x00\x00\x04\xf2 \x10\x00\x00\x00\x00\x00\x01\x00\x00\x00h\x00\x00\x02\x01\x00\x00\x00 \x00\x00\a\x12\x00\x10\x00\x00\x00\x00\x00\n\x10\x10\x00\x00\x00\x00\x00\x01@\x00\x00\x01\x00\x00\x007\x00\x00\x0f2\x00\x10\x00\x00\x00\x00\x00\x06\x00\x10\x00\x00\x00\x00\x00\x02@\x00\x00\x00\x00\x00?\x00\x00\x00\xbf\x00\x00\x00\x00\x00\x00\x00\x00\x02@\x00\x00\x00\x00\x00\xbf\x00\x00\x00\xbf\x00\x00\x00\x00\x00\x00\x00\x007\x00\x00\f2 \x10\x00\x00\x00\x00\x00\x06\x10\x10\x00\x00\x00\x00\x00F\x00\x10\x00\x00\x00\x00\x00\x02@\x00\x00\x00\x00\x00\x00\x00\x00\x00?\x00\x00\x00\x00\x00\x00\x00\x006\x00\x00\b\xc2 \x10\x00\x00\x00\x00\x00\x02@\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00?\x00\x00\x80?>\x00\x00\x01STATt\x00\x00\x00\x05\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x01\x00\x00\x00\x02\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00\x00",
|
||||
}
|
||||
)
|
||||
@@ -1,11 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
precision highp float;
|
||||
|
||||
layout(location=0) in vec4 position;
|
||||
|
||||
// Forward the position attribute unchanged as the vertex position.
void main()
{
	gl_Position = position;
}
|
||||
@@ -1,11 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
precision mediump float;
|
||||
|
||||
layout(location = 0) out vec4 fragColor;
|
||||
|
||||
// Write the same constant color for every fragment.
void main()
{
	fragColor = vec4(0.25, 0.55, 0.75, 1.0);
}
|
||||
@@ -1,20 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
precision highp float;
|
||||
|
||||
// Emit one of three hard-coded positions selected by the vertex index:
// index 0 -> (0.0, 0.5), index 1 -> (0.5, -0.5), anything else -> (-0.5, -0.5).
void main()
{
	// Start from the position used for every index other than 0 and 1.
	float x = -0.5;
	float y = -0.5;
	if (gl_VertexIndex == 0) {
		x = 0.0;
		y = 0.5;
	} else if (gl_VertexIndex == 1) {
		x = 0.5;
	}
	gl_Position = vec4(x, y, 0.5, 1.0);
}
|
||||
@@ -1,50 +0,0 @@
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
)
|
||||
|
||||
// GLSLValidator is OpenGL reference compiler.
|
||||
type GLSLValidator struct {
|
||||
Bin string
|
||||
WorkDir WorkDir
|
||||
}
|
||||
|
||||
func NewGLSLValidator() *GLSLValidator { return &GLSLValidator{Bin: "glslangValidator"} }
|
||||
|
||||
// Convert converts a glsl shader to spirv.
|
||||
func (glsl *GLSLValidator) Convert(path, variant string, hlsl bool, input []byte) ([]byte, error) {
|
||||
base := glsl.WorkDir.Path(filepath.Base(path), variant)
|
||||
pathout := base + ".out"
|
||||
|
||||
cmd := exec.Command(glsl.Bin,
|
||||
"--stdin",
|
||||
"-I"+filepath.Dir(path),
|
||||
"-V", // OpenGL ES 3.1.
|
||||
"-w", // Suppress warnings.
|
||||
"-S", filepath.Ext(path)[1:],
|
||||
"-o", pathout,
|
||||
)
|
||||
if hlsl {
|
||||
cmd.Args = append(cmd.Args, "-DHLSL")
|
||||
}
|
||||
cmd.Stdin = bytes.NewBuffer(input)
|
||||
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s\nfailed to run %v: %w", out, cmd.Args, err)
|
||||
}
|
||||
|
||||
compiled, err := ioutil.ReadFile(pathout)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("unable to read output %q: %w", pathout, err)
|
||||
}
|
||||
|
||||
return compiled, nil
|
||||
}
|
||||
@@ -1,146 +0,0 @@
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"runtime"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// FXC is hlsl compiler that targets ShaderModel 5.x and lower.
|
||||
type FXC struct {
|
||||
Bin string
|
||||
WorkDir WorkDir
|
||||
}
|
||||
|
||||
func NewFXC() *FXC { return &FXC{Bin: "fxc.exe"} }
|
||||
|
||||
// Compile compiles the input shader.
|
||||
func (fxc *FXC) Compile(path, variant string, input []byte, entryPoint string, profileVersion string) (string, error) {
|
||||
base := fxc.WorkDir.Path(filepath.Base(path), variant, profileVersion)
|
||||
pathin := base + ".in"
|
||||
pathout := base + ".out"
|
||||
result := pathout
|
||||
|
||||
if err := fxc.WorkDir.WriteFile(pathin, input); err != nil {
|
||||
return "", fmt.Errorf("unable to write shader to disk: %w", err)
|
||||
}
|
||||
|
||||
cmd := exec.Command(fxc.Bin)
|
||||
if runtime.GOOS != "windows" {
|
||||
cmd = exec.Command("wine", fxc.Bin)
|
||||
if err := winepath(&pathin, &pathout); err != nil {
|
||||
return "", err
|
||||
}
|
||||
}
|
||||
|
||||
var profile string
|
||||
switch filepath.Ext(path) {
|
||||
case ".frag":
|
||||
profile = "ps_" + profileVersion
|
||||
case ".vert":
|
||||
profile = "vs_" + profileVersion
|
||||
case ".comp":
|
||||
profile = "cs_" + profileVersion
|
||||
default:
|
||||
return "", fmt.Errorf("unrecognized shader type %s", path)
|
||||
}
|
||||
|
||||
cmd.Args = append(cmd.Args,
|
||||
"/Fo", pathout,
|
||||
"/T", profile,
|
||||
"/E", entryPoint,
|
||||
pathin,
|
||||
)
|
||||
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
info := ""
|
||||
if runtime.GOOS != "windows" {
|
||||
info = "If the fxc tool cannot be found, set WINEPATH to the Windows path for the Windows SDK.\n"
|
||||
}
|
||||
return "", fmt.Errorf("%s\n%sfailed to run %v: %w", output, info, cmd.Args, err)
|
||||
}
|
||||
|
||||
compiled, err := ioutil.ReadFile(result)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to read output %q: %w", pathout, err)
|
||||
}
|
||||
|
||||
return string(compiled), nil
|
||||
}
|
||||
|
||||
// DXC is hlsl compiler that targets ShaderModel 6.0 and newer.
|
||||
type DXC struct {
|
||||
Bin string
|
||||
WorkDir WorkDir
|
||||
}
|
||||
|
||||
func NewDXC() *DXC { return &DXC{Bin: "dxc"} }
|
||||
|
||||
// Compile compiles the input shader.
|
||||
func (dxc *DXC) Compile(path, variant string, input []byte, entryPoint string, profile string) (string, error) {
|
||||
base := dxc.WorkDir.Path(filepath.Base(path), variant, profile)
|
||||
pathin := base + ".in"
|
||||
pathout := base + ".out"
|
||||
result := pathout
|
||||
|
||||
if err := dxc.WorkDir.WriteFile(pathin, input); err != nil {
|
||||
return "", fmt.Errorf("unable to write shader to disk: %w", err)
|
||||
}
|
||||
|
||||
cmd := exec.Command(dxc.Bin)
|
||||
|
||||
cmd.Args = append(cmd.Args,
|
||||
"-Fo", pathout,
|
||||
"-T", profile,
|
||||
"-E", entryPoint,
|
||||
"-Qstrip_reflect",
|
||||
pathin,
|
||||
)
|
||||
|
||||
output, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("%s\nfailed to run %v: %w", output, cmd.Args, err)
|
||||
}
|
||||
|
||||
compiled, err := ioutil.ReadFile(result)
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("unable to read output %q: %w", pathout, err)
|
||||
}
|
||||
|
||||
return string(compiled), nil
|
||||
}
|
||||
|
||||
// winepath uses the winepath tool to convert paths to Windows format.
// Each string pointed to by paths is replaced in place with the
// corresponding line of the tool's output; the converted paths can be used
// as arguments for Windows command line tools.
//
// An error is returned, and no path is modified, if the tool cannot be
// started, its output cannot be read, it exits unsuccessfully, or it prints
// fewer lines than there are paths.
func winepath(paths ...*string) error {
	cmd := exec.Command("winepath", "--windows")
	for _, p := range paths {
		cmd.Args = append(cmd.Args, *p)
	}
	// Use a pipe instead of Output, because winepath may have left wineserver
	// running for several seconds as a grandchild.
	out, err := cmd.StdoutPipe()
	if err != nil {
		return fmt.Errorf("unable to start winepath: %w", err)
	}
	if err := cmd.Start(); err != nil {
		return fmt.Errorf("unable to start winepath: %w", err)
	}
	var buf bytes.Buffer
	_, copyErr := io.Copy(&buf, out)
	// Wait must follow a successful Start to release the process resources.
	// It is called only after all reads from the pipe have completed.
	waitErr := cmd.Wait()
	if copyErr != nil {
		return fmt.Errorf("unable to run winepath: %w", copyErr)
	}
	if waitErr != nil {
		return fmt.Errorf("unable to run winepath: %w", waitErr)
	}
	winPaths := strings.Split(strings.TrimSpace(buf.String()), "\n")
	if len(winPaths) < len(paths) {
		// Indexing below would panic on short output.
		return fmt.Errorf("unable to run winepath: got %d paths, expected %d", len(winPaths), len(paths))
	}
	for i, p := range paths {
		*p = winPaths[i]
	}
	return nil
}
|
||||
@@ -1,418 +0,0 @@
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"bytes"
|
||||
"crypto/sha256"
|
||||
"encoding/hex"
|
||||
"errors"
|
||||
"flag"
|
||||
"fmt"
|
||||
"io"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strconv"
|
||||
"strings"
|
||||
"sync"
|
||||
"text/template"
|
||||
|
||||
"gioui.org/gpu/internal/driver"
|
||||
)
|
||||
|
||||
func main() {
|
||||
packageName := flag.String("package", "", "specify Go package name")
|
||||
workdir := flag.String("work", "", "temporary working directory (default TEMP)")
|
||||
shadersDir := flag.String("dir", "shaders", "shaders directory")
|
||||
directCompute := flag.Bool("directcompute", false, "enable compiling DirectCompute shaders")
|
||||
|
||||
flag.Parse()
|
||||
|
||||
var work WorkDir
|
||||
cleanup := func() {}
|
||||
if *workdir == "" {
|
||||
tempdir, err := ioutil.TempDir("", "shader-convert")
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "failed to create tempdir: %v\n", err)
|
||||
os.Exit(1)
|
||||
}
|
||||
cleanup = func() { os.RemoveAll(tempdir) }
|
||||
defer cleanup()
|
||||
|
||||
work = WorkDir(tempdir)
|
||||
} else {
|
||||
if abs, err := filepath.Abs(*workdir); err == nil {
|
||||
*workdir = abs
|
||||
}
|
||||
work = WorkDir(*workdir)
|
||||
}
|
||||
|
||||
var out bytes.Buffer
|
||||
conv := NewConverter(work, *packageName, *shadersDir, *directCompute)
|
||||
if err := conv.Run(&out); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "%v\n", err)
|
||||
cleanup()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
if err := ioutil.WriteFile("shaders.go", out.Bytes(), 0644); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "failed to create shaders: %v\n", err)
|
||||
cleanup()
|
||||
os.Exit(1)
|
||||
}
|
||||
|
||||
cmd := exec.Command("gofmt", "-s", "-w", "shaders.go")
|
||||
cmd.Stdout, cmd.Stderr = os.Stdout, os.Stderr
|
||||
if err := cmd.Run(); err != nil {
|
||||
fmt.Fprintf(os.Stderr, "formatting shaders.go failed: %v\n", err)
|
||||
cleanup()
|
||||
os.Exit(1)
|
||||
}
|
||||
}
|
||||
|
||||
type Converter struct {
|
||||
workDir WorkDir
|
||||
shadersDir string
|
||||
directCompute bool
|
||||
|
||||
packageName string
|
||||
|
||||
glslvalidator *GLSLValidator
|
||||
spirv *SPIRVCross
|
||||
fxc *FXC
|
||||
}
|
||||
|
||||
func NewConverter(workDir WorkDir, packageName, shadersDir string, directCompute bool) *Converter {
|
||||
if abs, err := filepath.Abs(shadersDir); err == nil {
|
||||
shadersDir = abs
|
||||
}
|
||||
|
||||
conv := &Converter{}
|
||||
conv.workDir = workDir
|
||||
conv.shadersDir = shadersDir
|
||||
conv.directCompute = directCompute
|
||||
|
||||
conv.packageName = packageName
|
||||
|
||||
conv.glslvalidator = NewGLSLValidator()
|
||||
conv.spirv = NewSPIRVCross()
|
||||
conv.fxc = NewFXC()
|
||||
|
||||
verifyBinaryPath(&conv.glslvalidator.Bin)
|
||||
verifyBinaryPath(&conv.spirv.Bin)
|
||||
// We cannot check fxc since it may depend on wine.
|
||||
|
||||
conv.glslvalidator.WorkDir = workDir.Dir("glslvalidator")
|
||||
conv.fxc.WorkDir = workDir.Dir("fxc")
|
||||
conv.spirv.WorkDir = workDir.Dir("spirv")
|
||||
|
||||
return conv
|
||||
}
|
||||
|
||||
// verifyBinaryPath resolves *bin through the executable search path. When the
// lookup succeeds *bin is replaced by the resolved path; otherwise a
// diagnostic is written to standard error and *bin is left unchanged.
func verifyBinaryPath(bin *string) {
	resolved, err := exec.LookPath(*bin)
	if err == nil {
		*bin = resolved
		return
	}
	fmt.Fprintf(os.Stderr, "unable to find %q: %v\n", *bin, err)
}
|
||||
|
||||
func (conv *Converter) Run(out io.Writer) error {
|
||||
shaders, err := filepath.Glob(filepath.Join(conv.shadersDir, "*"))
|
||||
if len(shaders) == 0 || err != nil {
|
||||
return fmt.Errorf("failed to list shaders in %q: %w", conv.shadersDir, err)
|
||||
}
|
||||
|
||||
sort.Strings(shaders)
|
||||
|
||||
var workers Workers
|
||||
|
||||
type ShaderResult struct {
|
||||
Path string
|
||||
Shaders []driver.ShaderSources
|
||||
Error error
|
||||
}
|
||||
shaderResults := make([]ShaderResult, len(shaders))
|
||||
|
||||
for i, shaderPath := range shaders {
|
||||
i, shaderPath := i, shaderPath
|
||||
|
||||
switch filepath.Ext(shaderPath) {
|
||||
case ".vert", ".frag":
|
||||
workers.Go(func() {
|
||||
shaders, err := conv.Shader(shaderPath)
|
||||
shaderResults[i] = ShaderResult{
|
||||
Path: shaderPath,
|
||||
Shaders: shaders,
|
||||
Error: err,
|
||||
}
|
||||
})
|
||||
case ".comp":
|
||||
workers.Go(func() {
|
||||
shaders, err := conv.ComputeShader(shaderPath)
|
||||
shaderResults[i] = ShaderResult{
|
||||
Path: shaderPath,
|
||||
Shaders: shaders,
|
||||
Error: err,
|
||||
}
|
||||
})
|
||||
default:
|
||||
continue
|
||||
}
|
||||
}
|
||||
|
||||
workers.Wait()
|
||||
|
||||
var allErrors string
|
||||
for _, r := range shaderResults {
|
||||
if r.Error != nil {
|
||||
if len(allErrors) > 0 {
|
||||
allErrors += "\n\n"
|
||||
}
|
||||
allErrors += "--- " + r.Path + " --- \n\n" + r.Error.Error() + "\n"
|
||||
}
|
||||
}
|
||||
if len(allErrors) > 0 {
|
||||
return errors.New(allErrors)
|
||||
}
|
||||
|
||||
fmt.Fprintf(out, "// Code generated by build.go. DO NOT EDIT.\n\n")
|
||||
fmt.Fprintf(out, "package %s\n\n", conv.packageName)
|
||||
fmt.Fprintf(out, "import %q\n\n", "gioui.org/gpu/internal/driver")
|
||||
|
||||
fmt.Fprintf(out, "var (\n")
|
||||
|
||||
for _, r := range shaderResults {
|
||||
if len(r.Shaders) == 0 {
|
||||
continue
|
||||
}
|
||||
|
||||
name := filepath.Base(r.Path)
|
||||
name = strings.ReplaceAll(name, ".", "_")
|
||||
fmt.Fprintf(out, "\tshader_%s = ", name)
|
||||
|
||||
multiVariant := len(r.Shaders) > 1
|
||||
if multiVariant {
|
||||
fmt.Fprintf(out, "[...]driver.ShaderSources{\n")
|
||||
}
|
||||
|
||||
for _, src := range r.Shaders {
|
||||
fmt.Fprintf(out, "driver.ShaderSources{\n")
|
||||
fmt.Fprintf(out, "Name: %#v,\n", src.Name)
|
||||
if len(src.Inputs) > 0 {
|
||||
fmt.Fprintf(out, "Inputs: %#v,\n", src.Inputs)
|
||||
}
|
||||
if u := src.Uniforms; len(u.Blocks) > 0 {
|
||||
fmt.Fprintf(out, "Uniforms: driver.UniformsReflection{\n")
|
||||
fmt.Fprintf(out, "Blocks: %#v,\n", u.Blocks)
|
||||
fmt.Fprintf(out, "Locations: %#v,\n", u.Locations)
|
||||
fmt.Fprintf(out, "Size: %d,\n", u.Size)
|
||||
fmt.Fprintf(out, "},\n")
|
||||
}
|
||||
if len(src.Textures) > 0 {
|
||||
fmt.Fprintf(out, "Textures: %#v,\n", src.Textures)
|
||||
}
|
||||
if len(src.GLSL100ES) > 0 {
|
||||
fmt.Fprintf(out, "GLSL100ES: `%s`,\n", src.GLSL100ES)
|
||||
}
|
||||
if len(src.GLSL300ES) > 0 {
|
||||
fmt.Fprintf(out, "GLSL300ES: `%s`,\n", src.GLSL300ES)
|
||||
}
|
||||
if len(src.GLSL310ES) > 0 {
|
||||
fmt.Fprintf(out, "GLSL310ES: `%s`,\n", src.GLSL310ES)
|
||||
}
|
||||
if len(src.GLSL130) > 0 {
|
||||
fmt.Fprintf(out, "GLSL130: `%s`,\n", src.GLSL130)
|
||||
}
|
||||
if len(src.GLSL150) > 0 {
|
||||
fmt.Fprintf(out, "GLSL150: `%s`,\n", src.GLSL150)
|
||||
}
|
||||
if len(src.HLSL) > 0 {
|
||||
fmt.Fprintf(out, "HLSL: %q,\n", src.HLSL)
|
||||
}
|
||||
if len(src.Hash) > 0 {
|
||||
fmt.Fprintf(out, "Hash: %q,\n", src.Hash)
|
||||
}
|
||||
fmt.Fprintf(out, "}")
|
||||
if multiVariant {
|
||||
fmt.Fprintf(out, ",")
|
||||
}
|
||||
fmt.Fprintf(out, "\n")
|
||||
}
|
||||
if multiVariant {
|
||||
fmt.Fprintf(out, "}\n")
|
||||
}
|
||||
}
|
||||
fmt.Fprintf(out, ")\n")
|
||||
|
||||
return nil
|
||||
}
|
||||
|
||||
func (conv *Converter) Shader(shaderPath string) ([]driver.ShaderSources, error) {
|
||||
type Variant struct {
|
||||
FetchColorExpr string
|
||||
Header string
|
||||
}
|
||||
variantArgs := [...]Variant{
|
||||
{
|
||||
FetchColorExpr: `_color.color`,
|
||||
Header: `layout(binding=0) uniform Color { vec4 color; } _color;`,
|
||||
},
|
||||
{
|
||||
FetchColorExpr: `mix(_gradient.color1, _gradient.color2, clamp(vUV.x, 0.0, 1.0))`,
|
||||
Header: `layout(binding=0) uniform Gradient { vec4 color1; vec4 color2; } _gradient;`,
|
||||
},
|
||||
{
|
||||
FetchColorExpr: `texture(tex, vUV)`,
|
||||
Header: `layout(binding=0) uniform sampler2D tex;`,
|
||||
},
|
||||
}
|
||||
|
||||
shaderTemplate, err := template.ParseFiles(shaderPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to parse template %q: %w", shaderPath, err)
|
||||
}
|
||||
|
||||
var variants []driver.ShaderSources
|
||||
for i, variantArg := range variantArgs {
|
||||
variantName := strconv.Itoa(i)
|
||||
var buf bytes.Buffer
|
||||
err := shaderTemplate.Execute(&buf, variantArg)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to execute template %q with %#v: %w", shaderPath, variantArg, err)
|
||||
}
|
||||
|
||||
var sources driver.ShaderSources
|
||||
sources.Name = filepath.Base(shaderPath)
|
||||
|
||||
// Ignore error; some shaders are not meant to run in GLSL 1.00.
|
||||
sources.GLSL100ES, _, _ = conv.ShaderVariant(shaderPath, variantName, buf.Bytes(), "es", "100")
|
||||
|
||||
var metadata Metadata
|
||||
sources.GLSL300ES, metadata, err = conv.ShaderVariant(shaderPath, variantName, buf.Bytes(), "es", "300")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to convert GLSL300ES:\n%w", err)
|
||||
}
|
||||
|
||||
sources.GLSL130, _, err = conv.ShaderVariant(shaderPath, variantName, buf.Bytes(), "glsl", "130")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to convert GLSL130:\n%w", err)
|
||||
}
|
||||
|
||||
hlsl, _, err := conv.ShaderVariant(shaderPath, variantName, buf.Bytes(), "hlsl", "40")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to convert HLSL:\n%w", err)
|
||||
}
|
||||
sources.HLSL, err = conv.fxc.Compile(shaderPath, variantName, []byte(hlsl), "main", "4_0_level_9_1")
|
||||
if err != nil {
|
||||
// Attempt shader model 4.0. Only the gpu/headless
|
||||
// test shaders use features not supported by level
|
||||
// 9.1.
|
||||
sources.HLSL, err = conv.fxc.Compile(shaderPath, variantName, []byte(hlsl), "main", "4_0")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to compile HLSL: %w", err)
|
||||
}
|
||||
}
|
||||
|
||||
sources.GLSL150, _, err = conv.ShaderVariant(shaderPath, variantName, buf.Bytes(), "glsl", "150")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to convert GLSL150:\n%w", err)
|
||||
}
|
||||
|
||||
sources.Uniforms = metadata.Uniforms
|
||||
sources.Inputs = metadata.Inputs
|
||||
sources.Textures = metadata.Textures
|
||||
|
||||
variants = append(variants, sources)
|
||||
}
|
||||
|
||||
// If the shader don't use the variant arguments, output only a single version.
|
||||
if variants[0].GLSL100ES == variants[1].GLSL100ES {
|
||||
variants = variants[:1]
|
||||
}
|
||||
|
||||
return variants, nil
|
||||
}
|
||||
|
||||
func (conv *Converter) ShaderVariant(shaderPath, variant string, src []byte, lang, profile string) (string, Metadata, error) {
|
||||
spirv, err := conv.glslvalidator.Convert(shaderPath, variant, lang == "hlsl", src)
|
||||
if err != nil {
|
||||
return "", Metadata{}, fmt.Errorf("failed to generate SPIR-V for %q: %w", shaderPath, err)
|
||||
}
|
||||
|
||||
dst, err := conv.spirv.Convert(shaderPath, variant, spirv, lang, profile)
|
||||
if err != nil {
|
||||
return "", Metadata{}, fmt.Errorf("failed to convert shader %q: %w", shaderPath, err)
|
||||
}
|
||||
|
||||
meta, err := conv.spirv.Metadata(shaderPath, variant, spirv)
|
||||
if err != nil {
|
||||
return "", Metadata{}, fmt.Errorf("failed to extract metadata for shader %q: %w", shaderPath, err)
|
||||
}
|
||||
|
||||
return dst, meta, nil
|
||||
}
|
||||
|
||||
func (conv *Converter) ComputeShader(shaderPath string) ([]driver.ShaderSources, error) {
|
||||
shader, err := ioutil.ReadFile(shaderPath)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to load shader %q: %w", shaderPath, err)
|
||||
}
|
||||
|
||||
spirv, err := conv.glslvalidator.Convert(shaderPath, "", false, shader)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to convert compute shader %q: %w", shaderPath, err)
|
||||
}
|
||||
|
||||
var sources driver.ShaderSources
|
||||
sources.Name = filepath.Base(shaderPath)
|
||||
|
||||
sum := sha256.Sum256(shader)
|
||||
sources.Hash = hex.EncodeToString(sum[:])
|
||||
|
||||
sources.GLSL310ES, err = conv.spirv.Convert(shaderPath, "", spirv, "es", "310")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to convert es compute shader %q: %w", shaderPath, err)
|
||||
}
|
||||
sources.GLSL310ES = unixLineEnding(sources.GLSL310ES)
|
||||
|
||||
hlslSource, err := conv.spirv.Convert(shaderPath, "", spirv, "hlsl", "50")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to convert hlsl compute shader %q: %w", shaderPath, err)
|
||||
}
|
||||
|
||||
dxil, err := conv.fxc.Compile(shaderPath, "0", []byte(hlslSource), "main", "5_0")
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to compile hlsl compute shader %q: %w", shaderPath, err)
|
||||
}
|
||||
if conv.directCompute {
|
||||
sources.HLSL = dxil
|
||||
}
|
||||
|
||||
return []driver.ShaderSources{sources}, nil
|
||||
}
|
||||
|
||||
// Workers runs functions in their own goroutines and lets the caller wait for
// all of them to finish. The zero value is ready to use.
type Workers struct {
	running sync.WaitGroup
}

// Go starts fn in a new goroutine tracked by w.
func (w *Workers) Go(fn func()) {
	w.running.Add(1)
	go func() {
		defer w.running.Done()
		fn()
	}()
}

// Wait blocks until every function started with Go has returned.
func (w *Workers) Wait() {
	w.running.Wait()
}
|
||||
|
||||
// unixLineEnding returns s with every "\r\n" sequence replaced by "\n".
func unixLineEnding(s string) string {
	lines := strings.Split(s, "\r\n")
	return strings.Join(lines, "\n")
}
|
||||
@@ -1,212 +0,0 @@
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"encoding/json"
|
||||
"fmt"
|
||||
"os/exec"
|
||||
"path/filepath"
|
||||
"sort"
|
||||
"strings"
|
||||
|
||||
"gioui.org/gpu/internal/driver"
|
||||
)
|
||||
|
||||
// Metadata contains reflection data about a shader.
|
||||
type Metadata struct {
|
||||
Uniforms driver.UniformsReflection
|
||||
Inputs []driver.InputLocation
|
||||
Textures []driver.TextureBinding
|
||||
}
|
||||
|
||||
// SPIRVCross cross-compiles spirv shaders to es, hlsl and others.
|
||||
type SPIRVCross struct {
|
||||
Bin string
|
||||
WorkDir WorkDir
|
||||
}
|
||||
|
||||
func NewSPIRVCross() *SPIRVCross { return &SPIRVCross{Bin: "spirv-cross"} }
|
||||
|
||||
// Convert converts compute shader from spirv format to a target format.
|
||||
func (spirv *SPIRVCross) Convert(path, variant string, shader []byte, target, version string) (string, error) {
|
||||
base := spirv.WorkDir.Path(filepath.Base(path), variant)
|
||||
|
||||
if err := spirv.WorkDir.WriteFile(base, shader); err != nil {
|
||||
return "", fmt.Errorf("unable to write shader to disk: %w", err)
|
||||
}
|
||||
|
||||
var cmd *exec.Cmd
|
||||
switch target {
|
||||
case "glsl":
|
||||
cmd = exec.Command(spirv.Bin,
|
||||
"--no-es",
|
||||
"--version", version,
|
||||
)
|
||||
case "es":
|
||||
cmd = exec.Command(spirv.Bin,
|
||||
"--es",
|
||||
"--version", version,
|
||||
)
|
||||
case "hlsl":
|
||||
cmd = exec.Command(spirv.Bin,
|
||||
"--hlsl",
|
||||
"--shader-model", version,
|
||||
)
|
||||
default:
|
||||
return "", fmt.Errorf("unknown target %q", target)
|
||||
}
|
||||
cmd.Args = append(cmd.Args, "--no-420pack-extension", base)
|
||||
|
||||
out, err := cmd.CombinedOutput()
|
||||
if err != nil {
|
||||
return "", fmt.Errorf("%s\nfailed to run %v: %w", out, cmd.Args, err)
|
||||
}
|
||||
s := string(out)
|
||||
if target != "hlsl" {
|
||||
// Strip Windows \r in line endings.
|
||||
s = unixLineEnding(s)
|
||||
}
|
||||
|
||||
return s, nil
|
||||
}
|
||||
|
||||
// Metadata extracts metadata for a SPIR-V shader.
|
||||
func (spirv *SPIRVCross) Metadata(path, variant string, shader []byte) (Metadata, error) {
|
||||
base := spirv.WorkDir.Path(filepath.Base(path), variant)
|
||||
|
||||
if err := spirv.WorkDir.WriteFile(base, shader); err != nil {
|
||||
return Metadata{}, fmt.Errorf("unable to write shader to disk: %w", err)
|
||||
}
|
||||
|
||||
cmd := exec.Command(spirv.Bin,
|
||||
base,
|
||||
"--reflect",
|
||||
)
|
||||
|
||||
out, err := cmd.Output()
|
||||
if err != nil {
|
||||
return Metadata{}, fmt.Errorf("failed to run %v: %w", cmd.Args, err)
|
||||
}
|
||||
|
||||
meta, err := parseMetadata(out)
|
||||
if err != nil {
|
||||
return Metadata{}, fmt.Errorf("%s\nfailed to parse metadata: %w", out, err)
|
||||
}
|
||||
|
||||
return meta, nil
|
||||
}
|
||||
|
||||
func parseMetadata(data []byte) (Metadata, error) {
|
||||
var reflect struct {
|
||||
Types map[string]struct {
|
||||
Name string `json:"name"`
|
||||
Members []struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Offset int `json:"offset"`
|
||||
} `json:"members"`
|
||||
} `json:"types"`
|
||||
Inputs []struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Location int `json:"location"`
|
||||
} `json:"inputs"`
|
||||
Textures []struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
Set int `json:"set"`
|
||||
Binding int `json:"binding"`
|
||||
} `json:"textures"`
|
||||
UBOs []struct {
|
||||
Name string `json:"name"`
|
||||
Type string `json:"type"`
|
||||
BlockSize int `json:"block_size"`
|
||||
Set int `json:"set"`
|
||||
Binding int `json:"binding"`
|
||||
} `json:"ubos"`
|
||||
}
|
||||
if err := json.Unmarshal(data, &reflect); err != nil {
|
||||
return Metadata{}, fmt.Errorf("failed to parse reflection data: %w", err)
|
||||
}
|
||||
|
||||
var m Metadata
|
||||
|
||||
for _, input := range reflect.Inputs {
|
||||
dataType, dataSize, err := parseDataType(input.Type)
|
||||
if err != nil {
|
||||
return Metadata{}, fmt.Errorf("parseReflection: %v", err)
|
||||
}
|
||||
m.Inputs = append(m.Inputs, driver.InputLocation{
|
||||
Name: input.Name,
|
||||
Location: input.Location,
|
||||
Semantic: "TEXCOORD",
|
||||
SemanticIndex: input.Location,
|
||||
Type: dataType,
|
||||
Size: dataSize,
|
||||
})
|
||||
}
|
||||
|
||||
sort.Slice(m.Inputs, func(i, j int) bool {
|
||||
return m.Inputs[i].Location < m.Inputs[j].Location
|
||||
})
|
||||
|
||||
blockOffset := 0
|
||||
for _, block := range reflect.UBOs {
|
||||
m.Uniforms.Blocks = append(m.Uniforms.Blocks, driver.UniformBlock{
|
||||
Name: block.Name,
|
||||
Binding: block.Binding,
|
||||
})
|
||||
t := reflect.Types[block.Type]
|
||||
// By convention uniform block variables are named by prepending an underscore
|
||||
// and converting to lowercase.
|
||||
blockVar := "_" + strings.ToLower(block.Name)
|
||||
for _, member := range t.Members {
|
||||
dataType, size, err := parseDataType(member.Type)
|
||||
if err != nil {
|
||||
return Metadata{}, fmt.Errorf("failed to parse reflection data: %v", err)
|
||||
}
|
||||
m.Uniforms.Locations = append(m.Uniforms.Locations, driver.UniformLocation{
|
||||
Name: fmt.Sprintf("%s.%s", blockVar, member.Name),
|
||||
Type: dataType,
|
||||
Size: size,
|
||||
Offset: blockOffset + member.Offset,
|
||||
})
|
||||
}
|
||||
blockOffset += block.BlockSize
|
||||
}
|
||||
m.Uniforms.Size = blockOffset
|
||||
|
||||
for _, texture := range reflect.Textures {
|
||||
m.Textures = append(m.Textures, driver.TextureBinding{
|
||||
Name: texture.Name,
|
||||
Binding: texture.Binding,
|
||||
})
|
||||
}
|
||||
|
||||
//return m, fmt.Errorf("not yet!: %+v", reflect)
|
||||
return m, nil
|
||||
}
|
||||
|
||||
func parseDataType(t string) (driver.DataType, int, error) {
|
||||
switch t {
|
||||
case "float":
|
||||
return driver.DataTypeFloat, 1, nil
|
||||
case "vec2":
|
||||
return driver.DataTypeFloat, 2, nil
|
||||
case "vec3":
|
||||
return driver.DataTypeFloat, 3, nil
|
||||
case "vec4":
|
||||
return driver.DataTypeFloat, 4, nil
|
||||
case "int":
|
||||
return driver.DataTypeInt, 1, nil
|
||||
case "int2":
|
||||
return driver.DataTypeInt, 2, nil
|
||||
case "int3":
|
||||
return driver.DataTypeInt, 3, nil
|
||||
case "int4":
|
||||
return driver.DataTypeInt, 4, nil
|
||||
default:
|
||||
return 0, 0, fmt.Errorf("unsupported input data type: %s", t)
|
||||
}
|
||||
}
|
||||
@@ -1,35 +0,0 @@
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
package main
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/ioutil"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// WorkDir is the path of a directory used for temporary files.
type WorkDir string

// Dir returns the sub-directory path of wd, creating it (and any missing
// parents, including wd itself) if necessary. A failure to create the
// directory is reported on standard error; the path is returned regardless.
func (wd WorkDir) Dir(path string) WorkDir {
	dirname := filepath.Join(string(wd), path)
	// MkdirAll succeeds when the directory already exists and, unlike
	// Mkdir, also handles nested paths and a work directory that has not
	// been created yet.
	if err := os.MkdirAll(dirname, 0755); err != nil {
		fmt.Fprintf(os.Stderr, "failed to create %q: %v\n", dirname, err)
	}
	return WorkDir(dirname)
}

// Path returns the path inside wd of the file whose name is the given parts
// joined with ".".
func (wd WorkDir) Path(path ...string) (fullpath string) {
	return filepath.Join(string(wd), strings.Join(path, "."))
}

// WriteFile writes data to the file at path with mode 0644. The path is used
// as given; it is not resolved relative to wd.
func (wd WorkDir) WriteFile(path string, data []byte) error {
	err := ioutil.WriteFile(path, data, 0644)
	if err != nil {
		return fmt.Errorf("unable to create %v: %w", path, err)
	}
	return nil
}
|
||||
@@ -14,6 +14,7 @@ import (
|
||||
|
||||
"gioui.org/gpu/internal/driver"
|
||||
"gioui.org/internal/d3d11"
|
||||
"gioui.org/shader"
|
||||
)
|
||||
|
||||
type Backend struct {
|
||||
@@ -287,7 +288,7 @@ func (b *Backend) NewFramebuffer(tex driver.Texture) (driver.Framebuffer, error)
|
||||
return fbo, nil
|
||||
}
|
||||
|
||||
func (b *Backend) NewInputLayout(vertexShader driver.ShaderSources, layout []driver.InputDesc) (driver.InputLayout, error) {
|
||||
func (b *Backend) NewInputLayout(vertexShader shader.Sources, layout []shader.InputDesc) (driver.InputLayout, error) {
|
||||
if len(vertexShader.Inputs) != len(layout) {
|
||||
return nil, fmt.Errorf("NewInputLayout: got %d inputs, expected %d", len(layout), len(vertexShader.Inputs))
|
||||
}
|
||||
@@ -300,7 +301,7 @@ func (b *Backend) NewInputLayout(vertexShader driver.ShaderSources, layout []dri
|
||||
}
|
||||
var format uint32
|
||||
switch l.Type {
|
||||
case driver.DataTypeFloat:
|
||||
case shader.DataTypeFloat:
|
||||
switch l.Size {
|
||||
case 1:
|
||||
format = d3d11.DXGI_FORMAT_R32_FLOAT
|
||||
@@ -313,7 +314,7 @@ func (b *Backend) NewInputLayout(vertexShader driver.ShaderSources, layout []dri
|
||||
default:
|
||||
panic("unsupported data size")
|
||||
}
|
||||
case driver.DataTypeShort:
|
||||
case shader.DataTypeShort:
|
||||
switch l.Size {
|
||||
case 1:
|
||||
format = d3d11.DXGI_FORMAT_R16_SINT
|
||||
@@ -332,7 +333,7 @@ func (b *Backend) NewInputLayout(vertexShader driver.ShaderSources, layout []dri
|
||||
AlignedByteOffset: uint32(l.Offset),
|
||||
}
|
||||
}
|
||||
l, err := b.dev.CreateInputLayout(descs, []byte(vertexShader.HLSL))
|
||||
l, err := b.dev.CreateInputLayout(descs, []byte(vertexShader.DXBC))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
@@ -380,16 +381,16 @@ func (b *Backend) NewImmutableBuffer(typ driver.BufferBinding, data []byte) (dri
|
||||
return &Buffer{backend: b, buf: buf, bind: bind, immutable: true}, nil
|
||||
}
|
||||
|
||||
func (b *Backend) NewComputeProgram(shader driver.ShaderSources) (driver.Program, error) {
|
||||
func (b *Backend) NewComputeProgram(shader shader.Sources) (driver.Program, error) {
|
||||
panic("not implemented")
|
||||
}
|
||||
|
||||
func (b *Backend) NewProgram(vertexShader, fragmentShader driver.ShaderSources) (driver.Program, error) {
|
||||
vs, err := b.dev.CreateVertexShader([]byte(vertexShader.HLSL))
|
||||
func (b *Backend) NewProgram(vertexShader, fragmentShader shader.Sources) (driver.Program, error) {
|
||||
vs, err := b.dev.CreateVertexShader([]byte(vertexShader.DXBC))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
ps, err := b.dev.CreatePixelShader([]byte(fragmentShader.HLSL))
|
||||
ps, err := b.dev.CreatePixelShader([]byte(fragmentShader.DXBC))
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
@@ -6,6 +6,8 @@ import (
|
||||
"errors"
|
||||
"image"
|
||||
"time"
|
||||
|
||||
"gioui.org/shader"
|
||||
)
|
||||
|
||||
// Device represents the abstraction of underlying GPU
|
||||
@@ -23,9 +25,9 @@ type Device interface {
|
||||
NewFramebuffer(tex Texture) (Framebuffer, error)
|
||||
NewImmutableBuffer(typ BufferBinding, data []byte) (Buffer, error)
|
||||
NewBuffer(typ BufferBinding, size int) (Buffer, error)
|
||||
NewComputeProgram(shader ShaderSources) (Program, error)
|
||||
NewProgram(vertexShader, fragmentShader ShaderSources) (Program, error)
|
||||
NewInputLayout(vertexShader ShaderSources, layout []InputDesc) (InputLayout, error)
|
||||
NewComputeProgram(shader shader.Sources) (Program, error)
|
||||
NewProgram(vertexShader, fragmentShader shader.Sources) (Program, error)
|
||||
NewInputLayout(vertexShader shader.Sources, layout []shader.InputDesc) (InputLayout, error)
|
||||
|
||||
Clear(r, g, b, a float32)
|
||||
Viewport(x, y, width, height int)
|
||||
@@ -49,63 +51,6 @@ type Device interface {
|
||||
Release()
|
||||
}
|
||||
|
||||
type ShaderSources struct {
|
||||
Name string
|
||||
GLSL100ES string
|
||||
GLSL300ES string
|
||||
GLSL310ES string
|
||||
GLSL130 string
|
||||
GLSL150 string
|
||||
HLSL string
|
||||
Uniforms UniformsReflection
|
||||
Inputs []InputLocation
|
||||
Textures []TextureBinding
|
||||
Hash string
|
||||
}
|
||||
|
||||
type UniformsReflection struct {
|
||||
Blocks []UniformBlock
|
||||
Locations []UniformLocation
|
||||
Size int
|
||||
}
|
||||
|
||||
type TextureBinding struct {
|
||||
Name string
|
||||
Binding int
|
||||
}
|
||||
|
||||
type UniformBlock struct {
|
||||
Name string
|
||||
Binding int
|
||||
}
|
||||
|
||||
type UniformLocation struct {
|
||||
Name string
|
||||
Type DataType
|
||||
Size int
|
||||
Offset int
|
||||
}
|
||||
|
||||
type InputLocation struct {
|
||||
// For GLSL.
|
||||
Name string
|
||||
Location int
|
||||
// For HLSL.
|
||||
Semantic string
|
||||
SemanticIndex int
|
||||
|
||||
Type DataType
|
||||
Size int
|
||||
}
|
||||
|
||||
// InputDesc describes a vertex attribute as laid out in a Buffer.
|
||||
type InputDesc struct {
|
||||
Type DataType
|
||||
Size int
|
||||
|
||||
Offset int
|
||||
}
|
||||
|
||||
// InputLayout is the driver specific representation of the mapping
|
||||
// between Buffers and shader attributes.
|
||||
type InputLayout interface {
|
||||
@@ -123,8 +68,6 @@ type TextureFormat uint8
|
||||
|
||||
type BufferBinding uint8
|
||||
|
||||
type DataType uint8
|
||||
|
||||
type Features uint
|
||||
|
||||
type Caps struct {
|
||||
@@ -167,12 +110,6 @@ type Texture interface {
|
||||
Release()
|
||||
}
|
||||
|
||||
const (
|
||||
DataTypeFloat DataType = iota
|
||||
DataTypeInt
|
||||
DataTypeShort
|
||||
)
|
||||
|
||||
const (
|
||||
BufferBindingIndices BufferBinding = 1 << iota
|
||||
BufferBindingVertices
|
||||
|
||||
@@ -12,6 +12,7 @@ import (
|
||||
|
||||
"gioui.org/gpu/internal/driver"
|
||||
"gioui.org/internal/gl"
|
||||
"gioui.org/shader"
|
||||
)
|
||||
|
||||
// Backend implements driver.Device.
|
||||
@@ -139,13 +140,13 @@ type uniformsTracker struct {
|
||||
type uniformLocation struct {
|
||||
uniform gl.Uniform
|
||||
offset int
|
||||
typ driver.DataType
|
||||
typ shader.DataType
|
||||
size int
|
||||
}
|
||||
|
||||
type gpuInputLayout struct {
|
||||
inputs []driver.InputLocation
|
||||
layout []driver.InputDesc
|
||||
inputs []shader.InputLocation
|
||||
layout []shader.InputDesc
|
||||
}
|
||||
|
||||
// textureTriple holds the type settings for
|
||||
@@ -846,7 +847,7 @@ func (b *Backend) Clear(colR, colG, colB, colA float32) {
|
||||
b.funcs.Clear(gl.COLOR_BUFFER_BIT)
|
||||
}
|
||||
|
||||
func (b *Backend) NewInputLayout(vs driver.ShaderSources, layout []driver.InputDesc) (driver.InputLayout, error) {
|
||||
func (b *Backend) NewInputLayout(vs shader.Sources, layout []shader.InputDesc) (driver.InputLayout, error) {
|
||||
if len(vs.Inputs) != len(layout) {
|
||||
return nil, fmt.Errorf("NewInputLayout: got %d inputs, expected %d", len(layout), len(vs.Inputs))
|
||||
}
|
||||
@@ -861,7 +862,7 @@ func (b *Backend) NewInputLayout(vs driver.ShaderSources, layout []driver.InputD
|
||||
}, nil
|
||||
}
|
||||
|
||||
func (b *Backend) NewComputeProgram(src driver.ShaderSources) (driver.Program, error) {
|
||||
func (b *Backend) NewComputeProgram(src shader.Sources) (driver.Program, error) {
|
||||
p, err := gl.CreateComputeProgram(b.funcs, src.GLSL310ES)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("%s: %v", src.Name, err)
|
||||
@@ -873,7 +874,7 @@ func (b *Backend) NewComputeProgram(src driver.ShaderSources) (driver.Program, e
|
||||
return gpuProg, nil
|
||||
}
|
||||
|
||||
func (b *Backend) NewProgram(vertShader, fragShader driver.ShaderSources) (driver.Program, error) {
|
||||
func (b *Backend) NewProgram(vertShader, fragShader shader.Sources) (driver.Program, error) {
|
||||
attr := make([]string, len(vertShader.Inputs))
|
||||
for _, inp := range vertShader.Inputs {
|
||||
attr[inp.Location] = inp.Name
|
||||
@@ -937,7 +938,7 @@ func (b *Backend) NewProgram(vertShader, fragShader driver.ShaderSources) (drive
|
||||
return gpuProg, nil
|
||||
}
|
||||
|
||||
func lookupUniform(funcs *gl.Functions, p gl.Program, loc driver.UniformLocation) uniformLocation {
|
||||
func lookupUniform(funcs *gl.Functions, p gl.Program, loc shader.UniformLocation) uniformLocation {
|
||||
u := funcs.GetUniformLocation(p, loc.Name)
|
||||
if !u.Valid() {
|
||||
panic(fmt.Errorf("uniform %q not found", loc.Name))
|
||||
@@ -985,7 +986,7 @@ func (p *gpuProgram) Release() {
|
||||
p.backend.glstate.deleteProgram(p.backend.funcs, p.obj)
|
||||
}
|
||||
|
||||
func (u *uniformsTracker) setup(funcs *gl.Functions, p gl.Program, uniformSize int, uniforms []driver.UniformLocation) {
|
||||
func (u *uniformsTracker) setup(funcs *gl.Functions, p gl.Program, uniformSize int, uniforms []shader.UniformLocation) {
|
||||
u.locs = make([]uniformLocation, len(uniforms))
|
||||
for i, uniform := range uniforms {
|
||||
u.locs[i] = lookupUniform(funcs, p, uniform)
|
||||
@@ -1016,19 +1017,19 @@ func (p *uniformsTracker) update(funcs *gl.Functions) {
|
||||
for _, u := range p.locs {
|
||||
data := data[u.offset:]
|
||||
switch {
|
||||
case u.typ == driver.DataTypeFloat && u.size == 1:
|
||||
case u.typ == shader.DataTypeFloat && u.size == 1:
|
||||
data := data[:4]
|
||||
v := *(*[1]float32)(unsafe.Pointer(&data[0]))
|
||||
funcs.Uniform1f(u.uniform, v[0])
|
||||
case u.typ == driver.DataTypeFloat && u.size == 2:
|
||||
case u.typ == shader.DataTypeFloat && u.size == 2:
|
||||
data := data[:8]
|
||||
v := *(*[2]float32)(unsafe.Pointer(&data[0]))
|
||||
funcs.Uniform2f(u.uniform, v[0], v[1])
|
||||
case u.typ == driver.DataTypeFloat && u.size == 3:
|
||||
case u.typ == shader.DataTypeFloat && u.size == 3:
|
||||
data := data[:12]
|
||||
v := *(*[3]float32)(unsafe.Pointer(&data[0]))
|
||||
funcs.Uniform3f(u.uniform, v[0], v[1], v[2])
|
||||
case u.typ == driver.DataTypeFloat && u.size == 4:
|
||||
case u.typ == shader.DataTypeFloat && u.size == 4:
|
||||
data := data[:16]
|
||||
v := *(*[4]float32)(unsafe.Pointer(&data[0]))
|
||||
funcs.Uniform4f(u.uniform, v[0], v[1], v[2], v[3])
|
||||
@@ -1108,9 +1109,9 @@ func (b *Backend) setupVertexArrays() {
|
||||
l := layout.layout[i]
|
||||
var gltyp gl.Enum
|
||||
switch l.Type {
|
||||
case driver.DataTypeFloat:
|
||||
case shader.DataTypeFloat:
|
||||
gltyp = gl.FLOAT
|
||||
case driver.DataTypeShort:
|
||||
case shader.DataTypeShort:
|
||||
gltyp = gl.SHORT
|
||||
default:
|
||||
panic("unsupported data type")
|
||||
|
||||
+14
-12
@@ -15,6 +15,8 @@ import (
|
||||
"gioui.org/gpu/internal/driver"
|
||||
"gioui.org/internal/byteslice"
|
||||
"gioui.org/internal/f32color"
|
||||
"gioui.org/shader"
|
||||
"gioui.org/shader/gio"
|
||||
)
|
||||
|
||||
type pather struct {
|
||||
@@ -161,7 +163,7 @@ func newCoverer(ctx driver.Device) *coverer {
|
||||
c.colUniforms = new(coverColUniforms)
|
||||
c.texUniforms = new(coverTexUniforms)
|
||||
c.linearGradientUniforms = new(coverLinearGradientUniforms)
|
||||
prog, layout, err := createColorPrograms(ctx, shader_cover_vert, shader_cover_frag,
|
||||
prog, layout, err := createColorPrograms(ctx, gio.Shader_cover_vert, gio.Shader_cover_frag,
|
||||
[3]interface{}{&c.colUniforms.vert, &c.linearGradientUniforms.vert, &c.texUniforms.vert},
|
||||
[3]interface{}{&c.colUniforms.frag, &c.linearGradientUniforms.frag, nil},
|
||||
)
|
||||
@@ -189,19 +191,19 @@ func newStenciler(ctx driver.Device) *stenciler {
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
progLayout, err := ctx.NewInputLayout(shader_stencil_vert, []driver.InputDesc{
|
||||
{Type: driver.DataTypeFloat, Size: 1, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).Corner))},
|
||||
{Type: driver.DataTypeFloat, Size: 1, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).MaxY))},
|
||||
{Type: driver.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).FromX))},
|
||||
{Type: driver.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).CtrlX))},
|
||||
{Type: driver.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).ToX))},
|
||||
progLayout, err := ctx.NewInputLayout(gio.Shader_stencil_vert, []shader.InputDesc{
|
||||
{Type: shader.DataTypeFloat, Size: 1, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).Corner))},
|
||||
{Type: shader.DataTypeFloat, Size: 1, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).MaxY))},
|
||||
{Type: shader.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).FromX))},
|
||||
{Type: shader.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).CtrlX))},
|
||||
{Type: shader.DataTypeFloat, Size: 2, Offset: int(unsafe.Offsetof((*(*vertex)(nil)).ToX))},
|
||||
})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
iprogLayout, err := ctx.NewInputLayout(shader_intersect_vert, []driver.InputDesc{
|
||||
{Type: driver.DataTypeFloat, Size: 2, Offset: 0},
|
||||
{Type: driver.DataTypeFloat, Size: 2, Offset: 4 * 2},
|
||||
iprogLayout, err := ctx.NewInputLayout(gio.Shader_intersect_vert, []shader.InputDesc{
|
||||
{Type: shader.DataTypeFloat, Size: 2, Offset: 0},
|
||||
{Type: shader.DataTypeFloat, Size: 2, Offset: 4 * 2},
|
||||
})
|
||||
if err != nil {
|
||||
panic(err)
|
||||
@@ -210,7 +212,7 @@ func newStenciler(ctx driver.Device) *stenciler {
|
||||
ctx: ctx,
|
||||
indexBuf: indexBuf,
|
||||
}
|
||||
prog, err := ctx.NewProgram(shader_stencil_vert, shader_stencil_frag)
|
||||
prog, err := ctx.NewProgram(gio.Shader_stencil_vert, gio.Shader_stencil_frag)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
@@ -218,7 +220,7 @@ func newStenciler(ctx driver.Device) *stenciler {
|
||||
vertUniforms := newUniformBuffer(ctx, &st.prog.uniforms.vert)
|
||||
st.prog.prog = newProgram(prog, vertUniforms, nil)
|
||||
st.prog.layout = progLayout
|
||||
iprog, err := ctx.NewProgram(shader_intersect_vert, shader_intersect_frag)
|
||||
iprog, err := ctx.NewProgram(gio.Shader_intersect_vert, gio.Shader_intersect_frag)
|
||||
if err != nil {
|
||||
panic(err)
|
||||
}
|
||||
|
||||
-6684
File diff suppressed because one or more lines are too long
@@ -1,225 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
struct AnnoImageRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct AnnoColorRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
struct AnnoBeginClipRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// Reference to an AnnoEndClip record, held as a byte offset (the read/write
// helpers shift it right by 2 to obtain a 32-bit word index).
struct AnnoEndClipRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// Reference to a tagged Annotated element, held as a byte offset. The first
// word at that offset is the tag/flags word; the payload follows 4 bytes later.
struct AnnotatedRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// Decoded image annotation. Serialized as 7 words (AnnoImage_size = 28 bytes):
// four for bbox, one each for linewidth and index, and one word holding both
// components of offset as signed 16-bit halves.
struct AnnoImage {
|
||||
vec4 bbox;
|
||||
float linewidth;
|
||||
uint index;
|
||||
ivec2 offset;
|
||||
};
|
||||
|
||||
#define AnnoImage_size 28
|
||||
|
||||
// Returns a reference to the AnnoImage that lies `index` records past `ref`.
AnnoImageRef AnnoImage_index(AnnoImageRef ref, uint index) {
    uint advance = index * AnnoImage_size;
    return AnnoImageRef(ref.offset + advance);
}
|
||||
|
||||
// Decoded solid-color annotation. Serialized as 6 words (AnnoColor_size = 24
// bytes): four for bbox, then linewidth, then rgba_color.
struct AnnoColor {
|
||||
vec4 bbox;
|
||||
float linewidth;
|
||||
uint rgba_color;
|
||||
};
|
||||
|
||||
#define AnnoColor_size 24
|
||||
|
||||
// Returns a reference to the AnnoColor that lies `index` records past `ref`.
AnnoColorRef AnnoColor_index(AnnoColorRef ref, uint index) {
    uint advance = index * AnnoColor_size;
    return AnnoColorRef(ref.offset + advance);
}
|
||||
|
||||
// Decoded begin-clip annotation. Serialized as 5 words (AnnoBeginClip_size =
// 20 bytes): four for bbox followed by linewidth.
struct AnnoBeginClip {
|
||||
vec4 bbox;
|
||||
float linewidth;
|
||||
};
|
||||
|
||||
#define AnnoBeginClip_size 20
|
||||
|
||||
// Returns a reference to the AnnoBeginClip that lies `index` records past `ref`.
AnnoBeginClipRef AnnoBeginClip_index(AnnoBeginClipRef ref, uint index) {
    uint advance = index * AnnoBeginClip_size;
    return AnnoBeginClipRef(ref.offset + advance);
}
|
||||
|
||||
// Decoded end-clip annotation. Serialized as the 4 words of bbox
// (AnnoEndClip_size = 16 bytes).
struct AnnoEndClip {
|
||||
vec4 bbox;
|
||||
};
|
||||
|
||||
#define AnnoEndClip_size 16
|
||||
|
||||
// Returns a reference to the AnnoEndClip that lies `index` records past `ref`.
AnnoEndClipRef AnnoEndClip_index(AnnoEndClipRef ref, uint index) {
    uint advance = index * AnnoEndClip_size;
    return AnnoEndClipRef(ref.offset + advance);
}
|
||||
|
||||
#define Annotated_Nop 0
|
||||
#define Annotated_Color 1
|
||||
#define Annotated_Image 2
|
||||
#define Annotated_BeginClip 3
|
||||
#define Annotated_EndClip 4
|
||||
#define Annotated_size 32
|
||||
|
||||
// Returns a reference to the Annotated element that lies `index` elements past
// `ref`; every element occupies Annotated_size bytes regardless of its tag.
AnnotatedRef Annotated_index(AnnotatedRef ref, uint index) {
    uint advance = index * Annotated_size;
    return AnnotatedRef(ref.offset + advance);
}
|
||||
|
||||
// Unpacked header word of an Annotated element: `tag` comes from the low 16
// bits and `flags` from the high 16 bits (see Annotated_tag).
struct AnnotatedTag {
|
||||
uint tag;
|
||||
uint flags;
|
||||
};
|
||||
|
||||
// Decodes the AnnoImage stored at `ref`, reading seven consecutive words
// through allocation `a`.
AnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref) {
    uint base = ref.offset >> 2;
    uint w0 = read_mem(a, base + 0);
    uint w1 = read_mem(a, base + 1);
    uint w2 = read_mem(a, base + 2);
    uint w3 = read_mem(a, base + 3);
    uint w4 = read_mem(a, base + 4);
    uint w5 = read_mem(a, base + 5);
    uint w6 = read_mem(a, base + 6);

    // Word 6 carries offset.x in its low half and offset.y in its high half;
    // the right shifts on int sign-extend each 16-bit value.
    int off_x = int(w6 << 16) >> 16;
    int off_y = int(w6) >> 16;

    vec4 bbox = vec4(uintBitsToFloat(w0), uintBitsToFloat(w1), uintBitsToFloat(w2), uintBitsToFloat(w3));
    return AnnoImage(bbox, uintBitsToFloat(w4), w5, ivec2(off_x, off_y));
}
|
||||
|
||||
// Encodes `s` as seven consecutive words at `ref` through allocation `a`.
void AnnoImage_write(Alloc a, AnnoImageRef ref, AnnoImage s) {
    uint base = ref.offset >> 2;
    // Pack offset.x into the low 16 bits and offset.y into the high 16 bits.
    uint offset_bits = (uint(s.offset.x) & 0xffff) | (uint(s.offset.y) << 16);

    write_mem(a, base + 0, floatBitsToUint(s.bbox.x));
    write_mem(a, base + 1, floatBitsToUint(s.bbox.y));
    write_mem(a, base + 2, floatBitsToUint(s.bbox.z));
    write_mem(a, base + 3, floatBitsToUint(s.bbox.w));
    write_mem(a, base + 4, floatBitsToUint(s.linewidth));
    write_mem(a, base + 5, s.index);
    write_mem(a, base + 6, offset_bits);
}
|
||||
|
||||
// Decodes the AnnoColor stored at `ref`, reading six consecutive words
// through allocation `a`.
AnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref) {
    uint base = ref.offset >> 2;
    uint w0 = read_mem(a, base + 0);
    uint w1 = read_mem(a, base + 1);
    uint w2 = read_mem(a, base + 2);
    uint w3 = read_mem(a, base + 3);
    uint w4 = read_mem(a, base + 4);
    uint w5 = read_mem(a, base + 5);

    vec4 bbox = vec4(uintBitsToFloat(w0), uintBitsToFloat(w1), uintBitsToFloat(w2), uintBitsToFloat(w3));
    return AnnoColor(bbox, uintBitsToFloat(w4), w5);
}
|
||||
|
||||
// Encodes `s` as six consecutive words at `ref` through allocation `a`.
void AnnoColor_write(Alloc a, AnnoColorRef ref, AnnoColor s) {
    uint base = ref.offset >> 2;
    write_mem(a, base + 0, floatBitsToUint(s.bbox.x));
    write_mem(a, base + 1, floatBitsToUint(s.bbox.y));
    write_mem(a, base + 2, floatBitsToUint(s.bbox.z));
    write_mem(a, base + 3, floatBitsToUint(s.bbox.w));
    write_mem(a, base + 4, floatBitsToUint(s.linewidth));
    write_mem(a, base + 5, s.rgba_color);
}
|
||||
|
||||
// Decodes the AnnoBeginClip stored at `ref`, reading five consecutive words
// through allocation `a`.
AnnoBeginClip AnnoBeginClip_read(Alloc a, AnnoBeginClipRef ref) {
    uint base = ref.offset >> 2;
    uint w0 = read_mem(a, base + 0);
    uint w1 = read_mem(a, base + 1);
    uint w2 = read_mem(a, base + 2);
    uint w3 = read_mem(a, base + 3);
    uint w4 = read_mem(a, base + 4);

    vec4 bbox = vec4(uintBitsToFloat(w0), uintBitsToFloat(w1), uintBitsToFloat(w2), uintBitsToFloat(w3));
    return AnnoBeginClip(bbox, uintBitsToFloat(w4));
}
|
||||
|
||||
// Encodes `s` as five consecutive words at `ref` through allocation `a`.
void AnnoBeginClip_write(Alloc a, AnnoBeginClipRef ref, AnnoBeginClip s) {
    uint base = ref.offset >> 2;
    write_mem(a, base + 0, floatBitsToUint(s.bbox.x));
    write_mem(a, base + 1, floatBitsToUint(s.bbox.y));
    write_mem(a, base + 2, floatBitsToUint(s.bbox.z));
    write_mem(a, base + 3, floatBitsToUint(s.bbox.w));
    write_mem(a, base + 4, floatBitsToUint(s.linewidth));
}
|
||||
|
||||
// Decodes the AnnoEndClip stored at `ref`, reading the four bbox words
// through allocation `a`.
AnnoEndClip AnnoEndClip_read(Alloc a, AnnoEndClipRef ref) {
    uint base = ref.offset >> 2;
    uint w0 = read_mem(a, base + 0);
    uint w1 = read_mem(a, base + 1);
    uint w2 = read_mem(a, base + 2);
    uint w3 = read_mem(a, base + 3);

    return AnnoEndClip(vec4(uintBitsToFloat(w0), uintBitsToFloat(w1), uintBitsToFloat(w2), uintBitsToFloat(w3)));
}
|
||||
|
||||
// Encodes the bbox of `s` as four consecutive words at `ref` through allocation `a`.
void AnnoEndClip_write(Alloc a, AnnoEndClipRef ref, AnnoEndClip s) {
    uint base = ref.offset >> 2;
    write_mem(a, base + 0, floatBitsToUint(s.bbox.x));
    write_mem(a, base + 1, floatBitsToUint(s.bbox.y));
    write_mem(a, base + 2, floatBitsToUint(s.bbox.z));
    write_mem(a, base + 3, floatBitsToUint(s.bbox.w));
}
|
||||
|
||||
// Reads the header word of the element at `ref` and splits it into its tag
// (low 16 bits) and flags (high 16 bits).
AnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref) {
    uint header = read_mem(a, ref.offset >> 2);
    uint tag = header & 0xffff;
    uint flags = header >> 16;
    return AnnotatedTag(tag, flags);
}
|
||||
|
||||
// Reads the AnnoColor payload of the element at `ref`.
AnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref) {
    // The payload begins right after the 4-byte header word.
    AnnoColorRef payload = AnnoColorRef(ref.offset + 4);
    return AnnoColor_read(a, payload);
}
|
||||
|
||||
// Reads the AnnoImage payload of the element at `ref`.
AnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref) {
    // The payload begins right after the 4-byte header word.
    AnnoImageRef payload = AnnoImageRef(ref.offset + 4);
    return AnnoImage_read(a, payload);
}
|
||||
|
||||
// Reads the AnnoBeginClip payload of the element at `ref`.
AnnoBeginClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref) {
    // The payload begins right after the 4-byte header word.
    AnnoBeginClipRef payload = AnnoBeginClipRef(ref.offset + 4);
    return AnnoBeginClip_read(a, payload);
}
|
||||
|
||||
// Reads the AnnoEndClip payload of the element at `ref`.
AnnoEndClip Annotated_EndClip_read(Alloc a, AnnotatedRef ref) {
    // The payload begins right after the 4-byte header word.
    AnnoEndClipRef payload = AnnoEndClipRef(ref.offset + 4);
    return AnnoEndClip_read(a, payload);
}
|
||||
|
||||
// Marks the element at `ref` as a no-op by storing Annotated_Nop in its header word.
void Annotated_Nop_write(Alloc a, AnnotatedRef ref) {
    uint header_ix = ref.offset >> 2;
    write_mem(a, header_ix, Annotated_Nop);
}
|
||||
|
||||
// Writes a Color element at `ref`: header word (flags in the high 16 bits,
// tag in the low bits) followed by the AnnoColor payload.
void Annotated_Color_write(Alloc a, AnnotatedRef ref, uint flags, AnnoColor s) {
    uint header = (flags << 16) | Annotated_Color;
    write_mem(a, ref.offset >> 2, header);
    AnnoColorRef payload = AnnoColorRef(ref.offset + 4);
    AnnoColor_write(a, payload, s);
}
|
||||
|
||||
// Writes an Image element at `ref`: header word (flags in the high 16 bits,
// tag in the low bits) followed by the AnnoImage payload.
void Annotated_Image_write(Alloc a, AnnotatedRef ref, uint flags, AnnoImage s) {
    uint header = (flags << 16) | Annotated_Image;
    write_mem(a, ref.offset >> 2, header);
    AnnoImageRef payload = AnnoImageRef(ref.offset + 4);
    AnnoImage_write(a, payload, s);
}
|
||||
|
||||
// Writes a BeginClip element at `ref`: header word (flags in the high 16 bits,
// tag in the low bits) followed by the AnnoBeginClip payload.
void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, uint flags, AnnoBeginClip s) {
    uint header = (flags << 16) | Annotated_BeginClip;
    write_mem(a, ref.offset >> 2, header);
    AnnoBeginClipRef payload = AnnoBeginClipRef(ref.offset + 4);
    AnnoBeginClip_write(a, payload, s);
}
|
||||
|
||||
// Writes an EndClip element at `ref`: the bare tag as header word (this
// variant takes no flags) followed by the AnnoEndClip payload.
void Annotated_EndClip_write(Alloc a, AnnotatedRef ref, AnnoEndClip s) {
    uint header_ix = ref.offset >> 2;
    write_mem(a, header_ix, Annotated_EndClip);
    AnnoEndClipRef payload = AnnoEndClipRef(ref.offset + 4);
    AnnoEndClip_write(a, payload, s);
}
|
||||
|
||||
@@ -1,109 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// Propagation of tile backdrop for filling.
|
||||
//
|
||||
// Each thread reads one path element and calculates the number of spanned tiles
|
||||
// based on the bounding box.
|
||||
// In a further compaction step, the workgroup loops over the corresponding tile rows per element in parallel.
|
||||
// For each row the per tile backdrop will be read, as calculated in the previous coarse path segment kernel,
|
||||
// and propagated from the left to the right (prefix summed).
|
||||
//
|
||||
// Output state:
|
||||
// - Each path element has an array of tiles covering the whole path based on bounding box
|
||||
// - Each tile per path element contains the 'backdrop' and a list of subdivided path segments
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "mem.h"
|
||||
#include "setup.h"
|
||||
|
||||
#define LG_BACKDROP_WG (7 + LG_WG_FACTOR)
|
||||
#define BACKDROP_WG (1 << LG_BACKDROP_WG)
|
||||
|
||||
layout(local_size_x = BACKDROP_WG, local_size_y = 1) in;
|
||||
|
||||
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||
Config conf;
|
||||
};
|
||||
|
||||
#include "annotated.h"
|
||||
#include "tile.h"
|
||||
|
||||
shared uint sh_row_count[BACKDROP_WG];
|
||||
shared Alloc sh_row_alloc[BACKDROP_WG];
|
||||
shared uint sh_row_width[BACKDROP_WG];
|
||||
|
||||
// Entry point of the backdrop kernel. It runs in three phases:
//  1. Each invocation loads one path element and publishes its tile-row count,
//     row width and tile allocation to shared memory.
//  2. An inclusive prefix sum turns the per-invocation row counts into
//     cumulative counts across the workgroup.
//  3. Invocations stride over all rows; each finds the owning element by
//     binary search and accumulates that row's values left to right.
void main() {
|
||||
uint th_ix = gl_LocalInvocationID.x;
|
||||
uint element_ix = gl_GlobalInvocationID.x;
|
||||
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
||||
|
||||
// Work assignment: 1 thread : 1 path element
|
||||
uint row_count = 0;
|
||||
// Snapshot of the memory error state, taken once at kernel entry.
bool mem_ok = mem_error == NO_ERROR;
|
||||
if (element_ix < conf.n_elements) {
|
||||
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
|
||||
switch (tag.tag) {
|
||||
case Annotated_Image:
|
||||
case Annotated_BeginClip:
|
||||
case Annotated_Color:
|
||||
// Only elements whose fill mode is MODE_NONZERO are processed further.
if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) {
|
||||
break;
|
||||
}
|
||||
// Fall through.
|
||||
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
|
||||
Path path = Path_read(conf.tile_alloc, path_ref);
|
||||
sh_row_width[th_ix] = path.bbox.z - path.bbox.x;
|
||||
row_count = path.bbox.w - path.bbox.y;
|
||||
// Paths that don't cross tile top edges don't have backdrops.
|
||||
// Don't apply the optimization to paths that may cross the y = 0
|
||||
// top edge, but clipped to 1 row.
|
||||
if (row_count == 1 && path.bbox.y > 0) {
|
||||
// Note: this can probably be expanded to width = 2 as
|
||||
// long as it doesn't cross the left edge.
|
||||
row_count = 0;
|
||||
}
|
||||
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
||||
sh_row_alloc[th_ix] = path_alloc;
|
||||
}
|
||||
}
|
||||
|
||||
sh_row_count[th_ix] = row_count;
|
||||
// Prefix sum of sh_row_count
|
||||
// Each of the LG_BACKDROP_WG steps adds the value published 2^i slots to the
// left; the barriers separate the shared reads from the following write.
for (uint i = 0; i < LG_BACKDROP_WG; i++) {
|
||||
barrier();
|
||||
if (th_ix >= (1 << i)) {
|
||||
row_count += sh_row_count[th_ix - (1 << i)];
|
||||
}
|
||||
barrier();
|
||||
sh_row_count[th_ix] = row_count;
|
||||
}
|
||||
barrier();
|
||||
// Work assignment: 1 thread : 1 path element row
|
||||
uint total_rows = sh_row_count[BACKDROP_WG - 1];
|
||||
for (uint row = th_ix; row < total_rows; row += BACKDROP_WG) {
|
||||
// Binary search to find element
|
||||
// sh_row_count holds inclusive sums, so the search converges on the first
// element whose cumulative count exceeds `row`.
uint el_ix = 0;
|
||||
for (uint i = 0; i < LG_BACKDROP_WG; i++) {
|
||||
uint probe = el_ix + ((BACKDROP_WG / 2) >> i);
|
||||
if (row >= sh_row_count[probe - 1]) {
|
||||
el_ix = probe;
|
||||
}
|
||||
}
|
||||
uint width = sh_row_width[el_ix];
|
||||
if (width > 0 && mem_ok) {
|
||||
// Process one row sequentially
|
||||
// Read backdrop value per tile and prefix sum it
|
||||
Alloc tiles_alloc = sh_row_alloc[el_ix];
|
||||
// Row index within the owning element.
uint seq_ix = row - (el_ix > 0 ? sh_row_count[el_ix - 1] : 0);
|
||||
// Tiles are stepped two words at a time; the +1 selects the second word of
// each tile (presumably the backdrop field - confirm against tile.h).
uint tile_el_ix = (tiles_alloc.offset >> 2) + 1 + seq_ix * 2 * width;
|
||||
uint sum = read_mem(tiles_alloc, tile_el_ix);
|
||||
for (uint x = 1; x < width; x++) {
|
||||
tile_el_ix += 2;
|
||||
sum += read_mem(tiles_alloc, tile_el_ix);
|
||||
write_mem(tiles_alloc, tile_el_ix, sum);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,147 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// The binning stage of the pipeline.
|
||||
//
|
||||
// Each workgroup processes N_TILE paths.
|
||||
// Each thread processes one path and calculates a N_TILE_X x N_TILE_Y coverage mask
|
||||
// based on the path bounding box to bin the paths.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "mem.h"
|
||||
#include "setup.h"
|
||||
|
||||
layout(local_size_x = N_TILE, local_size_y = 1) in;
|
||||
|
||||
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||
Config conf;
|
||||
};
|
||||
|
||||
#include "annotated.h"
|
||||
#include "bins.h"
|
||||
|
||||
// scale factors useful for converting coordinates to bins
|
||||
#define SX (1.0 / float(N_TILE_X * TILE_WIDTH_PX))
|
||||
#define SY (1.0 / float(N_TILE_Y * TILE_HEIGHT_PX))
|
||||
|
||||
// Constant not available in GLSL. Also consider uintBitsToFloat(0x7f800000)
|
||||
#define INFINITY (1.0 / 0.0)
|
||||
|
||||
// Note: cudaraster has N_TILE + 1 to cut down on bank conflicts.
|
||||
// Bitmaps are sliced (256bit into 8 (N_SLICE) 32bit submaps)
|
||||
shared uint bitmaps[N_SLICE][N_TILE];
|
||||
shared uint count[N_SLICE][N_TILE];
|
||||
shared Alloc sh_chunk_alloc[N_TILE];
|
||||
shared bool sh_alloc_failed;
|
||||
|
||||
// Entry point of the binning kernel. Each invocation plays two roles:
//  - as an element: it reads one annotated element, converts its bbox to bin
//    coordinates and sets its bit in the bitmap of every covered bin;
//  - as a bin: it counts the bits set for bin gl_LocalInvocationID.x,
//    allocates an output chunk of that size and records (count, chunk offset)
//    in conf.bin_alloc.
// Finally each element writes its index into the chunk of every bin it covers.
void main() {
|
||||
uint my_n_elements = conf.n_elements;
|
||||
uint my_partition = gl_WorkGroupID.x;
|
||||
|
||||
// Clear this invocation's column of the shared bitmaps and the failure flag
// before any invocation starts setting bits.
for (uint i = 0; i < N_SLICE; i++) {
|
||||
bitmaps[i][gl_LocalInvocationID.x] = 0;
|
||||
}
|
||||
if (gl_LocalInvocationID.x == 0) {
|
||||
sh_alloc_failed = false;
|
||||
}
|
||||
barrier();
|
||||
|
||||
// Read inputs and determine coverage of bins
|
||||
uint element_ix = my_partition * N_TILE + gl_LocalInvocationID.x;
|
||||
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
||||
// Out-of-range invocations keep the Nop tag and therefore an empty bbox.
uint tag = Annotated_Nop;
|
||||
if (element_ix < my_n_elements) {
|
||||
tag = Annotated_tag(conf.anno_alloc, ref).tag;
|
||||
}
|
||||
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
|
||||
switch (tag) {
|
||||
case Annotated_Color:
|
||||
case Annotated_Image:
|
||||
case Annotated_BeginClip:
|
||||
case Annotated_EndClip:
|
||||
// Note: we take advantage of the fact that these drawing elements
|
||||
// have the bbox at the same place in their layout.
|
||||
AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
|
||||
x0 = int(floor(clip.bbox.x * SX));
|
||||
y0 = int(floor(clip.bbox.y * SY));
|
||||
x1 = int(ceil(clip.bbox.z * SX));
|
||||
y1 = int(ceil(clip.bbox.w * SY));
|
||||
break;
|
||||
}
|
||||
|
||||
// At this point, we run an iterator over the coverage area,
|
||||
// trying to keep divergence low.
|
||||
// Right now, it's just a bbox, but we'll get finer with
|
||||
// segments.
|
||||
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1)/N_TILE_X;
|
||||
uint height_in_bins = (conf.height_in_tiles + N_TILE_Y - 1)/N_TILE_Y;
|
||||
x0 = clamp(x0, 0, int(width_in_bins));
|
||||
x1 = clamp(x1, x0, int(width_in_bins));
|
||||
y0 = clamp(y0, 0, int(height_in_bins));
|
||||
y1 = clamp(y1, y0, int(height_in_bins));
|
||||
// Collapse the y range when the x range is empty: the loops below advance y
// only when x reaches x1, which an empty x range would never do.
if (x0 == x1) y1 = y0;
|
||||
int x = x0, y = y0;
|
||||
// Each element owns one bit: slice = local index / 32, bit = local index % 32.
uint my_slice = gl_LocalInvocationID.x / 32;
|
||||
uint my_mask = 1 << (gl_LocalInvocationID.x & 31);
|
||||
while (y < y1) {
|
||||
atomicOr(bitmaps[my_slice][y * width_in_bins + x], my_mask);
|
||||
x++;
|
||||
if (x == x1) {
|
||||
x = x0;
|
||||
y++;
|
||||
}
|
||||
}
|
||||
|
||||
barrier();
|
||||
// Allocate output segments.
|
||||
// From here the invocation acts for bin gl_LocalInvocationID.x; count[i][bin]
// stores the running total of set bits through slice i.
uint element_count = 0;
|
||||
for (uint i = 0; i < N_SLICE; i++) {
|
||||
element_count += bitCount(bitmaps[i][gl_LocalInvocationID.x]);
|
||||
count[i][gl_LocalInvocationID.x] = element_count;
|
||||
}
|
||||
// element_count is number of elements covering bin for this invocation.
|
||||
Alloc chunk_alloc = new_alloc(0, 0, true);
|
||||
if (element_count != 0) {
|
||||
// TODO: aggregate atomic adds (subgroup is probably fastest)
|
||||
MallocResult chunk = malloc(element_count * BinInstance_size);
|
||||
chunk_alloc = chunk.alloc;
|
||||
sh_chunk_alloc[gl_LocalInvocationID.x] = chunk_alloc;
|
||||
if (chunk.failed) {
|
||||
sh_alloc_failed = true;
|
||||
}
|
||||
}
|
||||
// Note: it might be more efficient for reading to do this in the
|
||||
// other order (each bin is a contiguous sequence of partitions)
|
||||
// Two words per (partition, bin) pair: the element count, then the chunk offset.
uint out_ix = (conf.bin_alloc.offset >> 2) + (my_partition * N_TILE + gl_LocalInvocationID.x) * 2;
|
||||
write_mem(conf.bin_alloc, out_ix, element_count);
|
||||
write_mem(conf.bin_alloc, out_ix + 1, chunk_alloc.offset);
|
||||
|
||||
barrier();
|
||||
// Skip the scatter below if any allocation in the workgroup failed or a
// memory error is flagged.
if (sh_alloc_failed || mem_error != NO_ERROR) {
|
||||
return;
|
||||
}
|
||||
|
||||
// Use similar strategy as Laine & Karras paper; loop over bbox of bins
|
||||
// touched by this element
|
||||
x = x0;
|
||||
y = y0;
|
||||
while (y < y1) {
|
||||
uint bin_ix = y * width_in_bins + x;
|
||||
uint out_mask = bitmaps[my_slice][bin_ix];
|
||||
if ((out_mask & my_mask) != 0) {
|
||||
// Slot within the bin's chunk: set bits below ours in this slice, plus
// the totals of all earlier slices.
uint idx = bitCount(out_mask & (my_mask - 1));
|
||||
if (my_slice > 0) {
|
||||
idx += count[my_slice - 1][bin_ix];
|
||||
}
|
||||
Alloc out_alloc = sh_chunk_alloc[bin_ix];
|
||||
uint out_offset = out_alloc.offset + idx * BinInstance_size;
|
||||
BinInstance_write(out_alloc, BinInstanceRef(out_offset), BinInstance(element_ix));
|
||||
}
|
||||
x++;
|
||||
if (x == x1) {
|
||||
x = x0;
|
||||
y++;
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,31 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
// Reference to a BinInstance record, held as a byte offset (the read/write
// helpers shift it right by 2 to obtain a 32-bit word index).
struct BinInstanceRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// One bin entry: the index of an element. Serialized as a single word
// (BinInstance_size = 4 bytes).
struct BinInstance {
|
||||
uint element_ix;
|
||||
};
|
||||
|
||||
#define BinInstance_size 4
|
||||
|
||||
// Returns a reference to the BinInstance that lies `index` records past `ref`.
BinInstanceRef BinInstance_index(BinInstanceRef ref, uint index) {
    uint advance = index * BinInstance_size;
    return BinInstanceRef(ref.offset + advance);
}
|
||||
|
||||
// Decodes the single-word BinInstance stored at `ref` through allocation `a`.
BinInstance BinInstance_read(Alloc a, BinInstanceRef ref) {
    uint base = ref.offset >> 2;
    return BinInstance(read_mem(a, base + 0));
}
|
||||
|
||||
// Encodes `s` as a single word at `ref` through allocation `a`.
void BinInstance_write(Alloc a, BinInstanceRef ref, BinInstance s) {
    uint base = ref.offset >> 2;
    write_mem(a, base + 0, s.element_ix);
}
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
precision mediump float;
|
||||
|
||||
layout(location=0) in vec2 vUV;
|
||||
|
||||
{{.Header}}
|
||||
|
||||
layout(location = 0) out vec4 fragColor;
|
||||
|
||||
// Fragment entry point. This file is a template: {{.FetchColorExpr}} is
// replaced with the expression that yields the output color before the
// shader is compiled.
void main() {
|
||||
fragColor = {{.FetchColorExpr}};
|
||||
}
|
||||
@@ -1,28 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
precision highp float;
|
||||
|
||||
#include "common.h"
|
||||
|
||||
layout(binding = 0) uniform Block {
|
||||
vec4 transform;
|
||||
vec4 uvTransformR1;
|
||||
vec4 uvTransformR2;
|
||||
float z;
|
||||
} _block;
|
||||
|
||||
layout(location = 0) in vec2 pos;
|
||||
|
||||
layout(location = 1) in vec2 uv;
|
||||
|
||||
layout(location = 0) out vec2 vUV;
|
||||
|
||||
// Vertex entry point: scales and translates the input position by the uniform
// transform, emits it in clip space at depth _block.z, and applies the 3x2 UV
// transform to the texture coordinate.
void main() {
    vec2 scale = _block.transform.xy;
    vec2 translate = _block.transform.zw;
    vec2 p = pos*scale + translate;
    gl_Position = toClipSpace(vec4(p, _block.z, 1));

    m3x2 uv_xform = m3x2(_block.uvTransformR1.xyz, _block.uvTransformR2.xyz);
    vec3 uv_h = vec3(uv,1);
    vUV = transform3x2(uv_xform, uv_h).xy;
}
|
||||
@@ -1,426 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// The coarse rasterizer stage of the pipeline.
|
||||
//
|
||||
// As input we have the ordered partitions of paths from the binning phase and
|
||||
// the annotated tile list of segments and backdrop per path.
|
||||
//
|
||||
// Each workgroup operating on one bin by stream compacting
|
||||
// the elements corresponding to the bin.
|
||||
//
|
||||
// As output we have an ordered command stream per tile. Every tile from a path (backdrop + segment list) will be encoded.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "mem.h"
|
||||
#include "setup.h"
|
||||
|
||||
layout(local_size_x = N_TILE, local_size_y = 1) in;
|
||||
|
||||
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||
Config conf;
|
||||
};
|
||||
|
||||
#include "annotated.h"
|
||||
#include "bins.h"
|
||||
#include "tile.h"
|
||||
#include "ptcl.h"
|
||||
|
||||
#define LG_N_PART_READ (7 + LG_WG_FACTOR)
|
||||
#define N_PART_READ (1 << LG_N_PART_READ)
|
||||
|
||||
shared uint sh_elements[N_TILE];
|
||||
|
||||
// Number of elements in the partition; prefix sum.
|
||||
shared uint sh_part_count[N_PART_READ];
|
||||
shared Alloc sh_part_elements[N_PART_READ];
|
||||
|
||||
shared uint sh_bitmaps[N_SLICE][N_TILE];
|
||||
|
||||
shared uint sh_tile_count[N_TILE];
|
||||
// The width of the tile rect for the element, intersected with this bin
|
||||
shared uint sh_tile_width[N_TILE];
|
||||
shared uint sh_tile_x0[N_TILE];
|
||||
shared uint sh_tile_y0[N_TILE];
|
||||
|
||||
// These are set up so base + tile_y * stride + tile_x points to a Tile.
|
||||
shared uint sh_tile_base[N_TILE];
|
||||
shared uint sh_tile_stride[N_TILE];
|
||||
|
||||
// Two interchangeable implementations of write_tile_alloc/read_tile_alloc,
// selected by MEM_DEBUG. With MEM_DEBUG the per-element Alloc is kept in
// shared memory and returned as stored; without it nothing is stored and
// reads return an Alloc spanning the whole memory buffer.
#ifdef MEM_DEBUG
|
||||
// Store allocs only when MEM_DEBUG to save shared memory traffic.
|
||||
shared Alloc sh_tile_alloc[N_TILE];
|
||||
|
||||
// Remembers the tile allocation `a` for element slot `el_ix`.
void write_tile_alloc(uint el_ix, Alloc a) {
|
||||
sh_tile_alloc[el_ix] = a;
|
||||
}
|
||||
|
||||
// Returns the allocation stored for slot `el_ix`; `mem_ok` is unused here.
Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
|
||||
return sh_tile_alloc[el_ix];
|
||||
}
|
||||
#else
|
||||
// Non-debug variant: both arguments are ignored.
void write_tile_alloc(uint el_ix, Alloc a) {
|
||||
// No-op
|
||||
}
|
||||
|
||||
// Non-debug variant: `el_ix` is ignored and the returned Alloc starts at
// offset 0 with a size of memory.length()*4.
Alloc read_tile_alloc(uint el_ix, bool mem_ok) {
|
||||
// All memory.
|
||||
return new_alloc(0, memory.length()*4, mem_ok);
|
||||
}
|
||||
#endif
|
||||
|
||||
// The maximum number of commands per annotated element.
|
||||
#define ANNO_COMMANDS 2
|
||||
|
||||
// Perhaps cmd_alloc should be a global? This is a style question.
|
||||
// Ensures there is room to append commands at `cmd_ref`.
//
// When `cmd_ref` has reached `cmd_limit`, a fresh PTCL_INITIAL_ALLOC-byte
// chunk is allocated, a jump to it is written at the current position, and
// `cmd_alloc`, `cmd_ref` and `cmd_limit` are updated to describe the new chunk.
// Returns false only if that allocation fails; the inout arguments are then
// left untouched.
bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit) {
    bool has_room = cmd_ref.offset < cmd_limit;
    if (!has_room) {
        MallocResult fresh = malloc(PTCL_INITIAL_ALLOC);
        if (fresh.failed) {
            return false;
        }
        // Chain the old chunk to the new one before switching over.
        Cmd_Jump_write(cmd_alloc, cmd_ref, CmdJump(fresh.alloc.offset));
        cmd_alloc = fresh.alloc;
        cmd_ref = CmdRef(cmd_alloc.offset);
        // Keep headroom for the maximum number of commands per element plus
        // one trailing jump.
        uint headroom = (ANNO_COMMANDS + 1) * Cmd_size;
        cmd_limit = cmd_alloc.offset + PTCL_INITIAL_ALLOC - headroom;
    }
    return true;
}
|
||||
|
||||
// Appends the coverage command for `tile` at `cmd_ref` and advances `cmd_ref`
// past it (4 bytes plus the command's payload size, if any):
//  - fill mode other than MODE_NONZERO: a stroke command with half of `linewidth`;
//  - MODE_NONZERO and tile.tile.offset == 0: a solid command;
//  - MODE_NONZERO otherwise: a fill command carrying the tile's offset and backdrop.
void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float linewidth) {
    if (fill_mode_from_flags(flags) != MODE_NONZERO) {
        CmdStroke stroke_cmd = CmdStroke(tile.tile.offset, 0.5 * linewidth);
        Cmd_Stroke_write(alloc, cmd_ref, stroke_cmd);
        cmd_ref.offset += 4 + CmdStroke_size;
        return;
    }
    if (tile.tile.offset == 0) {
        Cmd_Solid_write(alloc, cmd_ref);
        cmd_ref.offset += 4;
        return;
    }
    CmdFill fill_cmd = CmdFill(tile.tile.offset, tile.backdrop);
    Cmd_Fill_write(alloc, cmd_ref, fill_cmd);
    cmd_ref.offset += 4 + CmdFill_size;
}
|
||||
|
||||
void main() {
|
||||
// Could use either linear or 2d layouts for both dispatch and
|
||||
// invocations within the workgroup. We'll use variables to abstract.
|
||||
uint width_in_bins = (conf.width_in_tiles + N_TILE_X - 1)/N_TILE_X;
|
||||
uint bin_ix = width_in_bins * gl_WorkGroupID.y + gl_WorkGroupID.x;
|
||||
uint partition_ix = 0;
|
||||
uint n_partitions = (conf.n_elements + N_TILE - 1) / N_TILE;
|
||||
uint th_ix = gl_LocalInvocationID.x;
|
||||
|
||||
// Coordinates of top left of bin, in tiles.
|
||||
uint bin_tile_x = N_TILE_X * gl_WorkGroupID.x;
|
||||
uint bin_tile_y = N_TILE_Y * gl_WorkGroupID.y;
|
||||
|
||||
// Per-tile state
|
||||
uint tile_x = gl_LocalInvocationID.x % N_TILE_X;
|
||||
uint tile_y = gl_LocalInvocationID.x / N_TILE_X;
|
||||
uint this_tile_ix = (bin_tile_y + tile_y) * conf.width_in_tiles + bin_tile_x + tile_x;
|
||||
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, this_tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
|
||||
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
|
||||
// Reserve space for the maximum number of commands and a potential jump.
|
||||
uint cmd_limit = cmd_ref.offset + PTCL_INITIAL_ALLOC - (ANNO_COMMANDS + 1) * Cmd_size;
|
||||
// The nesting depth of the clip stack
|
||||
uint clip_depth = 0;
|
||||
// State for the "clip zero" optimization. If it's nonzero, then we are
|
||||
// currently in a clip for which the entire tile has an alpha of zero, and
|
||||
// the value is the depth after the "begin clip" of that element.
|
||||
uint clip_zero_depth = 0;
|
||||
// State for the "clip one" optimization. If bit `i` is set, then that means
|
||||
// that the clip pushed at depth `i` has an alpha of all one.
|
||||
uint clip_one_mask = 0;
|
||||
|
||||
// I'm sure we can figure out how to do this with at least one fewer register...
|
||||
// Items up to rd_ix have been read from sh_elements
|
||||
uint rd_ix = 0;
|
||||
// Items up to wr_ix have been written into sh_elements
|
||||
uint wr_ix = 0;
|
||||
// Items between part_start_ix and ready_ix are ready to be transferred from sh_part_elements
|
||||
uint part_start_ix = 0;
|
||||
uint ready_ix = 0;
|
||||
|
||||
// Leave room for the fine rasterizer scratch allocation.
|
||||
Alloc scratch_alloc = slice_mem(cmd_alloc, 0, Alloc_size);
|
||||
cmd_ref.offset += Alloc_size;
|
||||
|
||||
uint num_begin_slots = 0;
|
||||
uint begin_slot = 0;
|
||||
bool mem_ok = mem_error == NO_ERROR;
|
||||
while (true) {
|
||||
for (uint i = 0; i < N_SLICE; i++) {
|
||||
sh_bitmaps[i][th_ix] = 0;
|
||||
}
|
||||
|
||||
// parallel read of input partitions
|
||||
do {
|
||||
if (ready_ix == wr_ix && partition_ix < n_partitions) {
|
||||
part_start_ix = ready_ix;
|
||||
uint count = 0;
|
||||
if (th_ix < N_PART_READ && partition_ix + th_ix < n_partitions) {
|
||||
uint in_ix = (conf.bin_alloc.offset >> 2) + ((partition_ix + th_ix) * N_TILE + bin_ix) * 2;
|
||||
count = read_mem(conf.bin_alloc, in_ix);
|
||||
uint offset = read_mem(conf.bin_alloc, in_ix + 1);
|
||||
sh_part_elements[th_ix] = new_alloc(offset, count*BinInstance_size, mem_ok);
|
||||
}
|
||||
// prefix sum of counts
|
||||
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
||||
if (th_ix < N_PART_READ) {
|
||||
sh_part_count[th_ix] = count;
|
||||
}
|
||||
barrier();
|
||||
if (th_ix < N_PART_READ) {
|
||||
if (th_ix >= (1 << i)) {
|
||||
count += sh_part_count[th_ix - (1 << i)];
|
||||
}
|
||||
}
|
||||
barrier();
|
||||
}
|
||||
if (th_ix < N_PART_READ) {
|
||||
sh_part_count[th_ix] = part_start_ix + count;
|
||||
}
|
||||
barrier();
|
||||
ready_ix = sh_part_count[N_PART_READ - 1];
|
||||
partition_ix += N_PART_READ;
|
||||
}
|
||||
// use binary search to find element to read
|
||||
uint ix = rd_ix + th_ix;
|
||||
if (ix >= wr_ix && ix < ready_ix && mem_ok) {
|
||||
uint part_ix = 0;
|
||||
for (uint i = 0; i < LG_N_PART_READ; i++) {
|
||||
uint probe = part_ix + ((N_PART_READ / 2) >> i);
|
||||
if (ix >= sh_part_count[probe - 1]) {
|
||||
part_ix = probe;
|
||||
}
|
||||
}
|
||||
ix -= part_ix > 0 ? sh_part_count[part_ix - 1] : part_start_ix;
|
||||
Alloc bin_alloc = sh_part_elements[part_ix];
|
||||
BinInstanceRef inst_ref = BinInstanceRef(bin_alloc.offset);
|
||||
BinInstance inst = BinInstance_read(bin_alloc, BinInstance_index(inst_ref, ix));
|
||||
sh_elements[th_ix] = inst.element_ix;
|
||||
}
|
||||
barrier();
|
||||
|
||||
wr_ix = min(rd_ix + N_TILE, ready_ix);
|
||||
} while (wr_ix - rd_ix < N_TILE && (wr_ix < ready_ix || partition_ix < n_partitions));
|
||||
|
||||
// We've done the merge and filled the buffer.
|
||||
|
||||
// Read one element, compute coverage.
|
||||
uint tag = Annotated_Nop;
|
||||
uint element_ix;
|
||||
AnnotatedRef ref;
|
||||
if (th_ix + rd_ix < wr_ix) {
|
||||
element_ix = sh_elements[th_ix];
|
||||
ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
||||
tag = Annotated_tag(conf.anno_alloc, ref).tag;
|
||||
}
|
||||
|
||||
// Bounding box of element in pixel coordinates.
|
||||
uint tile_count;
|
||||
switch (tag) {
|
||||
case Annotated_Color:
|
||||
case Annotated_Image:
|
||||
case Annotated_BeginClip:
|
||||
case Annotated_EndClip:
|
||||
// We have one "path" for each element, even if the element isn't
|
||||
// actually a path (currently EndClip, but images etc in the future).
|
||||
uint path_ix = element_ix;
|
||||
Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
|
||||
uint stride = path.bbox.z - path.bbox.x;
|
||||
sh_tile_stride[th_ix] = stride;
|
||||
int dx = int(path.bbox.x) - int(bin_tile_x);
|
||||
int dy = int(path.bbox.y) - int(bin_tile_y);
|
||||
int x0 = clamp(dx, 0, N_TILE_X);
|
||||
int y0 = clamp(dy, 0, N_TILE_Y);
|
||||
int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, N_TILE_X);
|
||||
int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, N_TILE_Y);
|
||||
sh_tile_width[th_ix] = uint(x1 - x0);
|
||||
sh_tile_x0[th_ix] = x0;
|
||||
sh_tile_y0[th_ix] = y0;
|
||||
tile_count = uint(x1 - x0) * uint(y1 - y0);
|
||||
// base relative to bin
|
||||
uint base = path.tiles.offset - uint(dy * stride + dx) * Tile_size;
|
||||
sh_tile_base[th_ix] = base;
|
||||
Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
|
||||
write_tile_alloc(th_ix, path_alloc);
|
||||
break;
|
||||
default:
|
||||
tile_count = 0;
|
||||
break;
|
||||
}
|
||||
|
||||
// Prefix sum of sh_tile_count
|
||||
sh_tile_count[th_ix] = tile_count;
|
||||
for (uint i = 0; i < LG_N_TILE; i++) {
|
||||
barrier();
|
||||
if (th_ix >= (1 << i)) {
|
||||
tile_count += sh_tile_count[th_ix - (1 << i)];
|
||||
}
|
||||
barrier();
|
||||
sh_tile_count[th_ix] = tile_count;
|
||||
}
|
||||
barrier();
|
||||
uint total_tile_count = sh_tile_count[N_TILE - 1];
|
||||
for (uint ix = th_ix; ix < total_tile_count; ix += N_TILE) {
|
||||
// Binary search to find element
|
||||
uint el_ix = 0;
|
||||
for (uint i = 0; i < LG_N_TILE; i++) {
|
||||
uint probe = el_ix + ((N_TILE / 2) >> i);
|
||||
if (ix >= sh_tile_count[probe - 1]) {
|
||||
el_ix = probe;
|
||||
}
|
||||
}
|
||||
AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + sh_elements[el_ix] * Annotated_size);
|
||||
uint tag = Annotated_tag(conf.anno_alloc, ref).tag;
|
||||
uint seq_ix = ix - (el_ix > 0 ? sh_tile_count[el_ix - 1] : 0);
|
||||
uint width = sh_tile_width[el_ix];
|
||||
uint x = sh_tile_x0[el_ix] + seq_ix % width;
|
||||
uint y = sh_tile_y0[el_ix] + seq_ix / width;
|
||||
bool include_tile = false;
|
||||
if (tag == Annotated_BeginClip || tag == Annotated_EndClip) {
|
||||
include_tile = true;
|
||||
} else if (mem_ok) {
|
||||
Tile tile = Tile_read(read_tile_alloc(el_ix, mem_ok), TileRef(sh_tile_base[el_ix] + (sh_tile_stride[el_ix] * y + x) * Tile_size));
|
||||
// Include the path in the tile if
|
||||
// - the tile contains at least a segment (tile offset non-zero)
|
||||
// - the tile is completely covered (backdrop non-zero)
|
||||
include_tile = tile.tile.offset != 0 || tile.backdrop != 0;
|
||||
}
|
||||
if (include_tile) {
|
||||
uint el_slice = el_ix / 32;
|
||||
uint el_mask = 1 << (el_ix & 31);
|
||||
atomicOr(sh_bitmaps[el_slice][y * N_TILE_X + x], el_mask);
|
||||
}
|
||||
}
|
||||
|
||||
barrier();
|
||||
|
||||
// Output non-segment elements for this tile. The thread does a sequential walk
|
||||
// through the non-segment elements.
|
||||
uint slice_ix = 0;
|
||||
uint bitmap = sh_bitmaps[0][th_ix];
|
||||
while (mem_ok) {
|
||||
if (bitmap == 0) {
|
||||
slice_ix++;
|
||||
if (slice_ix == N_SLICE) {
|
||||
break;
|
||||
}
|
||||
bitmap = sh_bitmaps[slice_ix][th_ix];
|
||||
if (bitmap == 0) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
uint element_ref_ix = slice_ix * 32 + findLSB(bitmap);
|
||||
uint element_ix = sh_elements[element_ref_ix];
|
||||
|
||||
// Clear LSB
|
||||
bitmap &= bitmap - 1;
|
||||
|
||||
// At this point, we read the element again from global memory.
|
||||
// If that turns out to be expensive, maybe we can pack it into
|
||||
// shared memory (or perhaps just the tag).
|
||||
ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);
|
||||
AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref);
|
||||
|
||||
if (clip_zero_depth == 0) {
|
||||
switch (tag.tag) {
|
||||
case Annotated_Color:
|
||||
Tile tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
|
||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||
AnnoColor fill = Annotated_Color_read(conf.anno_alloc, ref);
|
||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||
break;
|
||||
}
|
||||
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill.linewidth);
|
||||
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(fill.rgba_color));
|
||||
cmd_ref.offset += 4 + CmdColor_size;
|
||||
break;
|
||||
case Annotated_Image:
|
||||
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
|
||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||
AnnoImage fill_img = Annotated_Image_read(conf.anno_alloc, ref);
|
||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||
break;
|
||||
}
|
||||
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill_img.linewidth);
|
||||
Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(fill_img.index, fill_img.offset));
|
||||
cmd_ref.offset += 4 + CmdImage_size;
|
||||
break;
|
||||
case Annotated_BeginClip:
|
||||
tile = Tile_read(read_tile_alloc(element_ref_ix, mem_ok), TileRef(sh_tile_base[element_ref_ix]
|
||||
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
|
||||
if (tile.tile.offset == 0 && tile.backdrop == 0) {
|
||||
clip_zero_depth = clip_depth + 1;
|
||||
} else if (tile.tile.offset == 0 && clip_depth < 32) {
|
||||
clip_one_mask |= (1 << clip_depth);
|
||||
} else {
|
||||
AnnoBeginClip begin_clip = Annotated_BeginClip_read(conf.anno_alloc, ref);
|
||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||
break;
|
||||
}
|
||||
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, begin_clip.linewidth);
|
||||
Cmd_BeginClip_write(cmd_alloc, cmd_ref);
|
||||
cmd_ref.offset += 4;
|
||||
if (clip_depth < 32) {
|
||||
clip_one_mask &= ~(1 << clip_depth);
|
||||
}
|
||||
begin_slot++;
|
||||
num_begin_slots = max(num_begin_slots, begin_slot);
|
||||
}
|
||||
clip_depth++;
|
||||
break;
|
||||
case Annotated_EndClip:
|
||||
clip_depth--;
|
||||
if (clip_depth >= 32 || (clip_one_mask & (1 << clip_depth)) == 0) {
|
||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||
break;
|
||||
}
|
||||
Cmd_Solid_write(cmd_alloc, cmd_ref);
|
||||
cmd_ref.offset += 4;
|
||||
begin_slot--;
|
||||
Cmd_EndClip_write(cmd_alloc, cmd_ref);
|
||||
cmd_ref.offset += 4;
|
||||
}
|
||||
break;
|
||||
}
|
||||
} else {
|
||||
// In "clip zero" state, suppress all drawing
|
||||
switch (tag.tag) {
|
||||
case Annotated_BeginClip:
|
||||
clip_depth++;
|
||||
break;
|
||||
case Annotated_EndClip:
|
||||
if (clip_depth == clip_zero_depth) {
|
||||
clip_zero_depth = 0;
|
||||
}
|
||||
clip_depth--;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
barrier();
|
||||
|
||||
rd_ix += N_TILE;
|
||||
if (rd_ix >= ready_ix && partition_ix >= n_partitions) break;
|
||||
}
|
||||
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
|
||||
Cmd_End_write(cmd_alloc, cmd_ref);
|
||||
if (num_begin_slots > 0) {
|
||||
// Write scratch allocation: one state per BeginClip per rasterizer chunk.
|
||||
uint scratch_size = num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4;
|
||||
MallocResult scratch = malloc(scratch_size);
|
||||
// Ignore scratch.failed; we don't use the allocation and kernel4
|
||||
// checks for memory overflow before using it.
|
||||
alloc_write(scratch_alloc, scratch_alloc.offset, scratch.alloc);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1,51 +0,0 @@
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
// m3x2 holds the first two rows of a 3x3 affine transformation.
// The third row is not stored; transform3x2 treats it as [0, 0, 1].
struct m3x2 {
	// First row: produces the x component.
	vec3 r0;
	// Second row: produces the y component.
	vec3 r1;
};
|
||||
|
||||
// fboTextureTransform cancels the implied transformation between
// the framebuffer and its texture.
// Only the first two matrix rows are stored; the last row is implied
// to be [0, 0, 1].
// The first row is the same for every target. With HLSL defined the
// second row maps v to 1 - v, otherwise it is the identity row.
const m3x2 fboTextureTransform = m3x2(
	vec3(1.0, 0.0, 0.0),
#ifdef HLSL
	vec3(0.0, -1.0, 1.0)
#else
	vec3(0.0, 1.0, 0.0)
#endif
);
|
||||
|
||||
// fboTransform cancels the implied transformation between
// the clip space and the framebuffer.
// Only the first two matrix rows are stored; the last row is implied
// to be [0, 0, 1].
// The first row is the same for every target. With HLSL defined the
// second row is the identity row, otherwise it negates y.
const m3x2 fboTransform = m3x2(
	vec3(1.0, 0.0, 0.0),
#ifdef HLSL
	vec3(0.0, 1.0, 0.0)
#else
	vec3(0.0, -1.0, 0.0)
#endif
);
|
||||
|
||||
// toClipSpace converts an OpenGL gl_Position value to a
// native GPU position. Without HLSL defined the position is
// returned unchanged.
vec4 toClipSpace(vec4 pos) {
#ifdef HLSL
	// Map depths to the Direct3D [0; 1] range.
	pos.z = .5*(pos.z + pos.w);
#endif
	return pos;
}
|
||||
|
||||
// transform3x2 multiplies v by the 3x3 matrix whose first two rows are
// t.r0 and t.r1 and whose last row is [0, 0, 1].
vec3 transform3x2(m3x2 t, vec3 v) {
	float x = dot(t.r0, v);
	float y = dot(t.r1, v);
	// The implied last row.
	float z = dot(vec3(0.0, 0.0, 1.0), v);
	return vec3(x, y, z);
}
|
||||
@@ -1,24 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
precision mediump float;
|
||||
|
||||
layout(binding = 0) uniform sampler2D tex;
|
||||
|
||||
layout(location = 0) in vec2 vUV;
|
||||
|
||||
layout(location = 0) out vec4 fragColor;
|
||||
|
||||
// sRGBtoRGB converts each component of rgb with a piecewise curve:
// components below 0.04045 are divided by 12.92, the others go through
// the 2.4 power segment.
vec3 sRGBtoRGB(vec3 rgb) {
	vec3 linearSegment = rgb/vec3(12.92);
	vec3 powerSegment = pow((rgb + vec3(0.055))/vec3(1.055), vec3(2.4));
	// Per-component selector: true picks the power segment.
	bvec3 usePower = greaterThanEqual(rgb, vec3(0.04045));
	return mix(linearSegment, powerSegment, usePower);
}
|
||||
|
||||
// Sample the source texture, convert its color channels with sRGBtoRGB
// and pass alpha through unchanged.
void main() {
	vec4 src = texture(tex, vUV);
	fragColor = vec4(sRGBtoRGB(src.rgb), src.a);
}
|
||||
@@ -1,21 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
precision highp float;
|
||||
|
||||
layout(binding = 0) uniform Block {
|
||||
vec2 scale;
|
||||
vec2 pos;
|
||||
vec2 uvScale;
|
||||
} _block;
|
||||
|
||||
layout(location = 0) in vec2 pos;
|
||||
layout(location = 1) in vec2 uv;
|
||||
|
||||
layout(location = 0) out vec2 vUV;
|
||||
|
||||
// Scale and offset the vertex position with the uniform block and
// scale the texture coordinate for the fragment stage.
void main() {
	vec2 p = pos*_block.scale + _block.pos;
	gl_Position = vec4(p, 0, 1);
	vUV = uv*_block.uvScale;
}
|
||||
@@ -1,22 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
precision mediump float;
|
||||
|
||||
{{.Header}}
|
||||
|
||||
// Use high precision to be pixel accurate for
|
||||
// large cover atlases.
|
||||
layout(location = 0) in highp vec2 vCoverUV;
|
||||
layout(location = 1) in vec2 vUV;
|
||||
|
||||
layout(binding = 1) uniform sampler2D cover;
|
||||
|
||||
layout(location = 0) out vec4 fragColor;
|
||||
|
||||
// Fetch the material color through the template expression and
// modulate it by the coverage sampled from the cover atlas.
void main() {
	fragColor = {{.FetchColorExpr}};
	// Coverage is the magnitude of the red channel, limited to 1.
	float coverage = min(abs(texture(cover, vCoverUV).r), 1.0);
	fragColor = fragColor*coverage;
}
|
||||
@@ -1,31 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
precision highp float;
|
||||
|
||||
#include "common.h"
|
||||
|
||||
layout(binding = 0) uniform Block {
|
||||
vec4 transform;
|
||||
vec4 uvCoverTransform;
|
||||
vec4 uvTransformR1;
|
||||
vec4 uvTransformR2;
|
||||
float z;
|
||||
} _block;
|
||||
|
||||
layout(location = 0) in vec2 pos;
|
||||
|
||||
layout(location = 0) out vec2 vCoverUV;
|
||||
|
||||
layout(location = 1) in vec2 uv;
|
||||
layout(location = 1) out vec2 vUV;
|
||||
|
||||
void main() {
|
||||
gl_Position = toClipSpace(vec4(pos*_block.transform.xy + _block.transform.zw, _block.z, 1));
|
||||
vUV = transform3x2(m3x2(_block.uvTransformR1.xyz, _block.uvTransformR2.xyz), vec3(uv,1)).xy;
|
||||
vec3 uv3 = transform3x2(fboTextureTransform, vec3(uv, 1.0));
|
||||
vCoverUV = (uv3*vec3(_block.uvCoverTransform.xy, 1.0)+vec3(_block.uvCoverTransform.zw, 0.0)).xy;
|
||||
}
|
||||
@@ -1,410 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// The element processing stage, first in the pipeline.
|
||||
//
|
||||
// This stage is primarily about applying transforms and computing bounding
|
||||
// boxes. It is organized as a scan over the input elements, producing
|
||||
// annotated output elements.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "mem.h"
|
||||
#include "setup.h"
|
||||
|
||||
#define N_ROWS 4
|
||||
#define WG_SIZE 32
|
||||
#define LG_WG_SIZE 5
|
||||
#define PARTITION_SIZE (WG_SIZE * N_ROWS)
|
||||
|
||||
layout(local_size_x = WG_SIZE, local_size_y = 1) in;
|
||||
|
||||
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||
Config conf;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 2) readonly buffer SceneBuf {
|
||||
uint[] scene;
|
||||
};
|
||||
|
||||
// It would be better to use the Vulkan memory model than
|
||||
// "volatile" but shooting for compatibility here rather
|
||||
// than doing things right.
|
||||
layout(set = 0, binding = 3) volatile buffer StateBuf {
|
||||
uint part_counter;
|
||||
uint[] state;
|
||||
};
|
||||
|
||||
#include "scene.h"
|
||||
#include "state.h"
|
||||
#include "annotated.h"
|
||||
#include "pathseg.h"
|
||||
#include "tile.h"
|
||||
|
||||
#define StateBuf_stride (4 + 2 * State_size)
|
||||
|
||||
// Reference to the aggregate State of a partition. The leading 4 skips
// the bytes before the first State; each partition then occupies
// StateBuf_stride bytes.
StateRef state_aggregate_ref(uint partition_ix) {
    uint partition_base = partition_ix * StateBuf_stride;
    return StateRef(4 + partition_base);
}
|
||||
|
||||
// Reference to the prefix State of a partition. It is stored State_size
// bytes after the partition's aggregate State.
StateRef state_prefix_ref(uint partition_ix) {
    uint partition_base = partition_ix * StateBuf_stride;
    return StateRef(4 + partition_base + State_size);
}
|
||||
|
||||
// Index into the state[] word array of a partition's status flag.
// StateBuf_stride is in bytes, so dividing by 4 yields the stride in
// 32-bit words.
uint state_flag_index(uint partition_ix) {
    return (StateBuf_stride / 4) * partition_ix;
}
|
||||
|
||||
// These correspond to X, A, P respectively in the prefix sum paper.
|
||||
#define FLAG_NOT_READY 0
|
||||
#define FLAG_AGGREGATE_READY 1
|
||||
#define FLAG_PREFIX_READY 2
|
||||
|
||||
#define FLAG_SET_LINEWIDTH 1
|
||||
#define FLAG_SET_BBOX 2
|
||||
#define FLAG_RESET_BBOX 4
|
||||
#define FLAG_SET_FILL_MODE 8
|
||||
// Fill modes take up the next bit. Non-zero fill is 0, stroke is 1.
|
||||
#define LG_FILL_MODE 4
|
||||
#define FILL_MODE_BITS 1
|
||||
#define FILL_MODE_MASK (FILL_MODE_BITS << LG_FILL_MODE)
|
||||
|
||||
// Combine two scan states, with a applied before b.
// This is almost like a monoid (the interaction between transformation and
// bounding boxes is approximate).
State combine_state(State a, State b) {
    State c;

    // Axis-aligned bounds of b's bbox after a's affine transform. Each
    // matrix term contributes its smaller value to the lower corner and
    // its larger value to the upper corner.
    c.bbox.xy = min(a.mat.xy * b.bbox.x, a.mat.xy * b.bbox.z)
        + min(a.mat.zw * b.bbox.y, a.mat.zw * b.bbox.w) + a.translate;
    c.bbox.zw = max(a.mat.xy * b.bbox.x, a.mat.xy * b.bbox.z)
        + max(a.mat.zw * b.bbox.y, a.mat.zw * b.bbox.w) + a.translate;

    // Unless a resets the bbox, a's bbox carries over: it replaces the
    // result when b's bbox is empty and is merged into it when b did not
    // have its own bbox set and a's bbox is non-empty.
    if ((a.flags & FLAG_RESET_BBOX) == 0) {
        bool b_is_empty = b.bbox.z <= b.bbox.x && b.bbox.w <= b.bbox.y;
        bool a_is_nonempty = a.bbox.z > a.bbox.x || a.bbox.w > a.bbox.y;
        if (b_is_empty) {
            c.bbox = a.bbox;
        } else if ((b.flags & FLAG_SET_BBOX) == 0 && a_is_nonempty) {
            c.bbox.xy = min(a.bbox.xy, c.bbox.xy);
            c.bbox.zw = max(a.bbox.zw, c.bbox.zw);
        }
    }

    // 2x2 matrix product a.mat * b.mat; mat.xy is the first column and
    // mat.zw the second.
    c.mat.xy = a.mat.xy * b.mat.x + a.mat.zw * b.mat.y;
    c.mat.zw = a.mat.xy * b.mat.z + a.mat.zw * b.mat.w;
    // b's translation mapped through a, then offset by a's translation.
    c.translate = a.mat.xy * b.translate.x + a.mat.zw * b.translate.y + a.translate;

    // The most recently set line width wins.
    if ((b.flags & FLAG_SET_LINEWIDTH) == 0) {
        c.linewidth = a.linewidth;
    } else {
        c.linewidth = b.linewidth;
    }

    // The "set" flags of a are sticky; a's RESET_BBOX bit (4) becomes
    // SET_BBOX (2) in the result.
    uint sticky = a.flags & (FLAG_SET_LINEWIDTH | FLAG_SET_BBOX | FLAG_SET_FILL_MODE);
    c.flags = sticky | b.flags;
    c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1;

    // The fill mode bits come from b if b set them, otherwise from a.
    uint mode_source;
    if ((b.flags & FLAG_SET_FILL_MODE) == 0) {
        mode_source = a.flags;
    } else {
        mode_source = b.flags;
    }
    uint fill_mode = mode_source & FILL_MODE_MASK;
    c.flags = (c.flags & ~FILL_MODE_MASK) | fill_mode;

    // The counters simply accumulate.
    c.path_count = a.path_count + b.path_count;
    c.pathseg_count = a.pathseg_count + b.pathseg_count;
    c.trans_count = a.trans_count + b.trans_count;
    return c;
}
|
||||
|
||||
// Map a single scene element to the State it contributes to the scan.
// Elements whose tag is not handled below yield the default state.
State map_element(ElementRef ref) {
    // Default state: empty bbox, identity transform, no flags, zero counts.
    State st;
    st.bbox = vec4(0.0);
    st.mat = vec4(1.0, 0.0, 0.0, 1.0);
    st.translate = vec2(0.0);
    st.linewidth = 1.0; // TODO should be 0.0
    st.flags = 0;
    st.path_count = 0;
    st.pathseg_count = 0;
    st.trans_count = 0;

    // TODO: it would *probably* be more efficient to make the memory read patterns less
    // divergent, though it would be more wasted memory.
    uint tag = Element_tag(ref).tag;
    switch (tag) {
    // Path segments: the bbox covers all control points, and each counts
    // as one path segment.
    case Element_Line: {
        LineSeg line = Element_Line_read(ref);
        st.bbox = vec4(min(line.p0, line.p1), max(line.p0, line.p1));
        st.pathseg_count = 1;
        break;
    }
    case Element_Quad: {
        QuadSeg quad = Element_Quad_read(ref);
        st.bbox = vec4(min(min(quad.p0, quad.p1), quad.p2),
                       max(max(quad.p0, quad.p1), quad.p2));
        st.pathseg_count = 1;
        break;
    }
    case Element_Cubic: {
        CubicSeg cubic = Element_Cubic_read(ref);
        st.bbox = vec4(min(min(cubic.p0, cubic.p1), min(cubic.p2, cubic.p3)),
                       max(max(cubic.p0, cubic.p1), max(cubic.p2, cubic.p3)));
        st.pathseg_count = 1;
        break;
    }
    // Fills and clip begins count as one path and reset the bbox for
    // whatever follows.
    case Element_FillColor:
    case Element_FillImage:
    case Element_BeginClip:
        st.flags = FLAG_RESET_BBOX;
        st.path_count = 1;
        break;
    // A clip end counts as one path but does not reset the bbox.
    case Element_EndClip:
        st.path_count = 1;
        break;
    case Element_SetLineWidth: {
        SetLineWidth set_width = Element_SetLineWidth_read(ref);
        st.linewidth = set_width.width;
        st.flags = FLAG_SET_LINEWIDTH;
        break;
    }
    case Element_Transform: {
        Transform xform = Element_Transform_read(ref);
        st.mat = xform.mat;
        st.translate = xform.translate;
        st.trans_count = 1;
        break;
    }
    case Element_SetFillMode: {
        SetFillMode set_mode = Element_SetFillMode_read(ref);
        // The mode is stored in the flag bits starting at LG_FILL_MODE.
        st.flags = FLAG_SET_FILL_MODE | (set_mode.fill_mode << LG_FILL_MODE);
        break;
    }
    }
    return st;
}
|
||||
|
||||
// Get the bounding box of a circle transformed by the matrix into an ellipse.
// The result is the half extent along x and y for a circle whose diameter
// is st.linewidth.
vec2 get_linewidth(State st) {
    // See https://www.iquilezles.org/www/articles/ellipses/ellipses.htm
    float radius = 0.5 * st.linewidth;
    vec2 row_lengths = vec2(length(st.mat.xz), length(st.mat.yw));
    return radius * row_lengths;
}
|
||||
|
||||
shared State sh_state[WG_SIZE];
|
||||
|
||||
shared uint sh_part_ix;
|
||||
shared State sh_prefix;
|
||||
|
||||
// Line width of a stroke after transformation, as a single scalar: the
// state's line width scaled by the square root of the absolute determinant
// of the 2x2 matrix.
float get_scaled_linewidth(State st) {
    return st.linewidth * sqrt(abs(st.mat.x * st.mat.w - st.mat.y * st.mat.z));
}

// Write one cubic path segment with control points p0..p3 for the element
// whose inclusive scan state is st. The segment is tagged with the current
// path and transform counts and stored in slot (st.pathseg_count - 1) of
// the path segment buffer. Strokes carry the transformed half line width,
// other fill modes carry zero.
void write_path_cubic(State st, uint fill_mode, vec2 p0, vec2 p1, vec2 p2, vec2 p3) {
    PathCubic path_cubic;
    path_cubic.p0 = p0;
    path_cubic.p1 = p1;
    path_cubic.p2 = p2;
    path_cubic.p3 = p3;
    path_cubic.path_ix = st.path_count;
    path_cubic.trans_ix = st.trans_count;
    if (fill_mode == MODE_STROKE) {
        path_cubic.stroke = get_linewidth(st);
    } else {
        path_cubic.stroke = vec2(0.0);
    }
    PathSegRef path_out_ref = PathSegRef(conf.pathseg_alloc.offset + (st.pathseg_count - 1) * PathSeg_size);
    PathSeg_Cubic_write(conf.pathseg_alloc, path_out_ref, fill_mode, path_cubic);
}

// Entry point of the element processing stage. Each workgroup claims one
// partition of PARTITION_SIZE elements, scans the per-element states
// (within each invocation, across the workgroup, and across partitions via
// decoupled look-back) and then writes the path segments, annotated
// elements and transforms derived from each element's scanned state.
void main() {
    State th_state[N_ROWS];
    // Determine partition to process by atomic counter (described in Section
    // 4.4 of prefix sum paper). One invocation claims the index and shares
    // it with the workgroup through sh_part_ix.
    if (gl_LocalInvocationID.x == 0) {
        sh_part_ix = atomicAdd(part_counter, 1);
    }
    barrier();
    uint part_ix = sh_part_ix;

    // Each invocation handles N_ROWS consecutive elements of the partition.
    uint ix = part_ix * PARTITION_SIZE + gl_LocalInvocationID.x * N_ROWS;
    ElementRef ref = ElementRef(ix * Element_size);

    // Sequential inclusive scan over this invocation's elements.
    th_state[0] = map_element(ref);
    for (uint i = 1; i < N_ROWS; i++) {
        // discussion question: would it be faster to load using more coherent patterns
        // into thread memory? This is kinda strided.
        th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i)));
    }

    // Inclusive scan of the per-invocation aggregates across the workgroup,
    // doubling the look-behind distance each round.
    State agg = th_state[N_ROWS - 1];
    sh_state[gl_LocalInvocationID.x] = agg;
    for (uint i = 0; i < LG_WG_SIZE; i++) {
        barrier();
        if (gl_LocalInvocationID.x >= (1 << i)) {
            State other = sh_state[gl_LocalInvocationID.x - (1 << i)];
            agg = combine_state(other, agg);
        }
        barrier();
        sh_state[gl_LocalInvocationID.x] = agg;
    }

    // Exclusive prefix of this partition; stays at the default state for
    // partition 0.
    State exclusive;
    exclusive.bbox = vec4(0.0, 0.0, 0.0, 0.0);
    exclusive.mat = vec4(1.0, 0.0, 0.0, 1.0);
    exclusive.translate = vec2(0.0, 0.0);
    exclusive.linewidth = 1.0; //TODO should be 0.0
    exclusive.flags = 0;
    exclusive.path_count = 0;
    exclusive.pathseg_count = 0;
    exclusive.trans_count = 0;

    // Publish aggregate for this partition. The last invocation holds the
    // aggregate of the whole workgroup.
    if (gl_LocalInvocationID.x == WG_SIZE - 1) {
        // Note: with memory model, we'd want to generate the atomic store version of this.
        State_write(state_aggregate_ref(part_ix), agg);
        uint flag = FLAG_AGGREGATE_READY;
        if (part_ix == 0) {
            // The first partition has no predecessors, so its aggregate is
            // also its inclusive prefix.
            State_write(state_prefix_ref(part_ix), agg);
            flag = FLAG_PREFIX_READY;
        }
        // Order every state write above before the flag store that
        // announces it to other workgroups.
        memoryBarrierBuffer();
        state[state_flag_index(part_ix)] = flag;
        if (part_ix != 0) {
            // step 4 of paper: decoupled lookback
            uint look_back_ix = part_ix - 1;

            State their_agg;
            uint their_ix = 0;
            while (true) {
                flag = state[state_flag_index(look_back_ix)];
                if (flag == FLAG_PREFIX_READY) {
                    // The predecessor's inclusive prefix completes ours.
                    State their_prefix = State_read(state_prefix_ref(look_back_ix));
                    exclusive = combine_state(their_prefix, exclusive);
                    break;
                } else if (flag == FLAG_AGGREGATE_READY) {
                    // Only the aggregate is published; fold it in and keep
                    // looking further back.
                    their_agg = State_read(state_aggregate_ref(look_back_ix));
                    exclusive = combine_state(their_agg, exclusive);
                    look_back_ix--;
                    their_ix = 0;
                    continue;
                }
                // else spin

                // Unfortunately there's no guarantee of forward progress of other
                // workgroups, so compute a bit of the aggregate before trying again.
                // In the worst case, spinning stops when the aggregate is complete.
                ElementRef their_ref = ElementRef((look_back_ix * PARTITION_SIZE + their_ix) * Element_size);
                State s = map_element(their_ref);
                if (their_ix == 0) {
                    their_agg = s;
                } else {
                    their_agg = combine_state(their_agg, s);
                }
                their_ix++;
                if (their_ix == PARTITION_SIZE) {
                    // The whole partition was recomputed locally.
                    exclusive = combine_state(their_agg, exclusive);
                    if (look_back_ix == 0) {
                        break;
                    }
                    look_back_ix--;
                    their_ix = 0;
                }
            }

            // step 5 of paper: compute inclusive prefix
            State inclusive_prefix = combine_state(exclusive, agg);
            sh_prefix = exclusive;
            State_write(state_prefix_ref(part_ix), inclusive_prefix);
            memoryBarrierBuffer();
            flag = FLAG_PREFIX_READY;
            state[state_flag_index(part_ix)] = flag;
        }
    }
    barrier();
    if (part_ix != 0) {
        exclusive = sh_prefix;
    }

    // Exclusive prefix for this invocation's first element: the partition
    // prefix combined with the scan result of the preceding invocation.
    State row = exclusive;
    if (gl_LocalInvocationID.x > 0) {
        State other = sh_state[gl_LocalInvocationID.x - 1];
        row = combine_state(row, other);
    }
    for (uint i = 0; i < N_ROWS; i++) {
        State st = combine_state(row, th_state[i]);

        // Here we read again from the original scene. There may be
        // gains to be had from stashing in shared memory or possibly
        // registers (though register pressure is an issue).
        ElementRef this_ref = Element_index(ref, i);
        ElementTag tag = Element_tag(this_ref);
        uint fill_mode = fill_mode_from_flags(st.flags >> LG_FILL_MODE);
        bool is_stroke = fill_mode == MODE_STROKE;
        // Slot of the current path in the annotated buffer. Only the cases
        // for path-ending elements use it.
        AnnotatedRef out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
        switch (tag.tag) {
        // Every segment kind is written as a cubic.
        case Element_Line: {
            LineSeg line = Element_Line_read(this_ref);
            write_path_cubic(st, fill_mode,
                line.p0,
                mix(line.p0, line.p1, 1.0 / 3.0),
                mix(line.p1, line.p0, 1.0 / 3.0),
                line.p1);
            break;
        }
        case Element_Quad: {
            QuadSeg quad = Element_Quad_read(this_ref);
            write_path_cubic(st, fill_mode,
                quad.p0,
                mix(quad.p1, quad.p0, 1.0 / 3.0),
                mix(quad.p1, quad.p2, 1.0 / 3.0),
                quad.p2);
            break;
        }
        case Element_Cubic: {
            CubicSeg cubic = Element_Cubic_read(this_ref);
            write_path_cubic(st, fill_mode, cubic.p0, cubic.p1, cubic.p2, cubic.p3);
            break;
        }
        case Element_FillColor: {
            FillColor fill = Element_FillColor_read(this_ref);
            AnnoColor anno_fill;
            anno_fill.rgba_color = fill.rgba_color;
            if (is_stroke) {
                // Grow the bbox by the stroke's half extent on every side.
                vec2 lw = get_linewidth(st);
                anno_fill.bbox = st.bbox + vec4(-lw, lw);
                anno_fill.linewidth = get_scaled_linewidth(st);
            } else {
                anno_fill.bbox = st.bbox;
                anno_fill.linewidth = 0.0;
            }
            Annotated_Color_write(conf.anno_alloc, out_ref, fill_mode, anno_fill);
            break;
        }
        case Element_FillImage: {
            FillImage fill_img = Element_FillImage_read(this_ref);
            AnnoImage anno_img;
            anno_img.index = fill_img.index;
            anno_img.offset = fill_img.offset;
            if (is_stroke) {
                // Grow the bbox by the stroke's half extent on every side.
                vec2 lw = get_linewidth(st);
                anno_img.bbox = st.bbox + vec4(-lw, lw);
                anno_img.linewidth = get_scaled_linewidth(st);
            } else {
                anno_img.bbox = st.bbox;
                anno_img.linewidth = 0.0;
            }
            Annotated_Image_write(conf.anno_alloc, out_ref, fill_mode, anno_img);
            break;
        }
        case Element_BeginClip: {
            Clip begin_clip = Element_BeginClip_read(this_ref);
            AnnoBeginClip anno_begin_clip;
            // This is the absolute bbox, it's been transformed during encoding.
            anno_begin_clip.bbox = begin_clip.bbox;
            // Always initialize the line width of the clip annotation itself
            // so that no undefined value is written for non-stroke clips.
            if (is_stroke) {
                anno_begin_clip.linewidth = get_scaled_linewidth(st);
            } else {
                anno_begin_clip.linewidth = 0.0;
            }
            Annotated_BeginClip_write(conf.anno_alloc, out_ref, fill_mode, anno_begin_clip);
            break;
        }
        case Element_EndClip: {
            Clip end_clip = Element_EndClip_read(this_ref);
            // This bbox is expected to be the same as the begin one.
            AnnoEndClip anno_end_clip = AnnoEndClip(end_clip.bbox);
            Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
            break;
        }
        case Element_Transform: {
            // Store the accumulated transform in slot (trans_count - 1).
            TransformSeg transform = TransformSeg(st.mat, st.translate);
            TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (st.trans_count - 1) * TransformSeg_size);
            TransformSeg_write(conf.trans_alloc, trans_ref, transform);
            break;
        }
        }
    }
}
|
||||
@@ -1,18 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
precision mediump float;
|
||||
|
||||
// Use high precision to be pixel accurate for
|
||||
// large cover atlases.
|
||||
layout(location = 0) in highp vec2 vUV;
|
||||
|
||||
layout(binding = 0) uniform sampler2D cover;
|
||||
|
||||
layout(location = 0) out vec4 fragColor;
|
||||
|
||||
// Write the magnitude of the sampled cover value to the red channel.
// The other channels of fragColor are not assigned.
void main() {
	fragColor.r = abs(texture(cover, vUV).r);
}
|
||||
@@ -1,28 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
precision highp float;
|
||||
|
||||
#include "common.h"
|
||||
|
||||
layout(location = 0) in vec2 pos;
|
||||
layout(location = 1) in vec2 uv;
|
||||
|
||||
layout(binding = 0) uniform Block {
|
||||
vec4 uvTransform;
|
||||
vec4 subUVTransform;
|
||||
} _block;
|
||||
|
||||
layout(location = 0) out vec2 vUV;
|
||||
|
||||
// Place the vertex in the framebuffer and compute the cover texture
// coordinate for the fragment stage.
void main() {
	vec3 p = transform3x2(fboTransform, vec3(pos, 1.0));
	gl_Position = vec4(p, 1);

	// Undo the framebuffer texture transform, apply the sub-rectangle
	// scale (xy) and offset (zw), then redo the transform.
	vec3 uv3 = transform3x2(fboTextureTransform, vec3(uv, 1.0));
	vec2 subUV = uv3.xy*_block.subUVTransform.xy + _block.subUVTransform.zw;
	vec2 texUV = transform3x2(fboTextureTransform, vec3(subUV, 1.0)).xy;

	// Finally scale (xy) and offset (zw) with uvTransform.
	vUV = texUV*_block.uvTransform.xy + _block.uvTransform.zw;
}
|
||||
@@ -1,248 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// This is "kernel 4" in a 4-kernel pipeline. It renders the commands
|
||||
// in the per-tile command list to an image.
|
||||
|
||||
// Right now, this kernel stores the image in a buffer, but a better
|
||||
// plan is to use a texture. This is because of limited support.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
#ifdef ENABLE_IMAGE_INDICES
|
||||
#extension GL_EXT_nonuniform_qualifier : enable
|
||||
#endif
|
||||
|
||||
#include "mem.h"
|
||||
#include "setup.h"
|
||||
|
||||
// A tile is split into CHUNK_X by CHUNK_Y chunks of CHUNK_DX by CHUNK_DY
// pixels. The workgroup size is one chunk, and CHUNK is the number of
// chunks per tile.
#define CHUNK_X 2
#define CHUNK_Y 4
// Parenthesized so the macro expands safely inside larger expressions.
#define CHUNK (CHUNK_X * CHUNK_Y)
#define CHUNK_DX (TILE_WIDTH_PX / CHUNK_X)
#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y)
|
||||
layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in;
|
||||
|
||||
layout(set = 0, binding = 1) restrict readonly buffer ConfigBuf {
|
||||
Config conf;
|
||||
};
|
||||
|
||||
layout(rgba8, set = 0, binding = 2) uniform restrict writeonly image2D image;
|
||||
|
||||
#ifdef ENABLE_IMAGE_INDICES
|
||||
layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images[];
|
||||
#else
|
||||
layout(rgba8, set = 0, binding = 3) uniform restrict readonly image2D images[1];
|
||||
#endif
|
||||
|
||||
#include "ptcl.h"
|
||||
#include "tile.h"
|
||||
|
||||
// tosRGB encodes each component of rgb with a piecewise curve:
// components below 0.0031308 are multiplied by 12.92, the others go
// through the power segment.
mediump vec3 tosRGB(mediump vec3 rgb) {
    mediump vec3 linear_segment = vec3(12.92)*rgb;
    mediump vec3 power_segment = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055);
    // Per-component selector: true picks the power segment.
    bvec3 use_power = greaterThanEqual(rgb, vec3(0.0031308));
    return mix(linear_segment, power_segment, use_power);
}
|
||||
|
||||
// fromsRGB decodes each component of srgb with a piecewise curve:
// components below 0.04045 are divided by 12.92, the others go through
// the 2.4 power segment.
mediump vec3 fromsRGB(mediump vec3 srgb) {
    // Formula from EXT_sRGB.
    mediump vec3 linear_segment = srgb/vec3(12.92);
    mediump vec3 power_segment = pow((srgb + vec3(0.055))/vec3(1.055), vec3(2.4));
    // Per-component selector: true picks the power segment.
    bvec3 use_power = greaterThanEqual(srgb, vec3(0.04045));
    return mix(linear_segment, power_segment, use_power);
}
|
||||
|
||||
// unpacksRGB unpacks a color in the sRGB color space to a vec4 in the linear color
// space. Alpha is passed through without conversion.
mediump vec4 unpacksRGB(uint srgba) {
    // Reverse the component order of the unpacked bytes.
    mediump vec4 encoded = unpackUnorm4x8(srgba).wzyx;
    mediump vec3 linear_rgb = fromsRGB(encoded.rgb);
    return vec4(linear_rgb, encoded.a);
}
|
||||
|
||||
// packsRGB packs a color in the linear color space into its 8-bit sRGB equivalent.
// Alpha is packed without conversion.
uint packsRGB(mediump vec4 rgba) {
    mediump vec4 encoded = vec4(tosRGB(rgba.rgb), rgba.a);
    // Reverse the component order before packing.
    return packUnorm4x8(encoded.wzyx);
}
|
||||
|
||||
// Pixel offset of chunk i within a tile. Chunks are numbered row by
// row, CHUNK_X per row.
uvec2 chunk_offset(uint i) {
    uint column = i % CHUNK_X;
    uint row = i / CHUNK_X;
    return uvec2(column * CHUNK_DX, row * CHUNK_DY);
}
|
||||
|
||||
// Load the image texel for each chunk pixel of this invocation, starting
// at xy and shifted by the command's offset. Color channels are converted
// with fromsRGB; alpha is kept as loaded.
mediump vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) {
    mediump vec4 rgba[CHUNK];
    for (uint i = 0; i < CHUNK; i++) {
        ivec2 coord = ivec2(xy + chunk_offset(i)) + cmd_img.offset;
        // Without ENABLE_IMAGE_INDICES only the first image is used.
#ifdef ENABLE_IMAGE_INDICES
        mediump vec4 texel = imageLoad(images[cmd_img.index], coord);
#else
        mediump vec4 texel = imageLoad(images[0], coord);
#endif
        texel.rgb = fromsRGB(texel.rgb);
        rgba[i] = texel;
    }
    return rgba;
}
|
||||
|
||||
void main() {
|
||||
uint tile_ix = gl_WorkGroupID.y * conf.width_in_tiles + gl_WorkGroupID.x;
|
||||
Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC);
|
||||
CmdRef cmd_ref = CmdRef(cmd_alloc.offset);
|
||||
|
||||
// Read scratch space allocation, written first in the command list.
|
||||
Alloc scratch_alloc = alloc_read(cmd_alloc, cmd_ref.offset);
|
||||
cmd_ref.offset += Alloc_size;
|
||||
|
||||
uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
||||
vec2 xy = vec2(xy_uint);
|
||||
mediump vec4 rgba[CHUNK];
|
||||
for (uint i = 0; i < CHUNK; i++) {
|
||||
rgba[i] = vec4(0.0);
|
||||
// TODO: remove this debug image support when the actual image method is plumbed.
|
||||
#ifdef DEBUG_IMAGES
|
||||
#ifdef ENABLE_IMAGE_INDICES
|
||||
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
|
||||
rgba[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint + chunk_offset(i))/4);
|
||||
}
|
||||
#else
|
||||
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
|
||||
rgb[i] = imageLoad(images[0], ivec2(xy_uint + chunk_offset(i))/4).rgb;
|
||||
}
|
||||
#endif
|
||||
#endif
|
||||
}
|
||||
|
||||
mediump float area[CHUNK];
|
||||
uint clip_depth = 0;
|
||||
bool mem_ok = mem_error == NO_ERROR;
|
||||
while (mem_ok) {
|
||||
uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag;
|
||||
if (tag == Cmd_End) {
|
||||
break;
|
||||
}
|
||||
switch (tag) {
|
||||
case Cmd_Stroke:
|
||||
// Calculate distance field from all the line segments in this tile.
|
||||
CmdStroke stroke = Cmd_Stroke_read(cmd_alloc, cmd_ref);
|
||||
mediump float df[CHUNK];
|
||||
for (uint k = 0; k < CHUNK; k++) df[k] = 1e9;
|
||||
TileSegRef tile_seg_ref = TileSegRef(stroke.tile_ref);
|
||||
do {
|
||||
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
|
||||
vec2 line_vec = seg.vector;
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin;
|
||||
dpos += vec2(chunk_offset(k));
|
||||
float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);
|
||||
df[k] = min(df[k], length(line_vec * t - dpos));
|
||||
}
|
||||
tile_seg_ref = seg.next;
|
||||
} while (tile_seg_ref.offset != 0);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
area[k] = clamp(stroke.half_width + 0.5 - df[k], 0.0, 1.0);
|
||||
}
|
||||
cmd_ref.offset += 4 + CmdStroke_size;
|
||||
break;
|
||||
case Cmd_Fill:
|
||||
CmdFill fill = Cmd_Fill_read(cmd_alloc, cmd_ref);
|
||||
for (uint k = 0; k < CHUNK; k++) area[k] = float(fill.backdrop);
|
||||
tile_seg_ref = TileSegRef(fill.tile_ref);
|
||||
// Calculate coverage based on backdrop + coverage of each line segment
|
||||
do {
|
||||
TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size, mem_ok), tile_seg_ref);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
vec2 my_xy = xy + vec2(chunk_offset(k));
|
||||
vec2 start = seg.origin - my_xy;
|
||||
vec2 end = start + seg.vector;
|
||||
vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0);
|
||||
if (window.x != window.y) {
|
||||
vec2 t = (window - start.y) / seg.vector.y;
|
||||
vec2 xs = vec2(mix(start.x, end.x, t.x), mix(start.x, end.x, t.y));
|
||||
float xmin = min(min(xs.x, xs.y), 1.0) - 1e-6;
|
||||
float xmax = max(xs.x, xs.y);
|
||||
float b = min(xmax, 1.0);
|
||||
float c = max(b, 0.0);
|
||||
float d = max(xmin, 0.0);
|
||||
float a = (b + 0.5 * (d * d - c * c) - xmin) / (xmax - xmin);
|
||||
area[k] += a * (window.x - window.y);
|
||||
}
|
||||
area[k] += sign(seg.vector.x) * clamp(my_xy.y - seg.y_edge + 1.0, 0.0, 1.0);
|
||||
}
|
||||
tile_seg_ref = seg.next;
|
||||
} while (tile_seg_ref.offset != 0);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
area[k] = min(abs(area[k]), 1.0);
|
||||
}
|
||||
cmd_ref.offset += 4 + CmdFill_size;
|
||||
break;
|
||||
case Cmd_Solid:
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
area[k] = 1.0;
|
||||
}
|
||||
cmd_ref.offset += 4;
|
||||
break;
|
||||
case Cmd_Alpha:
|
||||
CmdAlpha alpha = Cmd_Alpha_read(cmd_alloc, cmd_ref);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
area[k] = alpha.alpha;
|
||||
}
|
||||
cmd_ref.offset += 4 + CmdAlpha_size;
|
||||
break;
|
||||
case Cmd_Color:
|
||||
CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref);
|
||||
mediump vec4 fg = unpacksRGB(color.rgba_color);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
mediump vec4 fg_k = fg * area[k];
|
||||
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
|
||||
}
|
||||
cmd_ref.offset += 4 + CmdColor_size;
|
||||
break;
|
||||
case Cmd_Image:
|
||||
CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref);
|
||||
mediump vec4 img[CHUNK] = fillImage(xy_uint, fill_img);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
mediump vec4 fg_k = img[k] * area[k];
|
||||
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
|
||||
}
|
||||
cmd_ref.offset += 4 + CmdImage_size;
|
||||
break;
|
||||
case Cmd_BeginClip:
|
||||
uint base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
|
||||
gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
uvec2 offset = chunk_offset(k);
|
||||
uint srgb = packsRGB(vec4(rgba[k]));
|
||||
mediump float alpha = clamp(abs(area[k]), 0.0, 1.0);
|
||||
write_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb);
|
||||
write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha));
|
||||
rgba[k] = vec4(0.0);
|
||||
}
|
||||
clip_depth++;
|
||||
cmd_ref.offset += 4;
|
||||
break;
|
||||
case Cmd_EndClip:
|
||||
clip_depth--;
|
||||
base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX +
|
||||
gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
uvec2 offset = chunk_offset(k);
|
||||
uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
|
||||
uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX));
|
||||
mediump vec4 bg = unpacksRGB(srgb);
|
||||
mediump vec4 fg = rgba[k] * area[k] * uintBitsToFloat(alpha);
|
||||
rgba[k] = bg * (1.0 - fg.a) + fg;
|
||||
}
|
||||
cmd_ref.offset += 4;
|
||||
break;
|
||||
case Cmd_Jump:
|
||||
cmd_ref = CmdRef(Cmd_Jump_read(cmd_alloc, cmd_ref).new_ref);
|
||||
cmd_alloc.offset = cmd_ref.offset;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
for (uint i = 0; i < CHUNK; i++) {
|
||||
imageStore(image, ivec2(xy_uint + chunk_offset(i)), vec4(tosRGB(rgba[i].rgb), rgba[i].a));
|
||||
}
|
||||
}
|
||||
@@ -1,32 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
precision mediump float;
|
||||
|
||||
layout(binding = 0) uniform sampler2D tex;
|
||||
|
||||
layout(location = 0) in vec2 vUV;
|
||||
|
||||
layout(location = 0) out vec4 fragColor;
|
||||
|
||||
layout(binding=0) uniform Color {
|
||||
// If emulateSRGB is set (!= 0), the input texels are sRGB encoded. We save the
|
||||
// conversion step below, at the cost of texture filtering in sRGB space.
|
||||
float emulateSRGB;
|
||||
};
|
||||
|
||||
// RGBtosRGB encodes a linear RGB color with the sRGB transfer curve:
// a linear segment below the 0.0031308 cutoff and a power curve at or
// above it, selected per component.
vec3 RGBtosRGB(vec3 rgb) {
	vec3 linearPart = vec3(12.92)*rgb;
	vec3 curvePart = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055);
	bvec3 useCurve = greaterThanEqual(rgb, vec3(0.0031308));
	return mix(linearPart, curvePart, useCurve);
}
|
||||
|
||||
// main samples tex at vUV and writes the texel to fragColor. When
// emulateSRGB is zero the color channels are sRGB encoded first; alpha is
// always passed through unchanged.
void main() {
	vec4 sampled = texture(tex, vUV);
	bool encode = emulateSRGB == 0.0;
	if (encode) {
		sampled.rgb = RGBtosRGB(sampled.rgb);
	}
	fragColor = sampled;
}
|
||||
@@ -1,20 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
precision highp float;
|
||||
|
||||
layout(binding = 0) uniform Block {
|
||||
vec2 scale;
|
||||
vec2 pos;
|
||||
} _block;
|
||||
|
||||
layout(location = 0) in vec2 pos;
|
||||
layout(location = 1) in vec2 uv;
|
||||
|
||||
layout(location = 0) out vec2 vUV;
|
||||
|
||||
// main scales and offsets the vertex position by the uniform block and
// forwards the texture coordinate to the fragment stage.
void main() {
	vec2 clipXY = pos*_block.scale + _block.pos;
	gl_Position = vec4(clipXY, 0, 1);
	vUV = uv;
}
|
||||
@@ -1,147 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
layout(set = 0, binding = 0) buffer Memory {
|
||||
// offset into memory of the next allocation, initialized by the user.
|
||||
uint mem_offset;
|
||||
// mem_error tracks the status of memory accesses, initialized to NO_ERROR
|
||||
// by the user. ERR_MALLOC_FAILED is reported for insufficient memory.
|
||||
// If MEM_DEBUG is defined the following errors are reported:
|
||||
// - ERR_OUT_OF_BOUNDS is reported for out of bounds writes.
|
||||
// - ERR_UNALIGNED_ACCESS for memory access not aligned to 32-bit words.
|
||||
uint mem_error;
|
||||
uint[] memory;
|
||||
};
|
||||
|
||||
// Uncomment this line to add the size field to Alloc and enable memory checks.
|
||||
// Note that the Config struct in setup.h grows size fields as well.
|
||||
//#define MEM_DEBUG
|
||||
|
||||
#define NO_ERROR 0
|
||||
#define ERR_MALLOC_FAILED 1
|
||||
#define ERR_OUT_OF_BOUNDS 2
|
||||
#define ERR_UNALIGNED_ACCESS 3
|
||||
|
||||
#ifdef MEM_DEBUG
|
||||
#define Alloc_size 16
|
||||
#else
|
||||
#define Alloc_size 8
|
||||
#endif
|
||||
|
||||
// Alloc represents a memory allocation.
|
||||
// Alloc identifies a region of the shared memory buffer. The size field only
// exists when MEM_DEBUG is defined; touch_mem then uses it for bounds checks.
struct Alloc {
|
||||
// offset in bytes into memory.
|
||||
uint offset;
|
||||
#ifdef MEM_DEBUG
|
||||
// size in bytes of the allocation.
|
||||
uint size;
|
||||
#endif
|
||||
};
|
||||
|
||||
// MallocResult is the value returned by malloc: the allocation together with
// a flag telling whether the request could be satisfied.
struct MallocResult {
|
||||
// alloc is the allocation handed out; callers check failed before using it.
Alloc alloc;
|
||||
// failed is true if the allocation overflowed memory.
|
||||
bool failed;
|
||||
};
|
||||
|
||||
// new_alloc synthesizes an Alloc from an offset and size.
|
||||
// new_alloc builds an Alloc starting at the given byte offset. With MEM_DEBUG
// the recorded size is the requested size when mem_ok is true and zero
// otherwise, so that every later bounds check on the allocation fails.
Alloc new_alloc(uint offset, uint size, bool mem_ok) {
    Alloc result;
    result.offset = offset;
#ifdef MEM_DEBUG
    result.size = 0;
    if (mem_ok) {
        result.size = size;
    }
#endif
    return result;
}
|
||||
|
||||
// malloc allocates size bytes of memory.
|
||||
// malloc reserves size bytes from the shared memory buffer by atomically
// advancing mem_offset.
//
// On success the result carries the new allocation and failed is false. If
// the request does not fit in memory, failed is set and ERR_MALLOC_FAILED is
// recorded in mem_error. With MEM_DEBUG, a size that is not a multiple of
// four bytes also fails, recording ERR_UNALIGNED_ACCESS.
MallocResult malloc(uint size) {
    MallocResult r;
    uint offset = atomicAdd(mem_offset, size);
    // Capacity of the memory array in bytes.
    uint capacity = uint(memory.length()) * 4u;
    // Unsigned arithmetic wraps silently, so "offset + size > capacity" can
    // be false for an out-of-range request once the sum overflows. Compare
    // against capacity - size instead, which cannot wrap after the first test.
    r.failed = size > capacity || offset > capacity - size;
    r.alloc = new_alloc(offset, size, !r.failed);
    if (r.failed) {
        atomicMax(mem_error, ERR_MALLOC_FAILED);
        return r;
    }
#ifdef MEM_DEBUG
    // Allocations are addressed in 32-bit words; reject unaligned sizes.
    if ((size & 3) != 0) {
        r.failed = true;
        atomicMax(mem_error, ERR_UNALIGNED_ACCESS);
        return r;
    }
#endif
    return r;
}
|
||||
|
||||
// touch_mem checks whether access to the memory word at offset is valid.
|
||||
// If MEM_DEBUG is defined, touch_mem returns false if offset is out of bounds.
|
||||
// Offset is in words.
|
||||
// touch_mem reports whether the word at offset (in words) may be accessed
// through alloc. Without MEM_DEBUG every access is accepted. With MEM_DEBUG
// an access outside the allocation records ERR_OUT_OF_BOUNDS and is refused.
bool touch_mem(Alloc alloc, uint offset) {
#ifdef MEM_DEBUG
    uint first_word = alloc.offset/4;
    uint end_word = (alloc.offset + alloc.size)/4;
    bool inside = first_word <= offset && offset < end_word;
    if (!inside) {
        atomicMax(mem_error, ERR_OUT_OF_BOUNDS);
        return false;
    }
#endif
    return true;
}
|
||||
|
||||
// write_mem writes val to memory at offset.
|
||||
// Offset is in words.
|
||||
// write_mem stores val in the memory word at offset (in words). The store is
// skipped when touch_mem rejects the access.
void write_mem(Alloc alloc, uint offset, uint val) {
    if (touch_mem(alloc, offset)) {
        memory[offset] = val;
    }
}
|
||||
|
||||
// read_mem reads the value from memory at offset.
|
||||
// Offset is in words.
|
||||
// read_mem loads the memory word at offset (in words). It returns zero when
// touch_mem rejects the access.
uint read_mem(Alloc alloc, uint offset) {
    uint word = 0;
    if (touch_mem(alloc, offset)) {
        word = memory[offset];
    }
    return word;
}
|
||||
|
||||
// slice_mem returns a sub-allocation inside another. Offset and size are in
|
||||
// bytes, relative to a.offset.
|
||||
// slice_mem returns the sub-allocation of a that starts offset bytes into it
// and spans size bytes. With MEM_DEBUG, unaligned arguments record
// ERR_UNALIGNED_ACCESS and yield an empty allocation, and a slice that
// extends past a yields an empty allocation without recording an error.
Alloc slice_mem(Alloc a, uint offset, uint size) {
#ifdef MEM_DEBUG
    bool aligned = ((offset | size) & 3) == 0;
    if (!aligned) {
        atomicMax(mem_error, ERR_UNALIGNED_ACCESS);
        return Alloc(0, 0);
    }
    // Out-of-bounds slices are sometimes created but never written, so they
    // are mapped to an empty allocation instead of reporting an error.
    Alloc sub = Alloc(0, 0);
    if (offset + size <= a.size) {
        sub = Alloc(a.offset + offset, size);
    }
    return sub;
#else
    return Alloc(a.offset + offset);
#endif
}
|
||||
|
||||
// alloc_write writes alloc to memory at offset bytes.
|
||||
// alloc_write serializes alloc into a, starting at byte offset. The offset
// field is written first; the size field follows only with MEM_DEBUG.
void alloc_write(Alloc a, uint offset, Alloc alloc) {
    uint word = offset >> 2;
    write_mem(a, word, alloc.offset);
#ifdef MEM_DEBUG
    write_mem(a, word + 1, alloc.size);
#endif
}
|
||||
|
||||
// alloc_read reads an Alloc from memory at offset bytes.
|
||||
// alloc_read deserializes an Alloc stored in a at byte offset, mirroring the
// layout produced by alloc_write.
Alloc alloc_read(Alloc a, uint offset) {
    uint word = offset >> 2;
    Alloc loaded;
    loaded.offset = read_mem(a, word);
#ifdef MEM_DEBUG
    loaded.size = read_mem(a, word + 1);
#endif
    return loaded;
}
|
||||
@@ -1,294 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// Coarse rasterization of path segments.
|
||||
|
||||
// Allocation and initialization of tiles for paths.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "mem.h"
|
||||
#include "setup.h"
|
||||
|
||||
#define LG_COARSE_WG 5
|
||||
#define COARSE_WG (1 << LG_COARSE_WG)
|
||||
|
||||
layout(local_size_x = COARSE_WG, local_size_y = 1) in;
|
||||
|
||||
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||
Config conf;
|
||||
};
|
||||
|
||||
#include "pathseg.h"
|
||||
#include "tile.h"
|
||||
|
||||
// scale factors useful for converting coordinates to tiles
|
||||
#define SX (1.0 / float(TILE_WIDTH_PX))
|
||||
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
||||
|
||||
#define ACCURACY 0.25
|
||||
#define Q_ACCURACY (ACCURACY * 0.1)
|
||||
#define REM_ACCURACY (ACCURACY - Q_ACCURACY)
|
||||
#define MAX_HYPOT2 (432.0 * Q_ACCURACY * Q_ACCURACY)
|
||||
#define MAX_QUADS 16
|
||||
|
||||
// eval_quad evaluates the quadratic Bézier with control points p0, p1, p2 at
// parameter t.
vec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t) {
    float s = 1.0 - t;
    vec2 tail = p1 * (s * 2.0) + p2 * t;
    return p0 * (s * s) + tail * t;
}
|
||||
|
||||
// eval_cubic evaluates the cubic Bézier with control points p0..p3 at
// parameter t.
vec2 eval_cubic(vec2 p0, vec2 p1, vec2 p2, vec2 p3, float t) {
    float s = 1.0 - t;
    vec2 inner = p2 * (s * 3.0) + p3 * t;
    vec2 outer = p1 * (s * s * 3.0) + inner * t;
    return p0 * (s * s * s) + outer * t;
}
|
||||
|
||||
// SubdivResult is what estimate_subdiv returns for one quadratic.
struct SubdivResult {
|
||||
// val is the subdivision estimate; main sums it over all quadratics to
// choose the number of output line segments.
float val;
|
||||
// a0 is approx_parabola_integral evaluated at the start parameter x0.
float a0;
|
||||
// a2 is approx_parabola_integral evaluated at the end parameter x2.
float a2;
|
||||
};
|
||||
|
||||
/// An approximation to $\int (1 + 4x^2) ^ -0.25 dx$
|
||||
///
|
||||
/// This is used for flattening curves.
|
||||
#define D 0.67
// approx_parabola_integral returns x scaled by the inverse fourth root of
// 1 - D + (D^4 + x^2/4), with D the tuning constant defined above.
float approx_parabola_integral(float x) {
    float d4 = D * D * D * D;
    float radicand = 1.0 - D + (d4 + 0.25 * x * x);
    return x * inversesqrt(sqrt(radicand));
}
|
||||
|
||||
/// An approximation to the inverse parabola integral.
|
||||
#define B 0.39
// approx_parabola_inv_integral returns x scaled by the square root of
// 1 - B + (B^2 + x^2/4), with B the tuning constant defined above.
float approx_parabola_inv_integral(float x) {
    float radicand = 1.0 - B + (B * B + 0.25 * x * x);
    return x * sqrt(radicand);
}
|
||||
|
||||
// estimate_subdiv computes the subdivision parameters for the quadratic
// Bézier p0, p1, p2. The result holds the parabola integral at both ends of
// the curve (a0, a2) and an estimate val that the caller accumulates to
// decide how many line segments to emit. val stays zero unless the computed
// scale is below 1e9 (a NaN scale also leaves it at zero).
SubdivResult estimate_subdiv(vec2 p0, vec2 p1, vec2 p2, float sqrt_tol) {
    vec2 d01 = p1 - p0;
    vec2 d12 = p2 - p1;
    vec2 dd = d01 - d12;
    float cross = (p2.x - p0.x) * dd.y - (p2.y - p0.y) * dd.x;
    float x0 = (d01.x * dd.x + d01.y * dd.y) / cross;
    float x2 = (d12.x * dd.x + d12.y * dd.y) / cross;
    float scale = abs(cross / (length(dd) * (x2 - x0)));

    float a0 = approx_parabola_integral(x0);
    float a2 = approx_parabola_integral(x2);
    // Written as a negated "<" so that a NaN scale also takes this path.
    if (!(scale < 1e9)) {
        return SubdivResult(0.0, a0, a2);
    }

    float da = abs(a2 - a0);
    float sqrt_scale = sqrt(scale);
    float val;
    if (sign(x0) == sign(x2)) {
        val = da * sqrt_scale;
    } else {
        // The end parameters have different signs.
        float xmin = sqrt_tol / sqrt_scale;
        val = sqrt_tol * da / approx_parabola_integral(xmin);
    }
    return SubdivResult(val, a0, a2);
}
|
||||
|
||||
// main handles one path segment per invocation. A cubic segment is
// transformed, approximated by up to MAX_QUADS quadratics, flattened into
// line segments, and each line is linked as a TileSeg into every tile of the
// path's bounding box that it may touch. For fills, tile backdrops are
// updated as well.
void main() {
    uint element_ix = gl_GlobalInvocationID.x;
    PathSegRef ref = PathSegRef(conf.pathseg_alloc.offset + element_ix * PathSeg_size);

    // Invocations at or past conf.n_pathseg keep the Nop tag.
    PathSegTag tag = PathSegTag(PathSeg_Nop, 0);
    if (element_ix < conf.n_pathseg) {
        tag = PathSeg_tag(conf.pathseg_alloc, ref);
    }
    bool mem_ok = mem_error == NO_ERROR;
    // Cubic is the only tag that produces output.
    if (tag.tag != PathSeg_Cubic) {
        return;
    }
    PathCubic cubic = PathSeg_Cubic_read(conf.pathseg_alloc, ref);

    // A non-zero trans_ix selects transform number trans_ix - 1.
    uint trans_ix = cubic.trans_ix;
    if (trans_ix > 0) {
        TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (trans_ix - 1) * TransformSeg_size);
        TransformSeg trans = TransformSeg_read(conf.trans_alloc, trans_ref);
        cubic.p0 = trans.mat.xy * cubic.p0.x + trans.mat.zw * cubic.p0.y + trans.translate;
        cubic.p1 = trans.mat.xy * cubic.p1.x + trans.mat.zw * cubic.p1.y + trans.translate;
        cubic.p2 = trans.mat.xy * cubic.p2.x + trans.mat.zw * cubic.p2.y + trans.translate;
        cubic.p3 = trans.mat.xy * cubic.p3.x + trans.mat.zw * cubic.p3.y + trans.translate;
    }

    vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
    float err = err_v.x * err_v.x + err_v.y * err_v.y;
    // Number of quadratics, at least one and at most MAX_QUADS.
    uint n_quads = max(uint(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1);
    n_quads = min(n_quads, MAX_QUADS);
    SubdivResult keep_params[MAX_QUADS];
    // First pass: estimate every quadratic and sum the estimates.
    float val = 0.0;
    vec2 qp0 = cubic.p0;
    float step = 1.0 / float(n_quads);
    for (uint i = 0; i < n_quads; i++) {
        float t = float(i + 1) * step;
        vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t);
        vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step);
        qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
        SubdivResult params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY));
        keep_params[i] = params;
        val += params.val;

        qp0 = qp2;
    }
    // Total number of line segments to emit for the whole cubic.
    uint n = max(uint(ceil(val * 0.5 / sqrt(REM_ACCURACY))), 1);

    bool is_stroke = fill_mode_from_flags(tag.flags) == MODE_STROKE;
    uint path_ix = cubic.path_ix;
    Path path = Path_read(conf.tile_alloc, PathRef(conf.tile_alloc.offset + path_ix * Path_size));
    Alloc path_alloc = new_alloc(path.tiles.offset, (path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y) * Tile_size, mem_ok);
    ivec4 bbox = ivec4(path.bbox);
    vec2 p0 = cubic.p0;
    qp0 = cubic.p0;
    float v_step = val / float(n);
    int n_out = 1;
    float val_sum = 0.0;
    // Second pass: walk the quadratics again and emit the line segments.
    for (uint i = 0; i < n_quads; i++) {
        float t = float(i + 1) * step;
        vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t);
        vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step);
        qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2);
        SubdivResult params = keep_params[i];
        float u0 = approx_parabola_inv_integral(params.a0);
        float u2 = approx_parabola_inv_integral(params.a2);
        float uscale = 1.0 / (u2 - u0);
        float target = float(n_out) * v_step;
        while (n_out == n || target < val_sum + params.val) {
            // End point of the next line; the n'th line ends exactly at p3.
            vec2 p1;
            if (n_out == n) {
                p1 = cubic.p3;
            } else {
                float u = (target - val_sum) / params.val;
                float a_mid = mix(params.a0, params.a2, u);
                float au = approx_parabola_inv_integral(a_mid);
                float tq = (au - u0) * uscale;
                p1 = eval_quad(qp0, qp1, qp2, tq);
            }

            // Output the line segment p0 -> p1.

            // Bounding box of the line in pixel coordinates, grown by the stroke.
            float xmin = min(p0.x, p1.x) - cubic.stroke.x;
            float xmax = max(p0.x, p1.x) + cubic.stroke.x;
            float ymin = min(p0.y, p1.y) - cubic.stroke.y;
            float ymax = max(p0.y, p1.y) + cubic.stroke.y;
            float dx = p1.x - p0.x;
            float dy = p1.y - p0.y;
            // Set up for the per-scanline coverage formula used below.
            float invslope = abs(dy) < 1e-9 ? 1e9 : dx / dy;
            float c = (cubic.stroke.x + abs(invslope) * (0.5 * float(TILE_HEIGHT_PX) + cubic.stroke.y)) * SX;
            float b = invslope; // Note: assumes square tiles, otherwise scale.
            float a = (p0.x - (p0.y - 0.5 * float(TILE_HEIGHT_PX)) * b) * SX;

            // Tile range covered by the bounding box, clamped to the path's bbox.
            int x0 = int(floor(xmin * SX));
            int x1 = int(floor(xmax * SX) + 1);
            int y0 = int(floor(ymin * SY));
            int y1 = int(floor(ymax * SY) + 1);

            x0 = clamp(x0, bbox.x, bbox.z);
            y0 = clamp(y0, bbox.y, bbox.w);
            x1 = clamp(x1, bbox.x, bbox.z);
            y1 = clamp(y1, bbox.y, bbox.w);
            float xc = a + b * float(y0);
            int stride = bbox.z - bbox.x;
            int base = (y0 - bbox.y) * stride - bbox.x;
            // TODO: can be tighter, use c to bound width
            uint n_tile_alloc = uint((x1 - x0) * (y1 - y0));
            // Consider using subgroups to aggregate atomic add.
            MallocResult tile_alloc = malloc(n_tile_alloc * TileSeg_size);
            if (tile_alloc.failed || !mem_ok) {
                return;
            }
            uint tile_offset = tile_alloc.alloc.offset;

            TileSeg tile_seg;

            // xray is the tile column of the end point with the smaller y;
            // last_xray is the column of the other end point.
            int xray = int(floor(p0.x*SX));
            int last_xray = int(floor(p1.x*SX));
            if (p0.y > p1.y) {
                int tmp = xray;
                xray = last_xray;
                last_xray = tmp;
            }
            for (int y = y0; y < y1; y++) {
                float tile_y0 = float(y * TILE_HEIGHT_PX);
                int xbackdrop = max(xray + 1, bbox.x);
                if (!is_stroke && min(p0.y, p1.y) < tile_y0 && xbackdrop < bbox.z) {
                    int backdrop;
                    if (p1.y < p0.y) {
                        backdrop = 1;
                    } else {
                        backdrop = -1;
                    }
                    TileRef tile_ref = Tile_index(path.tiles, uint(base + xbackdrop));
                    uint tile_el = tile_ref.offset >> 2;
                    if (touch_mem(path_alloc, tile_el + 1)) {
                        atomicAdd(memory[tile_el + 1], backdrop);
                    }
                }

                // next_xray is the xray for the next scanline; the line segment intersects
                // all tiles between xray and next_xray.
                int next_xray = last_xray;
                if (y < y1 - 1) {
                    float tile_y1 = float((y + 1) * TILE_HEIGHT_PX);
                    float x_edge = mix(p0.x, p1.x, (tile_y1 - p0.y) / dy);
                    next_xray = int(floor(x_edge*SX));
                }

                int min_xray = min(xray, next_xray);
                int max_xray = max(xray, next_xray);
                int xx0 = min(int(floor(xc - c)), min_xray);
                int xx1 = max(int(ceil(xc + c)), max_xray + 1);
                xx0 = clamp(xx0, x0, x1);
                xx1 = clamp(xx1, x0, x1);

                for (int x = xx0; x < xx1; x++) {
                    float tile_x0 = float(x * TILE_WIDTH_PX);
                    TileRef tile_ref = Tile_index(TileRef(path.tiles.offset), uint(base + x));
                    uint tile_el = tile_ref.offset >> 2;
                    // Push the new segment onto the front of the tile's list.
                    uint old = 0;
                    if (touch_mem(path_alloc, tile_el)) {
                        old = atomicExchange(memory[tile_el], tile_offset);
                    }
                    tile_seg.origin = p0;
                    tile_seg.vector = p1 - p0;
                    float y_edge = 0.0;
                    if (!is_stroke) {
                        y_edge = mix(p0.y, p1.y, (tile_x0 - p0.x) / dx);
                        if (min(p0.x, p1.x) < tile_x0) {
                            // Clip the segment at the left edge of the tile.
                            vec2 p = vec2(tile_x0, y_edge);
                            if (p0.x > p1.x) {
                                tile_seg.vector = p - p0;
                            } else {
                                tile_seg.origin = p;
                                tile_seg.vector = p1 - p;
                            }
                            // kernel4 uses sign(vector.x) for the sign of the intersection backdrop.
                            // Nudge zeroes towards the intended sign.
                            if (tile_seg.vector.x == 0) {
                                tile_seg.vector.x = sign(p1.x - p0.x)*1e-9;
                            }
                        }
                        bool crossing = min_xray < x && x <= max_xray;
                        if (!crossing) {
                            // Reject inconsistent intersections.
                            y_edge = 1e9;
                        }
                    }
                    tile_seg.y_edge = y_edge;
                    tile_seg.next.offset = old;
                    TileSeg_write(tile_alloc.alloc, TileSegRef(tile_offset), tile_seg);
                    tile_offset += TileSeg_size;
                }
                xc += b;
                base += stride;
                xray = next_xray;
            }

            n_out++;
            target += v_step;
            p0 = p1;
        }
        val_sum += params.val;

        qp0 = qp2;
    }
}
|
||||
@@ -1,100 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
// PathCubicRef is the byte offset of a PathCubic record; PathCubic_read and
// PathCubic_write shift it right by two to obtain a word index.
struct PathCubicRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// PathSegRef is the byte offset of a tagged PathSeg record. The tag word sits
// at the offset itself and the payload four bytes after it.
struct PathSegRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// PathCubic is a cubic Bézier path segment, stored as twelve 32-bit words.
struct PathCubic {
|
||||
// p0..p3 are the four control points of the curve.
vec2 p0;
|
||||
vec2 p1;
|
||||
vec2 p2;
|
||||
vec2 p3;
|
||||
// path_ix is the index of the Path record the segment belongs to.
uint path_ix;
|
||||
// trans_ix selects transform number trans_ix - 1; zero means no transform.
uint trans_ix;
|
||||
// stroke is the amount by which path_coarse grows the segment's bounding
// box in x and y.
vec2 stroke;
|
||||
};
|
||||
|
||||
#define PathCubic_size 48
|
||||
|
||||
// PathCubic_index returns the reference index records of PathCubic_size
// bytes past ref.
PathCubicRef PathCubic_index(PathCubicRef ref, uint index) {
    ref.offset += index * PathCubic_size;
    return ref;
}
|
||||
|
||||
#define PathSeg_Nop 0
|
||||
#define PathSeg_Cubic 1
|
||||
#define PathSeg_size 52
|
||||
|
||||
// PathSeg_index returns the reference index records of PathSeg_size bytes
// past ref.
PathSegRef PathSeg_index(PathSegRef ref, uint index) {
    ref.offset += index * PathSeg_size;
    return ref;
}
|
||||
|
||||
// PathSegTag is the decoded first word of a PathSeg record.
struct PathSegTag {
|
||||
// tag is the low 16 bits of the word: PathSeg_Nop or PathSeg_Cubic.
uint tag;
|
||||
// flags is the high 16 bits of the word.
uint flags;
|
||||
};
|
||||
|
||||
// PathCubic_read loads the twelve consecutive words at ref from a and decodes
// them into a PathCubic.
PathCubic PathCubic_read(Alloc a, PathCubicRef ref) {
    uint w = ref.offset >> 2;
    uint p0x = read_mem(a, w + 0);
    uint p0y = read_mem(a, w + 1);
    uint p1x = read_mem(a, w + 2);
    uint p1y = read_mem(a, w + 3);
    uint p2x = read_mem(a, w + 4);
    uint p2y = read_mem(a, w + 5);
    uint p3x = read_mem(a, w + 6);
    uint p3y = read_mem(a, w + 7);
    uint path_word = read_mem(a, w + 8);
    uint trans_word = read_mem(a, w + 9);
    uint stroke_x = read_mem(a, w + 10);
    uint stroke_y = read_mem(a, w + 11);

    PathCubic cubic;
    cubic.p0 = vec2(uintBitsToFloat(p0x), uintBitsToFloat(p0y));
    cubic.p1 = vec2(uintBitsToFloat(p1x), uintBitsToFloat(p1y));
    cubic.p2 = vec2(uintBitsToFloat(p2x), uintBitsToFloat(p2y));
    cubic.p3 = vec2(uintBitsToFloat(p3x), uintBitsToFloat(p3y));
    cubic.path_ix = path_word;
    cubic.trans_ix = trans_word;
    cubic.stroke = vec2(uintBitsToFloat(stroke_x), uintBitsToFloat(stroke_y));
    return cubic;
}
|
||||
|
||||
// PathCubic_write encodes s into the twelve consecutive words at ref in a,
// using the layout that PathCubic_read expects.
void PathCubic_write(Alloc a, PathCubicRef ref, PathCubic s) {
    uint w = ref.offset >> 2;
    // Control points, two words each.
    write_mem(a, w + 0, floatBitsToUint(s.p0.x));
    write_mem(a, w + 1, floatBitsToUint(s.p0.y));
    write_mem(a, w + 2, floatBitsToUint(s.p1.x));
    write_mem(a, w + 3, floatBitsToUint(s.p1.y));
    write_mem(a, w + 4, floatBitsToUint(s.p2.x));
    write_mem(a, w + 5, floatBitsToUint(s.p2.y));
    write_mem(a, w + 6, floatBitsToUint(s.p3.x));
    write_mem(a, w + 7, floatBitsToUint(s.p3.y));
    // Indices are stored as plain integers.
    write_mem(a, w + 8, s.path_ix);
    write_mem(a, w + 9, s.trans_ix);
    write_mem(a, w + 10, floatBitsToUint(s.stroke.x));
    write_mem(a, w + 11, floatBitsToUint(s.stroke.y));
}
|
||||
|
||||
// PathSeg_tag reads the first word of the PathSeg at ref and splits it into
// the tag (low 16 bits) and the flags (high 16 bits).
PathSegTag PathSeg_tag(Alloc a, PathSegRef ref) {
    uint word = read_mem(a, ref.offset >> 2);
    PathSegTag decoded;
    decoded.tag = word & 0xffff;
    decoded.flags = word >> 16;
    return decoded;
}
|
||||
|
||||
// PathSeg_Cubic_read reads the PathCubic payload that follows the four-byte
// tag word of the PathSeg at ref.
PathCubic PathSeg_Cubic_read(Alloc a, PathSegRef ref) {
    PathCubicRef payload = PathCubicRef(ref.offset + 4);
    return PathCubic_read(a, payload);
}
|
||||
|
||||
// PathSeg_Nop_write stores the PathSeg_Nop tag in the first word of the
// PathSeg at ref.
void PathSeg_Nop_write(Alloc a, PathSegRef ref) {
    uint tag_word = ref.offset >> 2;
    write_mem(a, tag_word, PathSeg_Nop);
}
|
||||
|
||||
// PathSeg_Cubic_write stores a cubic PathSeg at ref: a tag word holding
// PathSeg_Cubic in the low half and flags in the high half, followed by the
// PathCubic payload.
void PathSeg_Cubic_write(Alloc a, PathSegRef ref, uint flags, PathCubic s) {
    uint tag_word = (flags << 16) | PathSeg_Cubic;
    write_mem(a, ref.offset >> 2, tag_word);
    PathCubicRef payload = PathCubicRef(ref.offset + 4);
    PathCubic_write(a, payload, s);
}
|
||||
|
||||
@@ -1,278 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
// CmdStrokeRef is the byte offset of a CmdStroke record; the read and write
// helpers shift it right by two to obtain a word index.
struct CmdStrokeRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// CmdFillRef is the byte offset of a CmdFill record; the read and write
// helpers shift it right by two to obtain a word index.
struct CmdFillRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// CmdColorRef is the byte offset of a CmdColor record; the read and write
// helpers shift it right by two to obtain a word index.
struct CmdColorRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// CmdImageRef is the byte offset of a CmdImage record; the read and write
// helpers shift it right by two to obtain a word index.
struct CmdImageRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// CmdAlphaRef is the byte offset of a CmdAlpha record; the read and write
// helpers shift it right by two to obtain a word index.
struct CmdAlphaRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// CmdJumpRef is the byte offset of a CmdJump record; the read and write
// helpers shift it right by two to obtain a word index.
struct CmdJumpRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// CmdRef is the byte offset of a tagged Cmd record. The tag word sits at the
// offset itself and the command payload, if any, four bytes after it.
struct CmdRef {
|
||||
uint offset;
|
||||
};
|
||||
|
||||
// CmdStroke is the payload of a Cmd_Stroke command.
struct CmdStroke {
|
||||
// tile_ref is the offset of the first TileSeg in the tile's segment list.
uint tile_ref;
|
||||
// half_width is added to 0.5 and reduced by the distance to the nearest
// segment to obtain stroke coverage.
float half_width;
|
||||
};
|
||||
|
||||
#define CmdStroke_size 8
|
||||
|
||||
// CmdStroke_index returns the reference index records of CmdStroke_size
// bytes past ref.
CmdStrokeRef CmdStroke_index(CmdStrokeRef ref, uint index) {
    ref.offset += index * CmdStroke_size;
    return ref;
}
|
||||
|
||||
// CmdFill is the payload of a Cmd_Fill command.
struct CmdFill {
|
||||
// tile_ref is the offset of the first TileSeg in the tile's segment list.
uint tile_ref;
|
||||
// backdrop is the initial coverage value that per-segment coverage is
// added to.
int backdrop;
|
||||
};
|
||||
|
||||
#define CmdFill_size 8
|
||||
|
||||
// CmdFill_index returns the reference index records of CmdFill_size bytes
// past ref.
CmdFillRef CmdFill_index(CmdFillRef ref, uint index) {
    ref.offset += index * CmdFill_size;
    return ref;
}
|
||||
|
||||
// CmdColor is the payload of a Cmd_Color command.
struct CmdColor {
|
||||
// rgba_color is a color packed into one word; the fine rasterizer decodes
// it with unpacksRGB.
uint rgba_color;
|
||||
};
|
||||
|
||||
#define CmdColor_size 4
|
||||
|
||||
// CmdColor_index returns the reference index records of CmdColor_size bytes
// past ref.
CmdColorRef CmdColor_index(CmdColorRef ref, uint index) {
    ref.offset += index * CmdColor_size;
    return ref;
}
|
||||
|
||||
// CmdImage is the payload of a Cmd_Image command, stored in two words.
struct CmdImage {
|
||||
// index occupies the first word. NOTE(review): presumably identifies the
// image to sample; confirm against fillImage.
uint index;
|
||||
// offset is stored as two signed 16-bit values packed into the second
// word: x in the low half, y in the high half.
ivec2 offset;
|
||||
};
|
||||
|
||||
#define CmdImage_size 8
|
||||
|
||||
// CmdImage_index returns the reference index records of CmdImage_size bytes
// past ref.
CmdImageRef CmdImage_index(CmdImageRef ref, uint index) {
    ref.offset += index * CmdImage_size;
    return ref;
}
|
||||
|
||||
// CmdAlpha is the payload of a Cmd_Alpha command.
struct CmdAlpha {
|
||||
// alpha is the coverage value assigned to every pixel of the tile.
float alpha;
|
||||
};
|
||||
|
||||
#define CmdAlpha_size 4
|
||||
|
||||
// CmdAlpha_index returns the reference index records of CmdAlpha_size bytes
// past ref.
CmdAlphaRef CmdAlpha_index(CmdAlphaRef ref, uint index) {
    ref.offset += index * CmdAlpha_size;
    return ref;
}
|
||||
|
||||
// CmdJump is the payload of a Cmd_Jump command.
struct CmdJump {
|
||||
// new_ref is the offset at which command processing continues.
uint new_ref;
|
||||
};
|
||||
|
||||
#define CmdJump_size 4
|
||||
|
||||
// CmdJump_index returns the reference index records of CmdJump_size bytes
// past ref.
CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) {
    ref.offset += index * CmdJump_size;
    return ref;
}
|
||||
|
||||
#define Cmd_End 0
|
||||
#define Cmd_Fill 1
|
||||
#define Cmd_Stroke 2
|
||||
#define Cmd_Solid 3
|
||||
#define Cmd_Alpha 4
|
||||
#define Cmd_Color 5
|
||||
#define Cmd_Image 6
|
||||
#define Cmd_BeginClip 7
|
||||
#define Cmd_EndClip 8
|
||||
#define Cmd_Jump 9
|
||||
#define Cmd_size 12
|
||||
|
||||
// Cmd_index returns the reference index records of Cmd_size bytes past ref.
CmdRef Cmd_index(CmdRef ref, uint index) {
    ref.offset += index * Cmd_size;
    return ref;
}
|
||||
|
||||
// CmdTag is the decoded first word of a Cmd record.
struct CmdTag {
|
||||
// tag is the low 16 bits of the word: one of the Cmd_* constants.
uint tag;
|
||||
// flags is the high 16 bits of the word.
uint flags;
|
||||
};
|
||||
|
||||
// CmdStroke_read decodes the two-word CmdStroke stored at ref in a.
CmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref) {
    uint w = ref.offset >> 2;
    uint tile_word = read_mem(a, w);
    uint width_bits = read_mem(a, w + 1);
    CmdStroke cmd;
    cmd.tile_ref = tile_word;
    cmd.half_width = uintBitsToFloat(width_bits);
    return cmd;
}
|
||||
|
||||
// CmdStroke_write encodes s into the two words at ref in a.
void CmdStroke_write(Alloc a, CmdStrokeRef ref, CmdStroke s) {
    uint w = ref.offset >> 2;
    write_mem(a, w, s.tile_ref);
    write_mem(a, w + 1, floatBitsToUint(s.half_width));
}
|
||||
|
||||
// CmdFill_read decodes the two-word CmdFill stored at ref in a.
CmdFill CmdFill_read(Alloc a, CmdFillRef ref) {
    uint w = ref.offset >> 2;
    uint tile_word = read_mem(a, w);
    uint backdrop_word = read_mem(a, w + 1);
    CmdFill cmd;
    cmd.tile_ref = tile_word;
    cmd.backdrop = int(backdrop_word);
    return cmd;
}
|
||||
|
||||
// CmdFill_write encodes s into the two words at ref in a.
void CmdFill_write(Alloc a, CmdFillRef ref, CmdFill s) {
    uint w = ref.offset >> 2;
    write_mem(a, w, s.tile_ref);
    write_mem(a, w + 1, uint(s.backdrop));
}
|
||||
|
||||
// CmdColor_read decodes the one-word CmdColor stored at ref in a.
CmdColor CmdColor_read(Alloc a, CmdColorRef ref) {
    CmdColor cmd;
    cmd.rgba_color = read_mem(a, ref.offset >> 2);
    return cmd;
}
|
||||
|
||||
// CmdColor_write encodes s into the single word at ref in a.
void CmdColor_write(Alloc a, CmdColorRef ref, CmdColor s) {
    uint w = ref.offset >> 2;
    write_mem(a, w, s.rgba_color);
}
|
||||
|
||||
// CmdImage_read decodes the two-word CmdImage stored at ref in a. The second
// word holds the offset as two sign-extended 16-bit halves: x low, y high.
CmdImage CmdImage_read(Alloc a, CmdImageRef ref) {
    uint w = ref.offset >> 2;
    uint index_word = read_mem(a, w);
    uint xy_bits = read_mem(a, w + 1);
    // Shift left then arithmetic-shift right to sign-extend the low half.
    int off_x = int(xy_bits << 16) >> 16;
    int off_y = int(xy_bits) >> 16;
    CmdImage cmd;
    cmd.index = index_word;
    cmd.offset = ivec2(off_x, off_y);
    return cmd;
}
|
||||
|
||||
// CmdImage_write encodes s into the two words at ref in a, packing the offset
// as x in the low 16 bits and y in the high 16 bits of the second word.
void CmdImage_write(Alloc a, CmdImageRef ref, CmdImage s) {
    uint w = ref.offset >> 2;
    uint low_half = uint(s.offset.x) & 0xffff;
    uint high_half = uint(s.offset.y) << 16;
    write_mem(a, w, s.index);
    write_mem(a, w + 1, low_half | high_half);
}
|
||||
|
||||
// CmdAlpha_read decodes the one-word CmdAlpha stored at ref in a.
CmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref) {
    uint alpha_bits = read_mem(a, ref.offset >> 2);
    CmdAlpha cmd;
    cmd.alpha = uintBitsToFloat(alpha_bits);
    return cmd;
}
|
||||
|
||||
// CmdAlpha_write encodes s into the single word at ref in a.
void CmdAlpha_write(Alloc a, CmdAlphaRef ref, CmdAlpha s) {
    uint w = ref.offset >> 2;
    write_mem(a, w, floatBitsToUint(s.alpha));
}
|
||||
|
||||
// CmdJump_read decodes the one-word CmdJump stored at ref in a.
CmdJump CmdJump_read(Alloc a, CmdJumpRef ref) {
    CmdJump cmd;
    cmd.new_ref = read_mem(a, ref.offset >> 2);
    return cmd;
}
|
||||
|
||||
// CmdJump_write encodes s into the single word at ref in a.
void CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s) {
    uint w = ref.offset >> 2;
    write_mem(a, w, s.new_ref);
}
|
||||
|
||||
// Cmd_tag reads the first word of the Cmd at ref and splits it into the tag
// (low 16 bits) and the flags (high 16 bits).
CmdTag Cmd_tag(Alloc a, CmdRef ref) {
    uint word = read_mem(a, ref.offset >> 2);
    CmdTag decoded;
    decoded.tag = word & 0xffff;
    decoded.flags = word >> 16;
    return decoded;
}
|
||||
|
||||
// Cmd_Fill_read reads the CmdFill payload that follows the four-byte tag word
// of the Cmd at ref.
CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) {
    CmdFillRef payload = CmdFillRef(ref.offset + 4);
    return CmdFill_read(a, payload);
}
|
||||
|
||||
// Cmd_Stroke_read reads the CmdStroke payload that follows the four-byte tag
// word of the Cmd at ref.
CmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref) {
    CmdStrokeRef payload = CmdStrokeRef(ref.offset + 4);
    return CmdStroke_read(a, payload);
}
|
||||
|
||||
// Cmd_Alpha_read reads the CmdAlpha payload that follows the four-byte tag
// word of the Cmd at ref.
CmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref) {
    CmdAlphaRef payload = CmdAlphaRef(ref.offset + 4);
    return CmdAlpha_read(a, payload);
}
|
||||
|
||||
// Cmd_Color_read reads the CmdColor payload that follows the four-byte tag
// word of the Cmd at ref.
CmdColor Cmd_Color_read(Alloc a, CmdRef ref) {
    CmdColorRef payload = CmdColorRef(ref.offset + 4);
    return CmdColor_read(a, payload);
}
|
||||
|
||||
// Cmd_Image_read reads the CmdImage payload that follows the four-byte tag
// word of the Cmd at ref.
CmdImage Cmd_Image_read(Alloc a, CmdRef ref) {
    CmdImageRef payload = CmdImageRef(ref.offset + 4);
    return CmdImage_read(a, payload);
}
|
||||
|
||||
// Cmd_Jump_read reads the CmdJump payload that follows the four-byte tag word
// of the Cmd at ref.
CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) {
    CmdJumpRef payload = CmdJumpRef(ref.offset + 4);
    return CmdJump_read(a, payload);
}
|
||||
|
||||
// Cmd_End_write stores the Cmd_End tag in the first word of the Cmd at ref.
void Cmd_End_write(Alloc a, CmdRef ref) {
    uint tag_word = ref.offset >> 2;
    write_mem(a, tag_word, Cmd_End);
}
|
||||
|
||||
// Cmd_Fill_write stores a Cmd_Fill command at ref: the tag word followed by
// the CmdFill payload.
void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) {
    uint tag_word = ref.offset >> 2;
    write_mem(a, tag_word, Cmd_Fill);
    CmdFillRef payload = CmdFillRef(ref.offset + 4);
    CmdFill_write(a, payload, s);
}
|
||||
|
||||
// Cmd_Stroke_write stores a Cmd_Stroke command at ref: the tag word followed
// by the CmdStroke payload.
void Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s) {
    uint tag_word = ref.offset >> 2;
    write_mem(a, tag_word, Cmd_Stroke);
    CmdStrokeRef payload = CmdStrokeRef(ref.offset + 4);
    CmdStroke_write(a, payload, s);
}
|
||||
|
||||
// Cmd_Solid_write stores the Cmd_Solid tag in the first word of the Cmd at
// ref; the command has no payload.
void Cmd_Solid_write(Alloc a, CmdRef ref) {
    uint tag_word = ref.offset >> 2;
    write_mem(a, tag_word, Cmd_Solid);
}
|
||||
|
||||
// Cmd_Alpha_write stores a Cmd_Alpha command at ref: the tag word followed by
// the CmdAlpha payload.
void Cmd_Alpha_write(Alloc a, CmdRef ref, CmdAlpha s) {
    uint tag_word = ref.offset >> 2;
    write_mem(a, tag_word, Cmd_Alpha);
    CmdAlphaRef payload = CmdAlphaRef(ref.offset + 4);
    CmdAlpha_write(a, payload, s);
}
|
||||
|
||||
// Cmd_Color_write stores a Cmd_Color command at ref: the tag word followed by
// the CmdColor payload.
void Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s) {
    uint tag_word = ref.offset >> 2;
    write_mem(a, tag_word, Cmd_Color);
    CmdColorRef payload = CmdColorRef(ref.offset + 4);
    CmdColor_write(a, payload, s);
}
|
||||
|
||||
// Stores the Cmd_Image tag in the word addressed by `ref`, then writes the
// payload `s` starting 4 bytes further on.
void Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_Image);
    CmdImageRef payload = CmdImageRef(ref.offset + 4);
    CmdImage_write(a, payload, s);
}
|
||||
|
||||
// Stores the Cmd_BeginClip tag in the word addressed by `ref`. This command
// has no payload.
void Cmd_BeginClip_write(Alloc a, CmdRef ref) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_BeginClip);
}
|
||||
|
||||
// Stores the Cmd_EndClip tag in the word addressed by `ref`. This command
// has no payload.
void Cmd_EndClip_write(Alloc a, CmdRef ref) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_EndClip);
}
|
||||
|
||||
// Stores the Cmd_Jump tag in the word addressed by `ref`, then writes the
// payload `s` starting 4 bytes further on.
void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) {
    uint tag_ix = ref.offset >> 2;
    write_mem(a, tag_ix, Cmd_Jump);
    CmdJumpRef payload = CmdJumpRef(ref.offset + 4);
    CmdJump_write(a, payload, s);
}
|
||||
|
||||
@@ -1,313 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
// Typed handles into the scene buffer. Each wraps a single `offset`, which
// the *_read functions in this file shift right by 2 to obtain an index
// into the `scene` word array.
struct LineSegRef { uint offset; };
struct QuadSegRef { uint offset; };
struct CubicSegRef { uint offset; };
struct FillColorRef { uint offset; };
struct FillImageRef { uint offset; };
struct SetLineWidthRef { uint offset; };
struct TransformRef { uint offset; };
struct ClipRef { uint offset; };
struct SetFillModeRef { uint offset; };
struct ElementRef { uint offset; };
|
||||
|
||||
// Two 2D points, p0 and p1.
struct LineSeg {
    vec2 p0, p1;
};

// Encoded size in bytes: the four 32-bit words consumed by LineSeg_read().
#define LineSeg_size 16

// Returns the handle `index` LineSeg-sized strides past `ref`.
LineSegRef LineSeg_index(LineSegRef ref, uint index) {
    uint shifted = ref.offset + index * LineSeg_size;
    return LineSegRef(shifted);
}
|
||||
|
||||
// Three 2D points, p0..p2.
struct QuadSeg {
    vec2 p0, p1, p2;
};

// Encoded size in bytes: the six 32-bit words consumed by QuadSeg_read().
#define QuadSeg_size 24

// Returns the handle `index` QuadSeg-sized strides past `ref`.
QuadSegRef QuadSeg_index(QuadSegRef ref, uint index) {
    uint shifted = ref.offset + index * QuadSeg_size;
    return QuadSegRef(shifted);
}
|
||||
|
||||
// Four 2D points, p0..p3.
struct CubicSeg {
    vec2 p0, p1, p2, p3;
};

// Encoded size in bytes: the eight 32-bit words consumed by CubicSeg_read().
#define CubicSeg_size 32

// Returns the handle `index` CubicSeg-sized strides past `ref`.
CubicSegRef CubicSeg_index(CubicSegRef ref, uint index) {
    uint shifted = ref.offset + index * CubicSeg_size;
    return CubicSegRef(shifted);
}
|
||||
|
||||
// A single 32-bit colour word, stored verbatim by FillColor_read().
struct FillColor {
    uint rgba_color;
};

// Encoded size in bytes: one 32-bit word.
#define FillColor_size 4

// Returns the handle `index` FillColor-sized strides past `ref`.
FillColorRef FillColor_index(FillColorRef ref, uint index) {
    uint shifted = ref.offset + index * FillColor_size;
    return FillColorRef(shifted);
}
|
||||
|
||||
// An index plus a 2D integer offset. FillImage_read() unpacks the offset
// from two signed 16-bit halves of one word.
struct FillImage {
    uint index;
    ivec2 offset;
};

// Encoded size in bytes: two 32-bit words (index, packed offset).
#define FillImage_size 8

// Returns the handle `index` FillImage-sized strides past `ref`.
FillImageRef FillImage_index(FillImageRef ref, uint index) {
    uint shifted = ref.offset + index * FillImage_size;
    return FillImageRef(shifted);
}
|
||||
|
||||
// A single float width.
struct SetLineWidth {
    float width;
};

// Encoded size in bytes: one 32-bit word.
#define SetLineWidth_size 4

// Returns the handle `index` SetLineWidth-sized strides past `ref`.
SetLineWidthRef SetLineWidth_index(SetLineWidthRef ref, uint index) {
    uint shifted = ref.offset + index * SetLineWidth_size;
    return SetLineWidthRef(shifted);
}
|
||||
|
||||
// Four matrix coefficients in `mat` plus a 2D `translate`.
struct Transform {
    vec4 mat;
    vec2 translate;
};

// Encoded size in bytes: the six 32-bit words consumed by Transform_read().
#define Transform_size 24

// Returns the handle `index` Transform-sized strides past `ref`.
TransformRef Transform_index(TransformRef ref, uint index) {
    uint shifted = ref.offset + index * Transform_size;
    return TransformRef(shifted);
}
|
||||
|
||||
// A bounding box held in one vec4.
struct Clip {
    vec4 bbox;
};

// Encoded size in bytes: the four 32-bit words consumed by Clip_read().
#define Clip_size 16

// Returns the handle `index` Clip-sized strides past `ref`.
ClipRef Clip_index(ClipRef ref, uint index) {
    uint shifted = ref.offset + index * Clip_size;
    return ClipRef(shifted);
}
|
||||
|
||||
// A single fill-mode word.
struct SetFillMode {
    uint fill_mode;
};

// Encoded size in bytes: one 32-bit word.
#define SetFillMode_size 4

// Returns the handle `index` SetFillMode-sized strides past `ref`.
SetFillModeRef SetFillMode_index(SetFillModeRef ref, uint index) {
    uint shifted = ref.offset + index * SetFillMode_size;
    return SetFillModeRef(shifted);
}
|
||||
|
||||
// Tag values for scene elements. Element_tag() takes the tag from the low
// 16 bits of the first word of an element.
#define Element_Nop 0
#define Element_Line 1
#define Element_Quad 2
#define Element_Cubic 3
#define Element_FillColor 4
#define Element_SetLineWidth 5
#define Element_Transform 6
#define Element_BeginClip 7
#define Element_EndClip 8
#define Element_FillImage 9
#define Element_SetFillMode 10

// Stride of one element in bytes. 36 equals the 4-byte tag word plus 32
// bytes, the size of the largest payload defined in this file (CubicSeg).
#define Element_size 36

// Returns the handle `index` Element-sized strides past `ref`.
ElementRef Element_index(ElementRef ref, uint index) {
    uint shifted = ref.offset + index * Element_size;
    return ElementRef(shifted);
}

// Decoded first word of an element, as produced by Element_tag().
struct ElementTag {
    uint tag, flags;
};
|
||||
|
||||
// Decodes a LineSeg from four consecutive words of `scene`, starting at
// word index ref.offset / 4. Each word is reinterpreted as a float.
LineSeg LineSeg_read(LineSegRef ref) {
    uint base = ref.offset >> 2;
    LineSeg seg;
    seg.p0 = vec2(uintBitsToFloat(scene[base + 0]), uintBitsToFloat(scene[base + 1]));
    seg.p1 = vec2(uintBitsToFloat(scene[base + 2]), uintBitsToFloat(scene[base + 3]));
    return seg;
}
|
||||
|
||||
// Decodes a QuadSeg from six consecutive words of `scene`, starting at
// word index ref.offset / 4. Each word is reinterpreted as a float.
QuadSeg QuadSeg_read(QuadSegRef ref) {
    uint base = ref.offset >> 2;
    QuadSeg seg;
    seg.p0 = vec2(uintBitsToFloat(scene[base + 0]), uintBitsToFloat(scene[base + 1]));
    seg.p1 = vec2(uintBitsToFloat(scene[base + 2]), uintBitsToFloat(scene[base + 3]));
    seg.p2 = vec2(uintBitsToFloat(scene[base + 4]), uintBitsToFloat(scene[base + 5]));
    return seg;
}
|
||||
|
||||
// Decodes a CubicSeg from eight consecutive words of `scene`, starting at
// word index ref.offset / 4. Each word is reinterpreted as a float.
CubicSeg CubicSeg_read(CubicSegRef ref) {
    uint base = ref.offset >> 2;
    CubicSeg seg;
    seg.p0 = vec2(uintBitsToFloat(scene[base + 0]), uintBitsToFloat(scene[base + 1]));
    seg.p1 = vec2(uintBitsToFloat(scene[base + 2]), uintBitsToFloat(scene[base + 3]));
    seg.p2 = vec2(uintBitsToFloat(scene[base + 4]), uintBitsToFloat(scene[base + 5]));
    seg.p3 = vec2(uintBitsToFloat(scene[base + 6]), uintBitsToFloat(scene[base + 7]));
    return seg;
}
|
||||
|
||||
// Decodes a FillColor: the single word at index ref.offset / 4 of `scene`
// is stored unchanged in rgba_color.
FillColor FillColor_read(FillColorRef ref) {
    uint base = ref.offset >> 2;
    FillColor fill;
    fill.rgba_color = scene[base + 0];
    return fill;
}
|
||||
|
||||
// Decodes a FillImage from two words of `scene`: the first is the index,
// the second packs the offset as two signed 16-bit values.
FillImage FillImage_read(FillImageRef ref) {
    uint base = ref.offset >> 2;
    uint xy = scene[base + 1];
    FillImage img;
    img.index = scene[base + 0];
    // Low half: move it to the top of the word, then shift back down as a
    // signed int so the sign bit is extended.
    int x = int(xy << 16) >> 16;
    // High half: a signed right shift extends the sign directly.
    int y = int(xy) >> 16;
    img.offset = ivec2(x, y);
    return img;
}
|
||||
|
||||
// Decodes a SetLineWidth: the word at index ref.offset / 4 of `scene` is
// reinterpreted as a float width.
SetLineWidth SetLineWidth_read(SetLineWidthRef ref) {
    uint base = ref.offset >> 2;
    SetLineWidth lw;
    lw.width = uintBitsToFloat(scene[base + 0]);
    return lw;
}
|
||||
|
||||
// Decodes a Transform from six consecutive words of `scene`: four floats
// for `mat`, then two for `translate`.
Transform Transform_read(TransformRef ref) {
    uint base = ref.offset >> 2;
    Transform xf;
    xf.mat = vec4(
        uintBitsToFloat(scene[base + 0]),
        uintBitsToFloat(scene[base + 1]),
        uintBitsToFloat(scene[base + 2]),
        uintBitsToFloat(scene[base + 3]));
    xf.translate = vec2(uintBitsToFloat(scene[base + 4]), uintBitsToFloat(scene[base + 5]));
    return xf;
}
|
||||
|
||||
// Decodes a Clip from four consecutive words of `scene`, reinterpreted as
// the four float components of bbox.
Clip Clip_read(ClipRef ref) {
    uint base = ref.offset >> 2;
    Clip clip;
    clip.bbox = vec4(
        uintBitsToFloat(scene[base + 0]),
        uintBitsToFloat(scene[base + 1]),
        uintBitsToFloat(scene[base + 2]),
        uintBitsToFloat(scene[base + 3]));
    return clip;
}
|
||||
|
||||
// Decodes a SetFillMode: the word at index ref.offset / 4 of `scene` is
// stored unchanged in fill_mode.
SetFillMode SetFillMode_read(SetFillModeRef ref) {
    uint base = ref.offset >> 2;
    SetFillMode mode;
    mode.fill_mode = scene[base + 0];
    return mode;
}
|
||||
|
||||
// Splits the first word of the element at `ref` into its tag (low 16 bits)
// and flags (high 16 bits).
ElementTag Element_tag(ElementRef ref) {
    uint word = scene[ref.offset >> 2];
    ElementTag result;
    result.tag = word & 0xffff;
    result.flags = word >> 16;
    return result;
}
|
||||
|
||||
// Reads the LineSeg payload that follows the 4-byte tag word of `ref`.
LineSeg Element_Line_read(ElementRef ref) {
    LineSegRef payload = LineSegRef(ref.offset + 4);
    return LineSeg_read(payload);
}
|
||||
|
||||
// Reads the QuadSeg payload that follows the 4-byte tag word of `ref`.
QuadSeg Element_Quad_read(ElementRef ref) {
    QuadSegRef payload = QuadSegRef(ref.offset + 4);
    return QuadSeg_read(payload);
}
|
||||
|
||||
// Reads the CubicSeg payload that follows the 4-byte tag word of `ref`.
CubicSeg Element_Cubic_read(ElementRef ref) {
    CubicSegRef payload = CubicSegRef(ref.offset + 4);
    return CubicSeg_read(payload);
}
|
||||
|
||||
// Reads the FillColor payload that follows the 4-byte tag word of `ref`.
FillColor Element_FillColor_read(ElementRef ref) {
    FillColorRef payload = FillColorRef(ref.offset + 4);
    return FillColor_read(payload);
}
|
||||
|
||||
// Reads the SetLineWidth payload that follows the 4-byte tag word of `ref`.
SetLineWidth Element_SetLineWidth_read(ElementRef ref) {
    SetLineWidthRef payload = SetLineWidthRef(ref.offset + 4);
    return SetLineWidth_read(payload);
}
|
||||
|
||||
// Reads the Transform payload that follows the 4-byte tag word of `ref`.
Transform Element_Transform_read(ElementRef ref) {
    TransformRef payload = TransformRef(ref.offset + 4);
    return Transform_read(payload);
}
|
||||
|
||||
// Reads the Clip payload that follows the 4-byte tag word of `ref`.
Clip Element_BeginClip_read(ElementRef ref) {
    ClipRef payload = ClipRef(ref.offset + 4);
    return Clip_read(payload);
}
|
||||
|
||||
// Reads the Clip payload that follows the 4-byte tag word of `ref`. The
// encoding is the same as for Element_BeginClip_read().
Clip Element_EndClip_read(ElementRef ref) {
    ClipRef payload = ClipRef(ref.offset + 4);
    return Clip_read(payload);
}
|
||||
|
||||
// Reads the FillImage payload that follows the 4-byte tag word of `ref`.
FillImage Element_FillImage_read(ElementRef ref) {
    FillImageRef payload = FillImageRef(ref.offset + 4);
    return FillImage_read(payload);
}
|
||||
|
||||
// Reads the SetFillMode payload that follows the 4-byte tag word of `ref`.
SetFillMode Element_SetFillMode_read(ElementRef ref) {
    SetFillModeRef payload = SetFillModeRef(ref.offset + 4);
    return SetFillMode_read(payload);
}
|
||||
|
||||
@@ -1,51 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// Various constants for the sizes of groups and tiles.
|
||||
|
||||
// Much of this will be made dynamic in various ways, but for now it's easiest
|
||||
// to hardcode and keep all in one place.
|
||||
|
||||
// A LG_WG_FACTOR of n scales workgroup sizes by 2^n. Use 0 for a
|
||||
// maximum workgroup size of 128, or 1 for a maximum size of 256.
|
||||
// Base-2 logarithm of the workgroup scale factor.
#define LG_WG_FACTOR 0
|
||||
// The scale factor itself, 2^LG_WG_FACTOR (1 with the value above).
#define WG_FACTOR (1<<LG_WG_FACTOR)
|
||||
|
||||
// Tile dimensions in pixels.
#define TILE_WIDTH_PX 32
|
||||
#define TILE_HEIGHT_PX 32
|
||||
|
||||
// NOTE(review): presumably the initial size of a per-tile command list
// allocation; unit and use are not visible in this header.
#define PTCL_INITIAL_ALLOC 1024
|
||||
|
||||
// These should probably be renamed and/or reworked. In the binning
|
||||
// kernel, they represent the number of bins. Also, the workgroup size
|
||||
// of that kernel is equal to the number of bins, but should probably
|
||||
// be more flexible (it's 512 in the K&L paper).
|
||||
#define N_TILE_X 16
|
||||
#define N_TILE_Y (8 * WG_FACTOR)
|
||||
// 16 * 8 * WG_FACTOR, i.e. 128 when LG_WG_FACTOR is 0.
#define N_TILE (N_TILE_X * N_TILE_Y)
|
||||
// Base-2 logarithm of N_TILE: 2^7 = 128, scaled by the same factor.
#define LG_N_TILE (7 + LG_WG_FACTOR)
|
||||
// N_TILE divided into groups of 32.
#define N_SLICE (N_TILE / 32)
|
||||
|
||||
// Per-frame configuration shared by the compute kernels: element and path
// segment counts, the render size in tiles, and one Alloc per working
// buffer. The original source annotates n_elements as "paths".
struct Config {
    uint n_elements, n_pathseg;
    uint width_in_tiles, height_in_tiles;
    Alloc tile_alloc, bin_alloc, ptcl_alloc;
    Alloc pathseg_alloc, anno_alloc, trans_alloc;
};
|
||||
|
||||
// Fill modes.
|
||||
// The two values fill_mode_from_flags() can return (it masks with 0x1).
#define MODE_NONZERO 0
|
||||
#define MODE_STROKE 1
|
||||
|
||||
// Size of kernel4 clip state, in words.
|
||||
#define CLIP_STATE_SIZE 2
|
||||
|
||||
// fill_mode_from_flags extracts the fill mode from tag flags.
|
||||
// Returns the least significant bit of `flags`, which selects between
// MODE_NONZERO (0) and MODE_STROKE (1).
uint fill_mode_from_flags(uint flags) {
    uint lowest_bit = flags & 0x1;
    return lowest_bit;
}
|
||||
@@ -1,73 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
// Handle into the `state` word array. State_read()/State_write() shift the
// offset right by 2 to obtain the word index.
struct StateRef { uint offset; };

// Running state record: a transform (mat, translate), a bounding box, the
// line width, a flags word and three counters.
struct State {
    vec4 mat;
    vec2 translate;
    vec4 bbox;
    float linewidth;
    uint flags;
    uint path_count, pathseg_count, trans_count;
};

// Encoded size in bytes: the fifteen 32-bit words used by State_read().
#define State_size 60

// Returns the handle `index` State-sized strides past `ref`.
StateRef State_index(StateRef ref, uint index) {
    uint shifted = ref.offset + index * State_size;
    return StateRef(shifted);
}
|
||||
|
||||
// Decodes a State from fifteen consecutive words of `state`, starting at
// word index ref.offset / 4. Words 0-10 are reinterpreted as floats; words
// 11-14 are copied as unsigned integers.
State State_read(StateRef ref) {
    uint base = ref.offset >> 2;
    State st;
    st.mat = vec4(
        uintBitsToFloat(state[base + 0]),
        uintBitsToFloat(state[base + 1]),
        uintBitsToFloat(state[base + 2]),
        uintBitsToFloat(state[base + 3]));
    st.translate = vec2(uintBitsToFloat(state[base + 4]), uintBitsToFloat(state[base + 5]));
    st.bbox = vec4(
        uintBitsToFloat(state[base + 6]),
        uintBitsToFloat(state[base + 7]),
        uintBitsToFloat(state[base + 8]),
        uintBitsToFloat(state[base + 9]));
    st.linewidth = uintBitsToFloat(state[base + 10]);
    st.flags = state[base + 11];
    st.path_count = state[base + 12];
    st.pathseg_count = state[base + 13];
    st.trans_count = state[base + 14];
    return st;
}
|
||||
|
||||
// Encodes `s` into fifteen consecutive words of `state`, starting at word
// index ref.offset / 4, using the same layout State_read() decodes.
void State_write(StateRef ref, State s) {
    uint base = ref.offset >> 2;
    // Transform: four matrix coefficients followed by the translation.
    state[base + 0] = floatBitsToUint(s.mat.x);
    state[base + 1] = floatBitsToUint(s.mat.y);
    state[base + 2] = floatBitsToUint(s.mat.z);
    state[base + 3] = floatBitsToUint(s.mat.w);
    state[base + 4] = floatBitsToUint(s.translate.x);
    state[base + 5] = floatBitsToUint(s.translate.y);
    // Bounding box.
    state[base + 6] = floatBitsToUint(s.bbox.x);
    state[base + 7] = floatBitsToUint(s.bbox.y);
    state[base + 8] = floatBitsToUint(s.bbox.z);
    state[base + 9] = floatBitsToUint(s.bbox.w);
    state[base + 10] = floatBitsToUint(s.linewidth);
    // Integer fields are stored verbatim.
    state[base + 11] = s.flags;
    state[base + 12] = s.path_count;
    state[base + 13] = s.pathseg_count;
    state[base + 14] = s.trans_count;
}
|
||||
|
||||
@@ -1,81 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
precision mediump float;
|
||||
|
||||
// Quadratic curve points received from the vertex stage (start, control,
// end). NOTE(review): presumably relative to the fragment position, since
// stencil.vert subtracts the vertex position before writing them; confirm.
layout(location=0) in vec2 vFrom;
|
||||
layout(location=1) in vec2 vCtrl;
|
||||
layout(location=2) in vec2 vTo;
|
||||
|
||||
// Coverage output; main() writes only the red channel.
layout(location = 0) out vec4 fragCover;
|
||||
|
||||
// Computes the coverage contribution of one quadratic Bézier segment
// (vFrom, vCtrl, vTo) for this fragment and writes it to fragCover.r.
// The curve is approximated by a line through its point at the middle of
// the fragment's horizontal extent, and the fragment area above that line
// is computed.
void main() {
    // Sort from and to in increasing order so the root below
    // is always the positive square root, if any.
    // We need the direction of the curve below, so this can't be
    // done from the vertex shader.
    bool increasing = vTo.x >= vFrom.x;
    vec2 left = increasing ? vFrom : vTo;
    vec2 right = increasing ? vTo : vFrom;

    // The signed horizontal extent of the fragment.
    vec2 extent = clamp(vec2(vFrom.x, vTo.x), -0.5, 0.5);
    // Find the t where the curve crosses the middle of the
    // extent, x₀.
    // Given the Bézier curve with x coordinates P₀, P₁, P₂
    // where P₀ is at the origin, its x coordinate in t
    // is given by:
    //
    // x(t) = 2(1-t)tP₁ + t²P₂
    //
    // Rearranging:
    //
    // x(t) = (P₂ - 2P₁)t² + 2P₁t
    //
    // Setting x(t) = x₀ and using Muller's quadratic formula ("Citardauq")
    // for robustness,
    //
    // t = 2x₀/(2P₁±√(4P₁²+4(P₂-2P₁)x₀))
    //
    // which simplifies to
    //
    // t = x₀/(P₁±√(P₁²+(P₂-2P₁)x₀))
    //
    // Setting v = P₂-P₁,
    //
    // t = x₀/(P₁±√(P₁²+(v-P₁)x₀))
    //
    // t lies in [0; 1]; P₂ ≥ P₁ and P₁ ≥ 0 since we split curves where
    // the control point lies before the start point or after the end point.
    // It can then be shown that only the positive square root is valid.
    float midx = mix(extent.x, extent.y, 0.5);
    float x0 = midx - left.x;
    vec2 p1 = vCtrl - left;
    vec2 v = right - vCtrl;
    float t = x0/(p1.x+sqrt(p1.x*p1.x+(v.x-p1.x)*x0));
    // Find y(t) on the curve.
    float y = mix(mix(left.y, vCtrl.y, t), mix(vCtrl.y, right.y, t), t);
    // And the slope.
    vec2 d_half = mix(p1, v, t);
    float dy = d_half.y/d_half.x;
    // Together, y and dy form a line approximation.

    // Compute the fragment area above the line.
    // The area is symmetric around dy = 0. Scale slope with extent width.
    float width = extent.y - extent.x;
    dy = abs(dy*width);

    vec4 sides = vec4(dy*+0.5 + y, dy*-0.5 + y, (+0.5-y)/dy, (-0.5-y)/dy);
    sides = clamp(sides+0.5, 0.0, 1.0);

    float area = 0.5*(sides.z - sides.z*sides.y + 1.0 - sides.x+sides.x*sides.w);
    area *= width;

    // Work around issue #13.
    if (width == 0.0)
        area = 0.0;

    fragCover.r = area;
}
|
||||
@@ -1,53 +0,0 @@
|
||||
#version 310 es
|
||||
|
||||
// SPDX-License-Identifier: Unlicense OR MIT
|
||||
|
||||
precision highp float;
|
||||
|
||||
// Per-draw uniforms. main() multiplies the vertex position by transform.xy
// and adds transform.zw; pathOffset is added to every curve point and to
// maxy before anything else is computed.
layout(binding = 0) uniform Block {
|
||||
vec4 transform;
|
||||
vec2 pathOffset;
|
||||
} _block;
|
||||
|
||||
// Selects which corner of the covering quad this vertex is: main() tests
// it against 0.375 (north/south) and then 0.125 (east/west).
layout(location=0) in float corner;
|
||||
// Upper y bound used for the north edge of the quad.
layout(location=1) in float maxy;
|
||||
// Quadratic curve points: start, control, end.
layout(location=2) in vec2 from;
|
||||
layout(location=3) in vec2 ctrl;
|
||||
layout(location=4) in vec2 to;
|
||||
|
||||
// Curve points relative to the emitted vertex position.
layout(location=0) out vec2 vFrom;
|
||||
layout(location=1) out vec2 vCtrl;
|
||||
layout(location=2) out vec2 vTo;
|
||||
|
||||
// Emits one corner of the quad covering a quadratic curve segment and
// passes the curve points, relative to that corner, to the fragment stage.
void main() {
    // Add a one pixel overlap so curve quads cover their
    // entire curves. Could use conservative rasterization
    // if available.
    vec2 shift = _block.pathOffset;
    vec2 p_from = from + shift;
    vec2 p_ctrl = ctrl + shift;
    vec2 p_to = to + shift;
    float top = maxy + shift.y;

    float c = corner;
    vec2 pos;

    // Vertical side: corner values of 0.375 and above select the north
    // edge. Removing 0.5 leaves only the east/west part for the next test.
    bool north = c >= 0.375;
    if (north) {
        c -= 0.5;
        pos.y = top + 1.0;
    } else {
        pos.y = min(min(p_from.y, p_ctrl.y), p_to.y) - 1.0;
    }

    // Horizontal side.
    bool east = c >= 0.125;
    if (east) {
        pos.x = max(max(p_from.x, p_ctrl.x), p_to.x)+1.0;
    } else {
        pos.x = min(min(p_from.x, p_ctrl.x), p_to.x)-1.0;
    }

    vFrom = p_from-pos;
    vCtrl = p_ctrl-pos;
    vTo = p_to-pos;

    pos = pos*_block.transform.xy + _block.transform.zw;
    gl_Position = vec4(pos, 1, 1);
}
|
||||
|
||||
@@ -1,150 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// Code auto-generated by piet-gpu-derive
|
||||
|
||||
// Typed handles into shared memory. Each wraps a single `offset`, which the
// *_read / *_write functions in this file shift right by 2 to obtain a word
// index for read_mem() / write_mem().
struct PathRef { uint offset; };
struct TileRef { uint offset; };
struct TileSegRef { uint offset; };
struct TransformSegRef { uint offset; };
|
||||
|
||||
// A bounding box plus a handle to the path's tiles. Path_write() stores
// each bbox component in 16 bits, two per word.
struct Path {
    uvec4 bbox;
    TileRef tiles;
};

// Encoded size in bytes: three 32-bit words (two for bbox, one for tiles).
#define Path_size 12

// Returns the handle `index` Path-sized strides past `ref`.
PathRef Path_index(PathRef ref, uint index) {
    uint shifted = ref.offset + index * Path_size;
    return PathRef(shifted);
}
|
||||
|
||||
// A tile segment handle plus a signed backdrop value.
struct Tile {
    TileSegRef tile;
    int backdrop;
};

// Encoded size in bytes: two 32-bit words.
#define Tile_size 8

// Returns the handle `index` Tile-sized strides past `ref`.
TileRef Tile_index(TileRef ref, uint index) {
    uint shifted = ref.offset + index * Tile_size;
    return TileRef(shifted);
}
|
||||
|
||||
// One segment within a tile: an origin, a vector, a y_edge value and a
// handle to the next segment.
struct TileSeg {
    vec2 origin, vector;
    float y_edge;
    TileSegRef next;
};

// Encoded size in bytes: the six 32-bit words used by TileSeg_read().
#define TileSeg_size 24

// Returns the handle `index` TileSeg-sized strides past `ref`.
TileSegRef TileSeg_index(TileSegRef ref, uint index) {
    uint shifted = ref.offset + index * TileSeg_size;
    return TileSegRef(shifted);
}
|
||||
|
||||
// Four matrix coefficients in `mat` plus a 2D `translate`.
struct TransformSeg {
    vec4 mat;
    vec2 translate;
};

// Encoded size in bytes: the six 32-bit words used by TransformSeg_read().
#define TransformSeg_size 24

// Returns the handle `index` TransformSeg-sized strides past `ref`.
TransformSegRef TransformSeg_index(TransformSegRef ref, uint index) {
    uint shifted = ref.offset + index * TransformSeg_size;
    return TransformSegRef(shifted);
}
|
||||
|
||||
// Decodes a Path from three words of allocation `a`. The first two words
// each hold two 16-bit bbox components (low half first); the third is the
// tiles offset.
Path Path_read(Alloc a, PathRef ref) {
    uint base = ref.offset >> 2;
    uint xy0 = read_mem(a, base + 0);
    uint xy1 = read_mem(a, base + 1);
    uint tiles_offset = read_mem(a, base + 2);
    Path path;
    path.bbox = uvec4(xy0 & 0xffff, xy0 >> 16, xy1 & 0xffff, xy1 >> 16);
    path.tiles = TileRef(tiles_offset);
    return path;
}
|
||||
|
||||
// Encodes `s` into three words of allocation `a`: bbox.x|bbox.y<<16,
// bbox.z|bbox.w<<16, then the tiles offset. Components are not masked, so
// they are expected to fit in 16 bits.
void Path_write(Alloc a, PathRef ref, Path s) {
    uint base = ref.offset >> 2;
    uint xy0 = s.bbox.x | (s.bbox.y << 16);
    uint xy1 = s.bbox.z | (s.bbox.w << 16);
    write_mem(a, base + 0, xy0);
    write_mem(a, base + 1, xy1);
    write_mem(a, base + 2, s.tiles.offset);
}
|
||||
|
||||
// Decodes a Tile from two words of allocation `a`: the segment list offset
// and the backdrop, which is converted from uint to int.
Tile Tile_read(Alloc a, TileRef ref) {
    uint base = ref.offset >> 2;
    uint seg_offset = read_mem(a, base + 0);
    uint backdrop_bits = read_mem(a, base + 1);
    Tile tile;
    tile.tile = TileSegRef(seg_offset);
    tile.backdrop = int(backdrop_bits);
    return tile;
}
|
||||
|
||||
// Encodes `s` into two words of allocation `a`: the segment list offset,
// then the backdrop converted to uint.
void Tile_write(Alloc a, TileRef ref, Tile s) {
    uint base = ref.offset >> 2;
    uint backdrop_bits = uint(s.backdrop);
    write_mem(a, base + 0, s.tile.offset);
    write_mem(a, base + 1, backdrop_bits);
}
|
||||
|
||||
// Decodes a TileSeg from six words of allocation `a`: origin (2 floats),
// vector (2 floats), y_edge (1 float) and the offset of the next segment.
TileSeg TileSeg_read(Alloc a, TileSegRef ref) {
    uint base = ref.offset >> 2;
    uint w0 = read_mem(a, base + 0);
    uint w1 = read_mem(a, base + 1);
    uint w2 = read_mem(a, base + 2);
    uint w3 = read_mem(a, base + 3);
    uint w4 = read_mem(a, base + 4);
    uint w5 = read_mem(a, base + 5);
    TileSeg seg;
    seg.origin = vec2(uintBitsToFloat(w0), uintBitsToFloat(w1));
    seg.vector = vec2(uintBitsToFloat(w2), uintBitsToFloat(w3));
    seg.y_edge = uintBitsToFloat(w4);
    seg.next = TileSegRef(w5);
    return seg;
}
|
||||
|
||||
// Encodes `s` into six words of allocation `a`, using the same layout
// TileSeg_read() decodes.
void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s) {
    uint base = ref.offset >> 2;
    // Floats are stored by bit pattern.
    write_mem(a, base + 0, floatBitsToUint(s.origin.x));
    write_mem(a, base + 1, floatBitsToUint(s.origin.y));
    write_mem(a, base + 2, floatBitsToUint(s.vector.x));
    write_mem(a, base + 3, floatBitsToUint(s.vector.y));
    write_mem(a, base + 4, floatBitsToUint(s.y_edge));
    // Link to the next segment.
    write_mem(a, base + 5, s.next.offset);
}
|
||||
|
||||
// Decodes a TransformSeg from six words of allocation `a`: four floats for
// `mat`, then two for `translate`.
TransformSeg TransformSeg_read(Alloc a, TransformSegRef ref) {
    uint base = ref.offset >> 2;
    uint w0 = read_mem(a, base + 0);
    uint w1 = read_mem(a, base + 1);
    uint w2 = read_mem(a, base + 2);
    uint w3 = read_mem(a, base + 3);
    uint w4 = read_mem(a, base + 4);
    uint w5 = read_mem(a, base + 5);
    TransformSeg xf;
    xf.mat = vec4(uintBitsToFloat(w0), uintBitsToFloat(w1), uintBitsToFloat(w2), uintBitsToFloat(w3));
    xf.translate = vec2(uintBitsToFloat(w4), uintBitsToFloat(w5));
    return xf;
}
|
||||
|
||||
// Encodes `s` into six words of allocation `a`, using the same layout
// TransformSeg_read() decodes. Floats are stored by bit pattern.
void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) {
    uint base = ref.offset >> 2;
    // Matrix coefficients.
    write_mem(a, base + 0, floatBitsToUint(s.mat.x));
    write_mem(a, base + 1, floatBitsToUint(s.mat.y));
    write_mem(a, base + 2, floatBitsToUint(s.mat.z));
    write_mem(a, base + 3, floatBitsToUint(s.mat.w));
    // Translation.
    write_mem(a, base + 4, floatBitsToUint(s.translate.x));
    write_mem(a, base + 5, floatBitsToUint(s.translate.y));
}
|
||||
|
||||
@@ -1,104 +0,0 @@
|
||||
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
||||
|
||||
// Allocation and initialization of tiles for paths.
|
||||
|
||||
#version 450
|
||||
#extension GL_GOOGLE_include_directive : enable
|
||||
|
||||
#include "mem.h"
|
||||
#include "setup.h"
|
||||
|
||||
// Base-2 logarithm of the workgroup size; also the number of passes of the
// prefix sum in main().
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
|
||||
// Workgroup size: 2^LG_TILE_ALLOC_WG invocations, one element each.
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
|
||||
|
||||
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
|
||||
|
||||
// Read-only configuration (element count, size in tiles, allocations).
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
||||
Config conf;
|
||||
};
|
||||
|
||||
#include "annotated.h"
|
||||
#include "tile.h"
|
||||
|
||||
// scale factors useful for converting coordinates to tiles
|
||||
#define SX (1.0 / float(TILE_WIDTH_PX))
|
||||
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
||||
|
||||
// Per-invocation tile counts, turned into an inclusive prefix sum by main().
shared uint sh_tile_count[TILE_ALLOC_WG];
|
||||
// Result of the single malloc() made by the last invocation of the group.
shared MallocResult sh_tile_alloc;
|
||||
|
||||
// Allocates and zeroes the tiles for one path per invocation.
//
// Each invocation computes the tile-space bounding box of its element,
// the workgroup prefix-sums the tile counts, the last invocation makes one
// allocation for the whole group, and each invocation then records its
// slice of that allocation in its Path. Finally the whole allocation is
// cleared cooperatively.
void main() {
    uint lane = gl_LocalInvocationID.x;
    uint elem = gl_GlobalInvocationID.x;
    PathRef path_ref = PathRef(conf.tile_alloc.offset + elem * Path_size);
    AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + elem * Annotated_size);

    // Invocations past the end of the element list behave like a no-op
    // element: empty bbox, zero tiles.
    uint tag = Annotated_Nop;
    if (elem < conf.n_elements) {
        tag = Annotated_tag(conf.anno_alloc, ref).tag;
    }

    int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
    bool has_bbox = tag == Annotated_Color
        || tag == Annotated_Image
        || tag == Annotated_BeginClip
        || tag == Annotated_EndClip;
    if (has_bbox) {
        // Note: we take advantage of the fact that fills, strokes, and
        // clips have compatible layout.
        AnnoEndClip clip = Annotated_EndClip_read(conf.anno_alloc, ref);
        x0 = int(floor(clip.bbox.x * SX));
        y0 = int(floor(clip.bbox.y * SY));
        x1 = int(ceil(clip.bbox.z * SX));
        y1 = int(ceil(clip.bbox.w * SY));
    }

    // Clamp the bbox to the render target, in tiles.
    int max_x = int(conf.width_in_tiles);
    int max_y = int(conf.height_in_tiles);
    x0 = clamp(x0, 0, max_x);
    y0 = clamp(y0, 0, max_y);
    x1 = clamp(x1, 0, max_x);
    y1 = clamp(y1, 0, max_y);

    Path path;
    path.bbox = uvec4(x0, y0, x1, y1);
    uint tile_count = (x1 - x0) * (y1 - y0);
    if (tag == Annotated_EndClip) {
        // Don't actually allocate tiles for an end clip, but we do want
        // the path structure (especially bbox) allocated for it.
        tile_count = 0;
    }

    sh_tile_count[lane] = tile_count;
    uint running_total = tile_count;
    // Prefix sum of sh_tile_count
    for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
        barrier();
        if (lane >= (1 << i)) {
            running_total += sh_tile_count[lane - (1 << i)];
        }
        barrier();
        sh_tile_count[lane] = running_total;
    }

    // The last invocation holds the group total and allocates for everyone.
    if (lane == TILE_ALLOC_WG - 1) {
        sh_tile_alloc = malloc(running_total * Tile_size);
    }
    barrier();
    MallocResult alloc_start = sh_tile_alloc;
    if (alloc_start.failed || mem_error != NO_ERROR) {
        return;
    }

    if (elem < conf.n_elements) {
        // This invocation's tiles start after those of all lower lanes.
        uint tile_subix = 0;
        if (lane > 0) {
            tile_subix = sh_tile_count[lane - 1];
        }
        Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count);
        path.tiles = TileRef(tiles_alloc.offset);
        Path_write(conf.tile_alloc, path_ref, path);
    }

    // Zero out allocated tiles efficiently
    uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
    uint start_ix = alloc_start.alloc.offset >> 2;
    for (uint i = lane; i < total_count; i += TILE_ALLOC_WG) {
        // Note: this interleaving is faster than using Tile_write
        // by a significant amount.
        write_mem(alloc_start.alloc, start_ix + i, 0);
    }
}
|
||||
Reference in New Issue
Block a user