gpu: [compute] pre-transform images before rendering

We're about to change the last stage of the compute pipeline to only
accept images, not sampled textures. This change prepares materials
for pixel-aligned image copying by pre-rendering images to a texture,
applying transforms.

Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
Elias Naur
2021-02-08 17:35:31 +01:00
parent 87ffaaf8c4
commit c9a8265126
13 changed files with 533 additions and 241 deletions
+253 -51
View File
@@ -9,6 +9,7 @@ import (
"image"
"image/color"
"math"
"math/bits"
"time"
"unsafe"
@@ -58,6 +59,23 @@ type compute struct {
positions map[interface{}]image.Point
tex backend.Texture
}
// materials contains the pre-processed materials (transformed images for
// now, gradients etc. later) packed in a texture atlas. The atlas is used
// as source in kernel4.
materials struct {
// prog is the program built from the material vertex and fragment
// shaders; it renders the queued quads into the atlas.
prog backend.Program
// layout is the vertex input layout for prog: two pairs of floats
// (position, then texture coordinate) per vertex.
layout backend.InputLayout
// packer allocates atlas space for each material. It is cleared at the
// start of every encode.
packer packer
// texSize is the size in pixels of tex.
texSize image.Point
// tex is the atlas texture and fbo the framebuffer that renders into it.
tex backend.Texture
fbo backend.Framebuffer
// quads holds the vertices queued for the current frame, six (two
// triangles) per material.
quads []materialVertex
// bufSize is the capacity in bytes of buffer, the vertex buffer that
// quads is uploaded to.
bufSize int
buffer backend.Buffer
}
timers struct {
profile string
t *timers
@@ -75,6 +93,13 @@ type compute struct {
conf *config
}
// materialVertex describes a vertex of a quad used to render a transformed
// material. Its fields are uploaded as raw bytes, so their order and size
// (four consecutive float32 values) must match the material input layout.
type materialVertex struct {
// posX, posY is the vertex position: in atlas pixels when the quad is
// queued, converted to clip space by renderMaterials before upload.
posX, posY float32
// u, v is the texture coordinate into the image atlas, scaled by the
// reciprocal of the atlas dimension.
u, v float32
}
type encoder struct {
scene []byte
npath int
@@ -127,7 +152,7 @@ const (
pathSize = 12
binSize = 8
pathsegSize = 48
annoSize = 52
annoSize = 28
stateSize = 56
stateStride = 4 + 2*stateSize
sceneElemSize = 36
@@ -148,7 +173,7 @@ const (
elemTransform
elemBeginClip
elemEndClip
elemFillTexture
elemFillImage
)
// mem.h constants.
@@ -180,6 +205,22 @@ func newCompute(ctx backend.Device) (*compute, error) {
}
g.output.blitProg = blitProg
materialProg, err := ctx.NewProgram(shader_material_vert, shader_material_frag)
if err != nil {
g.Release()
return nil, err
}
g.materials.prog = materialProg
progLayout, err := ctx.NewInputLayout(shader_material_vert, []backend.InputDesc{
{Type: backend.DataTypeFloat, Size: 2, Offset: 0},
{Type: backend.DataTypeFloat, Size: 2, Offset: 4 * 2},
})
if err != nil {
g.Release()
return nil, err
}
g.materials.layout = progLayout
g.drawOps.pathCache = newOpCache()
g.drawOps.retainPathData = true
@@ -249,7 +290,12 @@ func (g *compute) Frame() error {
if err := g.uploadImages(g.drawOps.allImageOps); err != nil {
return err
}
g.encode(viewport)
if err := g.encode(viewport); err != nil {
return err
}
if err := g.renderMaterials(); err != nil {
return err
}
if err := g.render(tileDims); err != nil {
return err
}
@@ -286,8 +332,13 @@ func (g *compute) blitOutput(viewport image.Point) {
g.ctx.DrawArrays(backend.DrawModeTriangleStrip, 0, 4)
}
func (g *compute) encode(viewport image.Point) {
func (g *compute) encode(viewport image.Point) error {
g.materials.packer.maxDim = g.maxTextureDim
g.materials.packer.clear()
g.materials.packer.newPage()
g.materials.quads = g.materials.quads[:0]
g.enc.reset()
// Flip Y-axis.
flipY := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(1, -1)).Offset(f32.Pt(0, float32(viewport.Y)))
g.enc.transform(flipY)
@@ -296,7 +347,7 @@ func (g *compute) encode(viewport image.Point) {
g.enc.rect(f32.Rectangle{Max: layout.FPt(viewport)}, false)
g.enc.fill(f32color.NRGBAToRGBA(g.drawOps.clearColor.SRGB()))
}
g.encodeOps(flipY, viewport, g.drawOps.allImageOps)
return g.encodeOps(flipY, viewport, g.drawOps.allImageOps)
}
func (g *compute) uploadImages(ops []imageOp) error {
@@ -332,7 +383,7 @@ restart:
a.packer.maxDim += 256
resize = true
if a.packer.maxDim > g.maxTextureDim {
return errors.New("compute: no space left in atlas texture")
return errors.New("compute: no space left in image atlas")
}
}
a.packer.newPage()
@@ -361,7 +412,7 @@ restart:
sz := a.packer.maxDim
handle, err := g.ctx.NewTexture(backend.TextureFormatSRGB, sz, sz, backend.FilterLinear, backend.FilterLinear, backend.BufferBindingTexture)
if err != nil {
return fmt.Errorf("compute: failed to create atlas texture: %v", err)
return fmt.Errorf("compute: failed to create image atlas: %v", err)
}
a.tex = handle
}
@@ -380,47 +431,180 @@ restart:
return nil
}
func (g *compute) encodeOps(trans f32.Affine2D, viewport image.Point, ops []imageOp) {
// renderMaterials draws the quads queued by addMaterialQuad into the material
// atlas texture, sampling from the image atlas. The atlas texture, its
// framebuffer and the vertex buffer are re-created whenever the current frame
// needs more space than they provide.
//
// It returns nil without drawing when the packer reports an empty size for
// its first entry, and an error when a GPU resource can't be created. After
// an error the stale resources are gone and their recorded sizes are reset,
// so the next call retries the creation instead of using nil handles.
func (g *compute) renderMaterials() error {
	m := &g.materials
	outSize := m.packer.sizes[0]
	if outSize == (image.Point{}) {
		// Nothing was packed this frame.
		return nil
	}
	if outSize.X > m.texSize.X || outSize.Y > m.texSize.Y {
		if m.fbo != nil {
			m.fbo.Release()
			m.fbo = nil
		}
		if m.tex != nil {
			m.tex.Release()
			m.tex = nil
		}
		// Forget the old size: if creation fails below, a later call must
		// not conclude that the (now released) texture is large enough.
		m.texSize = image.Point{}
		// Round up to a power of 2 while we're doing an expensive recreation anyway.
		sz := image.Pt(pow2Ceil(outSize.X), pow2Ceil(outSize.Y))
		handle, err := g.ctx.NewTexture(backend.TextureFormatRGBA8, sz.X, sz.Y, backend.FilterNearest, backend.FilterNearest, backend.BufferBindingShaderStorage|backend.BufferBindingFramebuffer)
		if err != nil {
			return fmt.Errorf("compute: failed to create material atlas: %v", err)
		}
		fbo, err := g.ctx.NewFramebuffer(handle, 0)
		if err != nil {
			// The texture is useless without its framebuffer.
			handle.Release()
			return fmt.Errorf("compute: failed to create material framebuffer: %v", err)
		}
		// Commit the new state only once everything succeeded.
		m.tex = handle
		m.fbo = fbo
		m.texSize = sz
	}
	// TODO: move to shaders.
	// Transform to clip space: [-1, -1] - [1, 1].
	clip := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(2/float32(m.texSize.X), 2/float32(m.texSize.Y))).Offset(f32.Pt(-1, -1))
	for i, v := range m.quads {
		p := clip.Transform(f32.Pt(v.posX, v.posY))
		m.quads[i].posX = p.X
		m.quads[i].posY = p.Y
	}
	vertexData := gunsafe.BytesView(m.quads)
	if len(vertexData) > m.bufSize {
		if m.buffer != nil {
			m.buffer.Release()
			m.buffer = nil
		}
		// As for the texture: don't keep a stale capacity for a released buffer.
		m.bufSize = 0
		// Round up to a power of 2 to avoid re-creating the buffer for every
		// small growth.
		n := pow2Ceil(len(vertexData))
		buf, err := g.ctx.NewBuffer(backend.BufferBindingVertices, n)
		if err != nil {
			return fmt.Errorf("compute: failed to create material vertex buffer: %v", err)
		}
		m.bufSize = n
		m.buffer = buf
	}
	m.buffer.Upload(vertexData)
	g.ctx.BindTexture(0, g.images.tex)
	g.ctx.BindFramebuffer(m.fbo)
	g.ctx.Viewport(0, 0, m.texSize.X, m.texSize.Y)
	g.ctx.Clear(0, 0, 0, 0)
	g.ctx.BindProgram(m.prog)
	// The stride is the size of one vertex; unsafe.Sizeof doesn't evaluate
	// its operand, so indexing quads here is safe even when it is empty.
	g.ctx.BindVertexBuffer(m.buffer, int(unsafe.Sizeof(m.quads[0])), 0)
	g.ctx.BindInputLayout(m.layout)
	g.ctx.DrawArrays(backend.DrawModeTriangles, 0, len(m.quads))
	return nil
}
// pow2Ceil returns the smallest power of two that is greater than or equal
// to v. Values below 2 (including 0) yield 1.
func pow2Ceil(v int) int {
	u := uint(v)
	shift := bits.Len(u)
	// An exact power of two has a single bit set and is its own ceiling, so
	// it needs one bit less than bits.Len reports.
	if u != 0 && u&(u-1) == 0 {
		shift--
	}
	return 1 << shift
}
// addMaterialQuad queues a quad that renders img, transformed by M, into the
// material atlas, and reserves atlas space for it.
//
// It returns the pixel offset between the transformed image's integer bounds
// and its place in the atlas, that is, the offset that maps the material
// texture to the correct position in the rendered image. An error is
// returned when the material atlas has no room left for the quad.
//
// It panics if img has not been placed in the image atlas.
func (g *compute) addMaterialQuad(M f32.Affine2D, img imageOpData) (image.Point, error) {
	imgSize := layout.FPt(img.src.Bounds().Size())
	sx, hx, ox, hy, sy, oy := M.Elems()
	transOff := f32.Pt(ox, oy)
	// The 4 corners of the image rectangle transformed by M, excluding its offset, are:
	//
	//	q0: M * (0, 0)   q3: M * (w, 0)
	//	q1: M * (0, h)   q2: M * (w, h)
	//
	// Note that q0 = M*0 = 0, q2 = q1 + q3.
	q0 := f32.Pt(0, 0)
	q1 := f32.Pt(hx*imgSize.Y, sy*imgSize.Y)
	q3 := f32.Pt(sx*imgSize.X, hy*imgSize.X)
	q2 := q1.Add(q3)
	// Re-apply the offset of M.
	q0 = q0.Add(transOff)
	q1 = q1.Add(transOff)
	q2 = q2.Add(transOff)
	q3 = q3.Add(transOff)

	// Bounding box of the transformed corners.
	boundsf := f32.Rectangle{
		Min: min(min(q0, q1), min(q2, q3)),
		Max: max(max(q0, q1), max(q2, q3)),
	}

	bounds := boundRectF(boundsf)
	size := bounds.Size()
	// A material is clipped to avoid drawing outside its bounds inside the atlas. However,
	// imprecision in the clipping may cause a single pixel overflow. Be safe.
	size = size.Add(image.Pt(1, 1))
	place, fits := g.materials.packer.tryAdd(size)
	if !fits {
		// This is the material atlas, not the image atlas filled by uploadImages.
		return image.Point{}, errors.New("compute: no space left in material atlas")
	}
	// Position quad to match place.
	offset := place.Pos.Sub(bounds.Min)
	offsetf := layout.FPt(offset)
	q0 = q0.Add(offsetf)
	q1 = q1.Add(offsetf)
	q2 = q2.Add(offsetf)
	q3 = q3.Add(offsetf)

	// Texture coordinates of the source image inside the image atlas.
	uvPos, ok := g.images.positions[img.handle]
	if !ok {
		panic("compute: internal error: image not placed")
	}
	uvPosf := layout.FPt(uvPos)
	atlasScale := 1 / float32(g.images.packer.maxDim)
	uvBounds := f32.Rectangle{
		Min: uvPosf.Mul(atlasScale),
		Max: uvPosf.Add(imgSize).Mul(atlasScale),
	}
	quad := [4]materialVertex{
		{posX: q0.X, posY: q0.Y, u: uvBounds.Min.X, v: uvBounds.Min.Y},
		{posX: q1.X, posY: q1.Y, u: uvBounds.Min.X, v: uvBounds.Max.Y},
		{posX: q2.X, posY: q2.Y, u: uvBounds.Max.X, v: uvBounds.Max.Y},
		{posX: q3.X, posY: q3.Y, u: uvBounds.Max.X, v: uvBounds.Min.Y},
	}
	// Draw quad as two triangles.
	g.materials.quads = append(g.materials.quads, quad[0], quad[1], quad[3], quad[3], quad[1], quad[2])
	return offset, nil
}
// max returns the point whose coordinates are the component-wise maxima of
// p1 and p2.
func max(p1, p2 f32.Point) f32.Point {
	if p1.X < p2.X {
		p1.X = p2.X
	}
	if p1.Y < p2.Y {
		p1.Y = p2.Y
	}
	return p1
}
// min returns the point whose coordinates are the component-wise minima of
// p1 and p2.
func min(p1, p2 f32.Point) f32.Point {
	if p1.X > p2.X {
		p1.X = p2.X
	}
	if p1.Y > p2.Y {
		p1.Y = p2.Y
	}
	return p1
}
func (g *compute) encodeOps(trans f32.Affine2D, viewport image.Point, ops []imageOp) error {
for _, op := range ops {
bounds := layout.FRect(op.clip)
// clip is the union of all drawing affected by the clipping
// operation. TODO: tigthen.
// operation. TODO: tighten.
clip := f32.Rect(0, 0, float32(viewport.X), float32(viewport.Y))
nclips := g.encodeClipStack(clip, bounds, op.path)
m := op.material
switch m.material {
case materialTexture:
img := m.data
pos, ok := g.images.positions[img.handle]
if !ok {
panic("compute: internal error: image not placed")
t := trans.Mul(m.trans)
off, err := g.addMaterialQuad(t, m.data)
if err != nil {
return err
}
bounds := image.Rectangle{
Min: pos,
Max: pos.Add(img.src.Bounds().Size()),
}
maxDim := g.images.packer.maxDim
atlasSize := f32.Pt(float32(maxDim), float32(maxDim))
uvBounds := f32.Rectangle{
Min: f32.Point{
X: float32(bounds.Min.X) / atlasSize.X,
Y: float32(bounds.Min.Y) / atlasSize.Y,
},
Max: f32.Point{
X: float32(bounds.Max.X) / atlasSize.X,
Y: float32(bounds.Max.Y) / atlasSize.Y,
},
}
fpos := layout.FPt(pos)
texScale := f32.Pt(1.0/atlasSize.X, 1.0/atlasSize.Y)
mat := f32.Affine2D{}.
Mul(trans.Invert()).
Mul(f32.Affine2D{}.Scale(f32.Pt(0, 0), texScale)).
Mul(f32.Affine2D{}.Offset(fpos)).
Mul(trans.Mul(m.trans).Invert())
g.enc.transform(mat)
g.enc.fillTexture(uvBounds)
g.enc.transform(mat.Invert())
g.enc.fillImage(0, off)
case materialColor:
g.enc.fill(f32color.NRGBAToRGBA(op.material.color.SRGB()))
case materialLinearGradient:
@@ -434,6 +618,7 @@ func (g *compute) encodeOps(trans f32.Affine2D, viewport image.Point, ops []imag
g.enc.endClip(clip)
}
}
return nil
}
// encodeClips encodes a stack of clip paths and returns the stack depth.
@@ -519,8 +704,8 @@ func (g *compute) render(tileDims image.Point) error {
}
}
g.ctx.BindImageTexture(kernel4OutputUnit, g.output.image, backend.AccessWrite, backend.TextureFormatRGBA8)
if g.images.tex != nil {
g.ctx.BindTexture(kernel4AtlasUnit, g.images.tex)
if t := g.materials.tex; t != nil {
g.ctx.BindImageTexture(kernel4AtlasUnit, t, backend.AccessRead, backend.TextureFormatRGBA8)
}
// alloc is the number of allocated bytes for static buffers.
@@ -663,8 +848,12 @@ func (g *compute) resizeOutput(size image.Point) error {
}
func (g *compute) Release() {
g.drawOps.pathCache.release()
g.cache.release()
if g.drawOps.pathCache != nil {
g.drawOps.pathCache.release()
}
if g.cache != nil {
g.cache.release()
}
progs := []backend.Program{
g.programs.elements,
g.programs.tileAlloc,
@@ -694,6 +883,21 @@ func (g *compute) Release() {
if g.images.tex != nil {
g.images.tex.Release()
}
if g.materials.layout != nil {
g.materials.layout.Release()
}
if g.materials.prog != nil {
g.materials.prog.Release()
}
if g.materials.fbo != nil {
g.materials.fbo.Release()
}
if g.materials.tex != nil {
g.materials.tex.Release()
}
if g.materials.buffer != nil {
g.materials.buffer.Release()
}
if g.timers.t != nil {
g.timers.t.release()
}
@@ -828,15 +1032,13 @@ func (e *encoder) fill(col color.RGBA) {
e.npath++
}
func (e *encoder) fillTexture(uvBounds f32.Rectangle) {
func (e *encoder) fillImage(index int, offset image.Point) {
cmd := make([]byte, sceneElemSize)
bo.PutUint32(cmd, elemFillTexture)
umin := uint16(uvBounds.Min.X*math.MaxUint16 + .5)
vmin := uint16(uvBounds.Min.Y*math.MaxUint16 + .5)
umax := uint16(uvBounds.Max.X*math.MaxUint16 + .5)
vmax := uint16(uvBounds.Max.Y*math.MaxUint16 + .5)
bo.PutUint32(cmd[4:8], uint32(umin)|uint32(vmin)<<16)
bo.PutUint32(cmd[8:12], uint32(umax)|uint32(vmax)<<16)
bo.PutUint32(cmd, elemFillImage)
x := int16(offset.X)
y := int16(offset.Y)
bo.PutUint32(cmd[4:8], uint32(index))
bo.PutUint32(cmd[8:12], uint32(uint16(x))|uint32(uint16(y))<<16)
e.cmd(cmd)
e.npath++
}
+105 -7
View File
File diff suppressed because one or more lines are too long
+21 -35
View File
@@ -6,7 +6,7 @@ struct AnnoFillRef {
uint offset;
};
struct AnnoFillTextureRef {
struct AnnoFillImageRef {
uint offset;
};
@@ -33,17 +33,16 @@ AnnoFillRef AnnoFill_index(AnnoFillRef ref, uint index) {
return AnnoFillRef(ref.offset + index * AnnoFill_size);
}
struct AnnoFillTexture {
struct AnnoFillImage {
vec4 bbox;
vec4 mat;
vec2 translate;
uvec2 uv_bounds;
uint index;
ivec2 offset;
};
#define AnnoFillTexture_size 48
#define AnnoFillImage_size 24
AnnoFillTextureRef AnnoFillTexture_index(AnnoFillTextureRef ref, uint index) {
return AnnoFillTextureRef(ref.offset + index * AnnoFillTexture_size);
AnnoFillImageRef AnnoFillImage_index(AnnoFillImageRef ref, uint index) {
return AnnoFillImageRef(ref.offset + index * AnnoFillImage_size);
}
struct AnnoStroke {
@@ -71,10 +70,10 @@ AnnoClipRef AnnoClip_index(AnnoClipRef ref, uint index) {
#define Annotated_Nop 0
#define Annotated_Stroke 1
#define Annotated_Fill 2
#define Annotated_FillTexture 3
#define Annotated_FillImage 3
#define Annotated_BeginClip 4
#define Annotated_EndClip 5
#define Annotated_size 52
#define Annotated_size 28
AnnotatedRef Annotated_index(AnnotatedRef ref, uint index) {
return AnnotatedRef(ref.offset + index * Annotated_size);
@@ -102,7 +101,7 @@ void AnnoFill_write(Alloc a, AnnoFillRef ref, AnnoFill s) {
write_mem(a, ix + 4, s.rgba_color);
}
AnnoFillTexture AnnoFillTexture_read(Alloc a, AnnoFillTextureRef ref) {
AnnoFillImage AnnoFillImage_read(Alloc a, AnnoFillImageRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
@@ -110,34 +109,21 @@ AnnoFillTexture AnnoFillTexture_read(Alloc a, AnnoFillTextureRef ref) {
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
uint raw5 = read_mem(a, ix + 5);
uint raw6 = read_mem(a, ix + 6);
uint raw7 = read_mem(a, ix + 7);
uint raw8 = read_mem(a, ix + 8);
uint raw9 = read_mem(a, ix + 9);
uint raw10 = read_mem(a, ix + 10);
uint raw11 = read_mem(a, ix + 11);
AnnoFillTexture s;
AnnoFillImage s;
s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.mat = vec4(uintBitsToFloat(raw4), uintBitsToFloat(raw5), uintBitsToFloat(raw6), uintBitsToFloat(raw7));
s.translate = vec2(uintBitsToFloat(raw8), uintBitsToFloat(raw9));
s.uv_bounds = uvec2(raw10, raw11);
s.index = raw4;
s.offset = ivec2(int(raw5 << 16) >> 16, int(raw5) >> 16);
return s;
}
void AnnoFillTexture_write(Alloc a, AnnoFillTextureRef ref, AnnoFillTexture s) {
void AnnoFillImage_write(Alloc a, AnnoFillImageRef ref, AnnoFillImage s) {
uint ix = ref.offset >> 2;
write_mem(a, ix + 0, floatBitsToUint(s.bbox.x));
write_mem(a, ix + 1, floatBitsToUint(s.bbox.y));
write_mem(a, ix + 2, floatBitsToUint(s.bbox.z));
write_mem(a, ix + 3, floatBitsToUint(s.bbox.w));
write_mem(a, ix + 4, floatBitsToUint(s.mat.x));
write_mem(a, ix + 5, floatBitsToUint(s.mat.y));
write_mem(a, ix + 6, floatBitsToUint(s.mat.z));
write_mem(a, ix + 7, floatBitsToUint(s.mat.w));
write_mem(a, ix + 8, floatBitsToUint(s.translate.x));
write_mem(a, ix + 9, floatBitsToUint(s.translate.y));
write_mem(a, ix + 10, s.uv_bounds.x);
write_mem(a, ix + 11, s.uv_bounds.y);
write_mem(a, ix + 4, s.index);
write_mem(a, ix + 5, (uint(s.offset.x) & 0xffff) | (uint(s.offset.y) << 16));
}
AnnoStroke AnnoStroke_read(Alloc a, AnnoStrokeRef ref) {
@@ -196,8 +182,8 @@ AnnoFill Annotated_Fill_read(Alloc a, AnnotatedRef ref) {
return AnnoFill_read(a, AnnoFillRef(ref.offset + 4));
}
AnnoFillTexture Annotated_FillTexture_read(Alloc a, AnnotatedRef ref) {
return AnnoFillTexture_read(a, AnnoFillTextureRef(ref.offset + 4));
AnnoFillImage Annotated_FillImage_read(Alloc a, AnnotatedRef ref) {
return AnnoFillImage_read(a, AnnoFillImageRef(ref.offset + 4));
}
AnnoClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref) {
@@ -222,9 +208,9 @@ void Annotated_Fill_write(Alloc a, AnnotatedRef ref, AnnoFill s) {
AnnoFill_write(a, AnnoFillRef(ref.offset + 4), s);
}
void Annotated_FillTexture_write(Alloc a, AnnotatedRef ref, AnnoFillTexture s) {
write_mem(a, ref.offset >> 2, Annotated_FillTexture);
AnnoFillTexture_write(a, AnnoFillTextureRef(ref.offset + 4), s);
void Annotated_FillImage_write(Alloc a, AnnotatedRef ref, AnnoFillImage s) {
write_mem(a, ref.offset >> 2, Annotated_FillImage);
AnnoFillImage_write(a, AnnoFillImageRef(ref.offset + 4), s);
}
void Annotated_BeginClip_write(Alloc a, AnnotatedRef ref, AnnoClip s) {
+1 -1
View File
@@ -49,7 +49,7 @@ void main() {
uint tag = Annotated_tag(conf.anno_alloc, ref);
switch (tag) {
case Annotated_Fill:
case Annotated_FillTexture:
case Annotated_FillImage:
case Annotated_BeginClip:
PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
Path path = Path_read(conf.tile_alloc, path_ref);
+1 -1
View File
@@ -61,7 +61,7 @@ void main() {
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) {
case Annotated_Fill:
case Annotated_FillTexture:
case Annotated_FillImage:
case Annotated_Stroke:
case Annotated_BeginClip:
case Annotated_EndClip:
+13 -15
View File
@@ -203,7 +203,7 @@ void main() {
uint tile_count;
switch (tag) {
case Annotated_Fill:
case Annotated_FillTexture:
case Annotated_FillImage:
case Annotated_Stroke:
case Annotated_BeginClip:
case Annotated_EndClip:
@@ -327,27 +327,25 @@ void main() {
}
cmd_ref.offset += Cmd_size;
break;
case Annotated_FillTexture:
case Annotated_FillImage:
tile = Tile_read(read_tile_alloc(element_ref_ix), TileRef(sh_tile_base[element_ref_ix]
+ (sh_tile_stride[element_ref_ix] * tile_y + tile_x) * Tile_size));
AnnoFillTexture fill_tex = Annotated_FillTexture_read(conf.anno_alloc, ref);
AnnoFillImage fill_img = Annotated_FillImage_read(conf.anno_alloc, ref);
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
break;
}
if (tile.tile.offset != 0) {
CmdFillTexture cmd_fill_tex;
cmd_fill_tex.tile_ref = tile.tile.offset;
cmd_fill_tex.backdrop = tile.backdrop;
cmd_fill_tex.mat = fill_tex.mat;
cmd_fill_tex.translate = fill_tex.translate;
cmd_fill_tex.uv_bounds = fill_tex.uv_bounds;
Cmd_FillTexture_write(cmd_alloc, cmd_ref, cmd_fill_tex);
CmdFillImage cmd_fill_img;
cmd_fill_img.tile_ref = tile.tile.offset;
cmd_fill_img.backdrop = tile.backdrop;
cmd_fill_img.index = fill_img.index;
cmd_fill_img.offset = fill_img.offset;
Cmd_FillImage_write(cmd_alloc, cmd_ref, cmd_fill_img);
} else {
CmdSolidTexture cmd_solid_tex;
cmd_solid_tex.mat = fill_tex.mat;
cmd_solid_tex.translate = fill_tex.translate;
cmd_solid_tex.uv_bounds = fill_tex.uv_bounds;
Cmd_SolidTexture_write(cmd_alloc, cmd_ref, cmd_solid_tex);
CmdSolidImage cmd_solid_img;
cmd_solid_img.index = fill_img.index;
cmd_solid_img.offset = fill_img.offset;
Cmd_SolidImage_write(cmd_alloc, cmd_ref, cmd_solid_img);
}
cmd_ref.offset += Cmd_size;
break;
+8 -9
View File
@@ -129,7 +129,7 @@ State map_element(ElementRef ref) {
c.pathseg_count = 1;
break;
case Element_Fill:
case Element_FillTexture:
case Element_FillImage:
case Element_Stroke:
case Element_BeginClip:
c.flags = FLAG_RESET_BBOX;
@@ -411,15 +411,14 @@ void main() {
out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
Annotated_Fill_write(conf.anno_alloc, out_ref, anno_fill);
break;
case Element_FillTexture:
FillTexture fill_tex = Element_FillTexture_read(this_ref);
AnnoFillTexture anno_fill_tex;
anno_fill_tex.uv_bounds = fill_tex.uv_bounds;
anno_fill_tex.bbox = st.bbox;
anno_fill_tex.mat = st.mat;
anno_fill_tex.translate = st.translate;
case Element_FillImage:
FillImage fill_img = Element_FillImage_read(this_ref);
AnnoFillImage anno_fill_img;
anno_fill_img.index = fill_img.index;
anno_fill_img.offset = fill_img.offset;
anno_fill_img.bbox = st.bbox;
out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
Annotated_FillTexture_write(conf.anno_alloc, out_ref, anno_fill_tex);
Annotated_FillImage_write(conf.anno_alloc, out_ref, anno_fill_img);
break;
case Element_BeginClip:
Clip begin_clip = Element_BeginClip_read(this_ref);
+36 -40
View File
@@ -8,9 +8,7 @@
#version 450
#extension GL_GOOGLE_include_directive : enable
#ifdef VULKAN
#extension GL_EXT_nonuniform_qualifier : enable
#endif
#include "mem.h"
#include "setup.h"
@@ -25,10 +23,10 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf {
layout(rgba8, set = 0, binding = 2) uniform writeonly image2D image;
#ifdef VULKAN
layout(set = 0, binding = 3) uniform sampler2D textures[];
#if GL_EXT_nonuniform_qualifier
layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[];
#else
layout(set = 0, binding = 3) uniform sampler2D atlas;
layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[1];
#endif
#include "ptcl.h"
@@ -92,25 +90,6 @@ float[CHUNK] computeArea(vec2 xy, int backdrop, uint tile_ref) {
return area;
}
vec4[CHUNK] fillTexture(vec2 xy, CmdSolidTexture cmd_tex) {
vec2 uvmin = unpackUnorm2x16(cmd_tex.uv_bounds.x);
vec2 uvmax = unpackUnorm2x16(cmd_tex.uv_bounds.y);
vec4 rgba[CHUNK];
for (uint i = 0; i < CHUNK; i++) {
float dy = float(i * CHUNK_DY);
vec2 uv = vec2(xy.x, xy.y + dy) + vec2(0.5, 0.5);
uv = cmd_tex.mat.xy * uv.x + cmd_tex.mat.zw * uv.y + cmd_tex.translate;
uv = clamp(uv, uvmin, uvmax);
#ifdef VULKAN
vec4 fg_rgba = textureGrad(textures[0], uv, cmd_tex.mat.xy, cmd_tex.mat.zw);
#else
vec4 fg_rgba = textureGrad(atlas, uv, cmd_tex.mat.xy, cmd_tex.mat.zw);
#endif
rgba[i] = fg_rgba;
}
return rgba;
}
vec3 tosRGB(vec3 rgb) {
bvec3 cutoff = greaterThanEqual(rgb, vec3(0.0031308));
vec3 below = vec3(12.92)*rgb;
@@ -118,17 +97,19 @@ vec3 tosRGB(vec3 rgb) {
return mix(below, above, cutoff);
}
// fromsRGB converts each sRGB-encoded component of srgb to its linear value
// using the piecewise formula from EXT_sRGB.
vec3 fromsRGB(vec3 srgb) {
	vec3 linear_seg = srgb/vec3(12.92);
	vec3 curve_seg = pow((srgb + vec3(0.055))/vec3(1.055), vec3(2.4));
	// Components at or above the threshold take the power curve, the rest
	// the linear segment.
	return mix(linear_seg, curve_seg, greaterThanEqual(srgb, vec3(0.04045)));
}
// unpacksRGB unpacks a color in the sRGB color space to a vec4 in the linear color
// space.
vec4 unpacksRGB(uint srgba) {
vec4 color = unpackUnorm4x8(srgba).wzyx;
// Formula from EXT_sRGB.
vec3 rgb = color.rgb;
bvec3 cutoff = greaterThanEqual(rgb, vec3(0.04045));
vec3 below = rgb/vec3(12.92);
vec3 above = pow((rgb + vec3(0.055))/vec3(1.055), vec3(2.4));
rgb = mix(below, above, cutoff);
return vec4(rgb, color.a);
return vec4(fromsRGB(color.rgb), color.a);
}
// packsRGB packs a color in the linear color space into its 8-bit sRGB equivalent.
@@ -137,6 +118,21 @@ uint packsRGB(vec4 rgba) {
return packUnorm4x8(rgba.wzyx);
}
// fillImage loads the CHUNK texels covered by this invocation from the bound
// image and returns them as linear colors. Texel i is read at pixel
// (xy.x, xy.y + i * CHUNK_DY) displaced by cmd_img.offset; no filtering or
// transformation is applied.
vec4[CHUNK] fillImage(uvec2 xy, CmdSolidImage cmd_img) {
vec4 rgba[CHUNK];
for (uint i = 0; i < CHUNK; i++) {
ivec2 uv = ivec2(xy.x, xy.y + i * CHUNK_DY) + cmd_img.offset;
#ifdef ENABLE_IMAGE_INDICES
// Select the image by the index carried in the command.
vec4 fg_rgba = imageLoad(images[cmd_img.index], uv);
#else
// Without image indices only the first image is addressed, with a
// constant index.
vec4 fg_rgba = imageLoad(images[0], uv);
#endif
// The loaded color components are sRGB-encoded; blend in linear space.
fg_rgba.rgb = fromsRGB(fg_rgba.rgb);
rgba[i] = fg_rgba;
}
return rgba;
}
void main() {
if (mem_error != NO_ERROR) {
return;
@@ -156,9 +152,9 @@ void main() {
Alloc clip_tos = new_alloc(0, 0);
for (uint i = 0; i < CHUNK; i++) {
rgb[i] = vec3(0.5);
#ifdef VULKAN
#ifdef ENABLE_IMAGE_INDICES
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
rgb[i] = texture(textures[gl_WorkGroupID.x / 64], vec2(xy_uint.x, xy_uint.y + CHUNK_DY * i) / 1024.0).rgb;
rgb[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i)/4).rgb;
}
#endif
mask[i] = 1.0;
@@ -212,10 +208,10 @@ void main() {
rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * area[k] * fg_rgba.a);
}
break;
case Cmd_FillTexture:
CmdFillTexture fill_tex = Cmd_FillTexture_read(cmd_alloc, cmd_ref);
area = computeArea(xy, fill_tex.backdrop, fill_tex.tile_ref);
vec4 rgba[CHUNK] = fillTexture(xy, CmdSolidTexture(fill_tex.mat, fill_tex.translate, fill_tex.uv_bounds));
case Cmd_FillImage:
CmdFillImage fill_img = Cmd_FillImage_read(cmd_alloc, cmd_ref);
area = computeArea(xy, fill_img.backdrop, fill_img.tile_ref);
vec4 rgba[CHUNK] = fillImage(xy_uint, CmdSolidImage(fill_img.index, fill_img.offset));
for (uint k = 0; k < CHUNK; k++) {
rgb[k] = mix(rgb[k], rgba[k].rgb, mask[k] * area[k] * rgba[k].a);
}
@@ -275,9 +271,9 @@ void main() {
rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * fg_rgba.a);
}
break;
case Cmd_SolidTexture:
CmdSolidTexture solid_tex = Cmd_SolidTexture_read(cmd_alloc, cmd_ref);
rgba = fillTexture(xy, solid_tex);
case Cmd_SolidImage:
CmdSolidImage solid_img = Cmd_SolidImage_read(cmd_alloc, cmd_ref);
rgba = fillImage(xy_uint, solid_img);
for (uint k = 0; k < CHUNK; k++) {
rgb[k] = mix(rgb[k], rgba[k].rgb, mask[k] * rgba[k].a);
}
+24
View File
@@ -0,0 +1,24 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
precision mediump float;
layout(binding = 0) uniform sampler2D tex;
layout(location = 0) in vec2 vUV;
layout(location = 0) out vec4 fragColor;
vec3 RGBtosRGB(vec3 rgb) {
bvec3 cutoff = greaterThanEqual(rgb, vec3(0.0031308));
vec3 below = vec3(12.92)*rgb;
vec3 above = vec3(1.055)*pow(rgb, vec3(0.41666)) - vec3(0.055);
return mix(below, above, cutoff);
}
void main() {
vec4 texel = texture(tex, vUV);
texel.rgb = RGBtosRGB(texel.rgb);
fragColor = texel;
}
+15
View File
@@ -0,0 +1,15 @@
#version 310 es
// SPDX-License-Identifier: Unlicense OR MIT
precision highp float;
layout(location = 0) in vec2 pos;
layout(location = 1) in vec2 uv;
layout(location = 0) out vec2 vUV;
// main emits the vertex position unchanged (it is supplied in clip space)
// and forwards the texture coordinate to the fragment stage.
void main() {
	gl_Position = vec4(pos, 0.0, 1.0);
	vUV = uv;
}
+41 -69
View File
@@ -18,7 +18,7 @@ struct CmdFillRef {
uint offset;
};
struct CmdFillTextureRef {
struct CmdFillImageRef {
uint offset;
};
@@ -38,7 +38,7 @@ struct CmdSolidRef {
uint offset;
};
struct CmdSolidTextureRef {
struct CmdSolidImageRef {
uint offset;
};
@@ -101,18 +101,17 @@ CmdFillRef CmdFill_index(CmdFillRef ref, uint index) {
return CmdFillRef(ref.offset + index * CmdFill_size);
}
struct CmdFillTexture {
struct CmdFillImage {
uint tile_ref;
int backdrop;
vec4 mat;
vec2 translate;
uvec2 uv_bounds;
uint index;
ivec2 offset;
};
#define CmdFillTexture_size 40
#define CmdFillImage_size 16
CmdFillTextureRef CmdFillTexture_index(CmdFillTextureRef ref, uint index) {
return CmdFillTextureRef(ref.offset + index * CmdFillTexture_size);
CmdFillImageRef CmdFillImage_index(CmdFillImageRef ref, uint index) {
return CmdFillImageRef(ref.offset + index * CmdFillImage_size);
}
struct CmdBeginClip {
@@ -156,16 +155,15 @@ CmdSolidRef CmdSolid_index(CmdSolidRef ref, uint index) {
return CmdSolidRef(ref.offset + index * CmdSolid_size);
}
struct CmdSolidTexture {
vec4 mat;
vec2 translate;
uvec2 uv_bounds;
struct CmdSolidImage {
uint index;
ivec2 offset;
};
#define CmdSolidTexture_size 32
#define CmdSolidImage_size 8
CmdSolidTextureRef CmdSolidTexture_index(CmdSolidTextureRef ref, uint index) {
return CmdSolidTextureRef(ref.offset + index * CmdSolidTexture_size);
CmdSolidImageRef CmdSolidImage_index(CmdSolidImageRef ref, uint index) {
return CmdSolidImageRef(ref.offset + index * CmdSolidImage_size);
}
struct CmdSolidMask {
@@ -192,16 +190,16 @@ CmdJumpRef CmdJump_index(CmdJumpRef ref, uint index) {
#define Cmd_Circle 1
#define Cmd_Line 2
#define Cmd_Fill 3
#define Cmd_FillTexture 4
#define Cmd_FillImage 4
#define Cmd_BeginClip 5
#define Cmd_BeginSolidClip 6
#define Cmd_EndClip 7
#define Cmd_Stroke 8
#define Cmd_Solid 9
#define Cmd_SolidMask 10
#define Cmd_SolidTexture 11
#define Cmd_SolidImage 11
#define Cmd_Jump 12
#define Cmd_size 44
#define Cmd_size 20
CmdRef Cmd_index(CmdRef ref, uint index) {
return CmdRef(ref.offset + index * Cmd_size);
@@ -286,39 +284,26 @@ void CmdFill_write(Alloc a, CmdFillRef ref, CmdFill s) {
write_mem(a, ix + 2, s.rgba_color);
}
CmdFillTexture CmdFillTexture_read(Alloc a, CmdFillTextureRef ref) {
CmdFillImage CmdFillImage_read(Alloc a, CmdFillImageRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
uint raw5 = read_mem(a, ix + 5);
uint raw6 = read_mem(a, ix + 6);
uint raw7 = read_mem(a, ix + 7);
uint raw8 = read_mem(a, ix + 8);
uint raw9 = read_mem(a, ix + 9);
CmdFillTexture s;
CmdFillImage s;
s.tile_ref = raw0;
s.backdrop = int(raw1);
s.mat = vec4(uintBitsToFloat(raw2), uintBitsToFloat(raw3), uintBitsToFloat(raw4), uintBitsToFloat(raw5));
s.translate = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
s.uv_bounds = uvec2(raw8, raw9);
s.index = raw2;
s.offset = ivec2(int(raw3 << 16) >> 16, int(raw3) >> 16);
return s;
}
void CmdFillTexture_write(Alloc a, CmdFillTextureRef ref, CmdFillTexture s) {
void CmdFillImage_write(Alloc a, CmdFillImageRef ref, CmdFillImage s) {
uint ix = ref.offset >> 2;
write_mem(a, ix + 0, s.tile_ref);
write_mem(a, ix + 1, uint(s.backdrop));
write_mem(a, ix + 2, floatBitsToUint(s.mat.x));
write_mem(a, ix + 3, floatBitsToUint(s.mat.y));
write_mem(a, ix + 4, floatBitsToUint(s.mat.z));
write_mem(a, ix + 5, floatBitsToUint(s.mat.w));
write_mem(a, ix + 6, floatBitsToUint(s.translate.x));
write_mem(a, ix + 7, floatBitsToUint(s.translate.y));
write_mem(a, ix + 8, s.uv_bounds.x);
write_mem(a, ix + 9, s.uv_bounds.y);
write_mem(a, ix + 2, s.index);
write_mem(a, ix + 3, (uint(s.offset.x) & 0xffff) | (uint(s.offset.y) << 16));
}
CmdBeginClip CmdBeginClip_read(Alloc a, CmdBeginClipRef ref) {
@@ -376,33 +361,20 @@ void CmdSolid_write(Alloc a, CmdSolidRef ref, CmdSolid s) {
write_mem(a, ix + 0, s.rgba_color);
}
CmdSolidTexture CmdSolidTexture_read(Alloc a, CmdSolidTextureRef ref) {
CmdSolidImage CmdSolidImage_read(Alloc a, CmdSolidImageRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = read_mem(a, ix + 0);
uint raw1 = read_mem(a, ix + 1);
uint raw2 = read_mem(a, ix + 2);
uint raw3 = read_mem(a, ix + 3);
uint raw4 = read_mem(a, ix + 4);
uint raw5 = read_mem(a, ix + 5);
uint raw6 = read_mem(a, ix + 6);
uint raw7 = read_mem(a, ix + 7);
CmdSolidTexture s;
s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
s.uv_bounds = uvec2(raw6, raw7);
CmdSolidImage s;
s.index = raw0;
s.offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16);
return s;
}
void CmdSolidTexture_write(Alloc a, CmdSolidTextureRef ref, CmdSolidTexture s) {
void CmdSolidImage_write(Alloc a, CmdSolidImageRef ref, CmdSolidImage s) {
uint ix = ref.offset >> 2;
write_mem(a, ix + 0, floatBitsToUint(s.mat.x));
write_mem(a, ix + 1, floatBitsToUint(s.mat.y));
write_mem(a, ix + 2, floatBitsToUint(s.mat.z));
write_mem(a, ix + 3, floatBitsToUint(s.mat.w));
write_mem(a, ix + 4, floatBitsToUint(s.translate.x));
write_mem(a, ix + 5, floatBitsToUint(s.translate.y));
write_mem(a, ix + 6, s.uv_bounds.x);
write_mem(a, ix + 7, s.uv_bounds.y);
write_mem(a, ix + 0, s.index);
write_mem(a, ix + 1, (uint(s.offset.x) & 0xffff) | (uint(s.offset.y) << 16));
}
CmdSolidMask CmdSolidMask_read(Alloc a, CmdSolidMaskRef ref) {
@@ -447,8 +419,8 @@ CmdFill Cmd_Fill_read(Alloc a, CmdRef ref) {
return CmdFill_read(a, CmdFillRef(ref.offset + 4));
}
CmdFillTexture Cmd_FillTexture_read(Alloc a, CmdRef ref) {
return CmdFillTexture_read(a, CmdFillTextureRef(ref.offset + 4));
CmdFillImage Cmd_FillImage_read(Alloc a, CmdRef ref) {
return CmdFillImage_read(a, CmdFillImageRef(ref.offset + 4));
}
CmdBeginClip Cmd_BeginClip_read(Alloc a, CmdRef ref) {
@@ -475,8 +447,8 @@ CmdSolidMask Cmd_SolidMask_read(Alloc a, CmdRef ref) {
return CmdSolidMask_read(a, CmdSolidMaskRef(ref.offset + 4));
}
CmdSolidTexture Cmd_SolidTexture_read(Alloc a, CmdRef ref) {
return CmdSolidTexture_read(a, CmdSolidTextureRef(ref.offset + 4));
CmdSolidImage Cmd_SolidImage_read(Alloc a, CmdRef ref) {
return CmdSolidImage_read(a, CmdSolidImageRef(ref.offset + 4));
}
CmdJump Cmd_Jump_read(Alloc a, CmdRef ref) {
@@ -502,9 +474,9 @@ void Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s) {
CmdFill_write(a, CmdFillRef(ref.offset + 4), s);
}
void Cmd_FillTexture_write(Alloc a, CmdRef ref, CmdFillTexture s) {
write_mem(a, ref.offset >> 2, Cmd_FillTexture);
CmdFillTexture_write(a, CmdFillTextureRef(ref.offset + 4), s);
void Cmd_FillImage_write(Alloc a, CmdRef ref, CmdFillImage s) {
write_mem(a, ref.offset >> 2, Cmd_FillImage);
CmdFillImage_write(a, CmdFillImageRef(ref.offset + 4), s);
}
void Cmd_BeginClip_write(Alloc a, CmdRef ref, CmdBeginClip s) {
@@ -537,9 +509,9 @@ void Cmd_SolidMask_write(Alloc a, CmdRef ref, CmdSolidMask s) {
CmdSolidMask_write(a, CmdSolidMaskRef(ref.offset + 4), s);
}
void Cmd_SolidTexture_write(Alloc a, CmdRef ref, CmdSolidTexture s) {
write_mem(a, ref.offset >> 2, Cmd_SolidTexture);
CmdSolidTexture_write(a, CmdSolidTextureRef(ref.offset + 4), s);
void Cmd_SolidImage_write(Alloc a, CmdRef ref, CmdSolidImage s) {
write_mem(a, ref.offset >> 2, Cmd_SolidImage);
CmdSolidImage_write(a, CmdSolidImageRef(ref.offset + 4), s);
}
void Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s) {
+14 -12
View File
@@ -18,7 +18,7 @@ struct FillRef {
uint offset;
};
struct FillTextureRef {
struct FillImageRef {
uint offset;
};
@@ -88,14 +88,15 @@ FillRef Fill_index(FillRef ref, uint index) {
return FillRef(ref.offset + index * Fill_size);
}
struct FillTexture {
uvec2 uv_bounds;
struct FillImage {
uint index;
ivec2 offset;
};
#define FillTexture_size 8
#define FillImage_size 8
FillTextureRef FillTexture_index(FillTextureRef ref, uint index) {
return FillTextureRef(ref.offset + index * FillTexture_size);
FillImageRef FillImage_index(FillImageRef ref, uint index) {
return FillImageRef(ref.offset + index * FillImage_size);
}
struct Stroke {
@@ -152,7 +153,7 @@ ClipRef Clip_index(ClipRef ref, uint index) {
#define Element_Transform 10
#define Element_BeginClip 11
#define Element_EndClip 12
#define Element_FillTexture 13
#define Element_FillImage 13
#define Element_size 36
ElementRef Element_index(ElementRef ref, uint index) {
@@ -212,12 +213,13 @@ Fill Fill_read(FillRef ref) {
return s;
}
FillTexture FillTexture_read(FillTextureRef ref) {
FillImage FillImage_read(FillImageRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = scene[ix + 0];
uint raw1 = scene[ix + 1];
FillTexture s;
s.uv_bounds = uvec2(raw0, raw1);
FillImage s;
s.index = raw0;
s.offset = ivec2(int(raw1 << 16) >> 16, int(raw1) >> 16);
return s;
}
@@ -314,7 +316,7 @@ Clip Element_EndClip_read(ElementRef ref) {
return Clip_read(ClipRef(ref.offset + 4));
}
FillTexture Element_FillTexture_read(ElementRef ref) {
return FillTexture_read(FillTextureRef(ref.offset + 4));
FillImage Element_FillImage_read(ElementRef ref) {
return FillImage_read(FillImageRef(ref.offset + 4));
}
+1 -1
View File
@@ -44,7 +44,7 @@ void main() {
int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
switch (tag) {
case Annotated_Fill:
case Annotated_FillTexture:
case Annotated_FillImage:
case Annotated_Stroke:
case Annotated_BeginClip:
case Annotated_EndClip: