gpu: eliminate gaps by ensuring consistent transformations

This is another attempt at fixing the issue described in [0]; the
previous attempt was reverted [1].

This change fixes the issue by tracking resolved transformations and
ensuring that all segments within a path share a single transformation.

[0] https://github.com/linebender/piet-gpu/issues/62
[1] https://gioui.org/commit/2b21b48a7c5c4451deb642c164548a134bb9ad06

Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
Elias Naur
2021-03-15 09:55:56 +01:00
parent 9cb9e67a8e
commit 258033d0b0
8 changed files with 116 additions and 77 deletions
+9 -2
View File
@@ -126,6 +126,7 @@ type encoder struct {
scene []scene.Command
npath int
npathseg int
ntrans int
}
type encodeState struct {
@@ -149,6 +150,7 @@ type config struct {
ptcl_alloc memAlloc
pathseg_alloc memAlloc
anno_alloc memAlloc
trans_alloc memAlloc
}
// memAlloc matches Alloc in mem.h
@@ -173,9 +175,10 @@ const (
pathSize = 12
binSize = 8
pathsegSize = 48
pathsegSize = 52
annoSize = 28
stateSize = 56
transSize = 24
stateSize = 60
stateStride = 4 + 2*stateSize
)
@@ -748,6 +751,7 @@ func (g *compute) render(tileDims image.Point) error {
ptcl_alloc: malloc(tileDims.X * tileDims.Y * ptclInitialAlloc),
pathseg_alloc: malloc(g.enc.npathseg * pathsegSize),
anno_alloc: malloc(g.enc.npath * annoSize),
trans_alloc: malloc(g.enc.ntrans * transSize),
}
numPartitions := (g.enc.numElements() + 127) / 128
@@ -972,6 +976,7 @@ func (e *encoder) reset() {
e.scene = e.scene[:0]
e.npath = 0
e.npathseg = 0
e.ntrans = 0
}
func (e *encoder) numElements() int {
@@ -982,10 +987,12 @@ func (e *encoder) append(e2 encoder) {
e.scene = append(e.scene, e2.scene...)
e.npath += e2.npath
e.npathseg += e2.npathseg
e.ntrans += e2.ntrans
}
// transform encodes the affine transformation m as a scene command and
// counts it in ntrans, so the encoder knows how many transform commands
// the scene contains.
func (e *encoder) transform(m f32.Affine2D) {
	cmd := scene.Transform(m)
	e.scene = append(e.scene, cmd)
	e.ntrans++
}
func (e *encoder) lineWidth(width float32) {
+7 -7
View File
File diff suppressed because one or more lines are too long
+30 -61
View File
@@ -39,6 +39,7 @@ layout(set = 0, binding = 3) volatile buffer StateBuf {
#include "state.h"
#include "annotated.h"
#include "pathseg.h"
#include "tile.h"
#define StateBuf_stride (4 + 2 * State_size)
@@ -91,6 +92,7 @@ State combine_state(State a, State b) {
c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1;
c.path_count = a.path_count + b.path_count;
c.pathseg_count = a.pathseg_count + b.pathseg_count;
c.trans_count = a.trans_count + b.trans_count;
return c;
}
@@ -106,6 +108,7 @@ State map_element(ElementRef ref) {
c.flags = 0;
c.path_count = 0;
c.pathseg_count = 0;
c.trans_count = 0;
switch (tag) {
case Element_FillLine:
case Element_StrokeLine:
@@ -147,6 +150,7 @@ State map_element(ElementRef ref) {
Transform t = Element_Transform_read(ref);
c.mat = t.mat;
c.translate = t.translate;
c.trans_count = 1;
break;
}
return c;
@@ -158,16 +162,7 @@ vec2 get_linewidth(State st) {
return 0.5 * st.linewidth * vec2(length(st.mat.xz), length(st.mat.yw));
}
// We should be able to use an array of structs but the NV shader compiler
// doesn't seem to like it :/
//shared State sh_state[WG_SIZE];
shared vec4 sh_mat[WG_SIZE];
shared vec2 sh_translate[WG_SIZE];
shared vec4 sh_bbox[WG_SIZE];
shared float sh_width[WG_SIZE];
shared uint sh_flags[WG_SIZE];
shared uint sh_path_count[WG_SIZE];
shared uint sh_pathseg_count[WG_SIZE];
shared State sh_state[WG_SIZE];
shared uint sh_part_ix;
shared State sh_prefix;
@@ -196,35 +191,15 @@ void main() {
th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i)));
}
State agg = th_state[N_ROWS - 1];
sh_mat[gl_LocalInvocationID.x] = agg.mat;
sh_translate[gl_LocalInvocationID.x] = agg.translate;
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
sh_flags[gl_LocalInvocationID.x] = agg.flags;
sh_path_count[gl_LocalInvocationID.x] = agg.path_count;
sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;
sh_state[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
if (gl_LocalInvocationID.x >= (1 << i)) {
State other;
uint ix = gl_LocalInvocationID.x - (1 << i);
other.mat = sh_mat[ix];
other.translate = sh_translate[ix];
other.bbox = sh_bbox[ix];
other.linewidth = sh_width[ix];
other.flags = sh_flags[ix];
other.path_count = sh_path_count[ix];
other.pathseg_count = sh_pathseg_count[ix];
State other = sh_state[gl_LocalInvocationID.x - (1 << i)];
agg = combine_state(other, agg);
}
barrier();
sh_mat[gl_LocalInvocationID.x] = agg.mat;
sh_translate[gl_LocalInvocationID.x] = agg.translate;
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
sh_flags[gl_LocalInvocationID.x] = agg.flags;
sh_path_count[gl_LocalInvocationID.x] = agg.path_count;
sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;
sh_state[gl_LocalInvocationID.x] = agg;
}
State exclusive;
@@ -235,6 +210,7 @@ void main() {
exclusive.flags = 0;
exclusive.path_count = 0;
exclusive.pathseg_count = 0;
exclusive.trans_count = 0;
// Publish aggregate for this partition
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
@@ -305,15 +281,7 @@ void main() {
State row = exclusive;
if (gl_LocalInvocationID.x > 0) {
uint ix = gl_LocalInvocationID.x - 1;
State other;
other.mat = sh_mat[ix];
other.translate = sh_translate[ix];
other.bbox = sh_bbox[ix];
other.linewidth = sh_width[ix];
other.flags = sh_flags[ix];
other.path_count = sh_path_count[ix];
other.pathseg_count = sh_pathseg_count[ix];
State other = sh_state[gl_LocalInvocationID.x - 1];
row = combine_state(row, other);
}
for (uint i = 0; i < N_ROWS; i++) {
@@ -328,14 +296,13 @@ void main() {
case Element_FillLine:
case Element_StrokeLine:
LineSeg line = Element_StrokeLine_read(this_ref);
vec2 p0 = st.mat.xy * line.p0.x + st.mat.zw * line.p0.y + st.translate;
vec2 p1 = st.mat.xy * line.p1.x + st.mat.zw * line.p1.y + st.translate;
PathStrokeCubic path_cubic;
path_cubic.p0 = p0;
path_cubic.p1 = mix(p0, p1, 1.0 / 3.0);
path_cubic.p2 = mix(p1, p0, 1.0 / 3.0);
path_cubic.p3 = p1;
path_cubic.p0 = line.p0;
path_cubic.p1 = mix(line.p0, line.p1, 1.0 / 3.0);
path_cubic.p2 = mix(line.p1, line.p0, 1.0 / 3.0);
path_cubic.p3 = line.p1;
path_cubic.path_ix = st.path_count;
path_cubic.trans_ix = st.trans_count;
if (tag == Element_StrokeLine) {
path_cubic.stroke = get_linewidth(st);
} else {
@@ -351,15 +318,12 @@ void main() {
case Element_FillQuad:
case Element_StrokeQuad:
QuadSeg quad = Element_StrokeQuad_read(this_ref);
p0 = st.mat.xy * quad.p0.x + st.mat.zw * quad.p0.y + st.translate;
p1 = st.mat.xy * quad.p1.x + st.mat.zw * quad.p1.y + st.translate;
vec2 p2 = st.mat.xy * quad.p2.x + st.mat.zw * quad.p2.y + st.translate;
path_cubic;
path_cubic.p0 = p0;
path_cubic.p1 = mix(p1, p0, 1.0 / 3.0);
path_cubic.p2 = mix(p1, p2, 1.0 / 3.0);
path_cubic.p3 = p2;
path_cubic.p0 = quad.p0;
path_cubic.p1 = mix(quad.p1, quad.p0, 1.0 / 3.0);
path_cubic.p2 = mix(quad.p1, quad.p2, 1.0 / 3.0);
path_cubic.p3 = quad.p2;
path_cubic.path_ix = st.path_count;
path_cubic.trans_ix = st.trans_count;
if (tag == Element_StrokeQuad) {
path_cubic.stroke = get_linewidth(st);
} else {
@@ -375,12 +339,12 @@ void main() {
case Element_FillCubic:
case Element_StrokeCubic:
CubicSeg cubic = Element_StrokeCubic_read(this_ref);
path_cubic;
path_cubic.p0 = st.mat.xy * cubic.p0.x + st.mat.zw * cubic.p0.y + st.translate;
path_cubic.p1 = st.mat.xy * cubic.p1.x + st.mat.zw * cubic.p1.y + st.translate;
path_cubic.p2 = st.mat.xy * cubic.p2.x + st.mat.zw * cubic.p2.y + st.translate;
path_cubic.p3 = st.mat.xy * cubic.p3.x + st.mat.zw * cubic.p3.y + st.translate;
path_cubic.p0 = cubic.p0;
path_cubic.p1 = cubic.p1;
path_cubic.p2 = cubic.p2;
path_cubic.p3 = cubic.p3;
path_cubic.path_ix = st.path_count;
path_cubic.trans_ix = st.trans_count;
if (tag == Element_StrokeCubic) {
path_cubic.stroke = get_linewidth(st);
} else {
@@ -435,6 +399,11 @@ void main() {
out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
break;
case Element_Transform:
TransformSeg transform = TransformSeg(st.mat, st.translate);
TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (st.trans_count - 1) * TransformSeg_size);
TransformSeg_write(conf.trans_alloc, trans_ref, transform);
break;
}
}
}
+11
View File
@@ -102,6 +102,17 @@ void main() {
case PathSeg_FillCubic:
case PathSeg_StrokeCubic:
PathStrokeCubic cubic = PathSeg_StrokeCubic_read(conf.pathseg_alloc, ref);
uint trans_ix = cubic.trans_ix;
if (trans_ix > 0) {
TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (trans_ix - 1) * TransformSeg_size);
TransformSeg trans = TransformSeg_read(conf.trans_alloc, trans_ref);
cubic.p0 = trans.mat.xy * cubic.p0.x + trans.mat.zw * cubic.p0.y + trans.translate;
cubic.p1 = trans.mat.xy * cubic.p1.x + trans.mat.zw * cubic.p1.y + trans.translate;
cubic.p2 = trans.mat.xy * cubic.p2.x + trans.mat.zw * cubic.p2.y + trans.translate;
cubic.p3 = trans.mat.xy * cubic.p3.x + trans.mat.zw * cubic.p3.y + trans.translate;
}
vec2 err_v = 3.0 * (cubic.p2 - cubic.p1) + cubic.p0 - cubic.p3;
float err = err_v.x * err_v.x + err_v.y * err_v.y;
// The number of quadratics.
+14 -6
View File
@@ -20,9 +20,10 @@ struct PathFillCubic {
vec2 p2;
vec2 p3;
uint path_ix;
uint trans_ix;
};
#define PathFillCubic_size 36
#define PathFillCubic_size 40
PathFillCubicRef PathFillCubic_index(PathFillCubicRef ref, uint index) {
return PathFillCubicRef(ref.offset + index * PathFillCubic_size);
@@ -34,10 +35,11 @@ struct PathStrokeCubic {
vec2 p2;
vec2 p3;
uint path_ix;
uint trans_ix;
vec2 stroke;
};
#define PathStrokeCubic_size 44
#define PathStrokeCubic_size 48
PathStrokeCubicRef PathStrokeCubic_index(PathStrokeCubicRef ref, uint index) {
return PathStrokeCubicRef(ref.offset + index * PathStrokeCubic_size);
@@ -46,7 +48,7 @@ PathStrokeCubicRef PathStrokeCubic_index(PathStrokeCubicRef ref, uint index) {
#define PathSeg_Nop 0
#define PathSeg_FillCubic 1
#define PathSeg_StrokeCubic 2
#define PathSeg_size 48
#define PathSeg_size 52
PathSegRef PathSeg_index(PathSegRef ref, uint index) {
return PathSegRef(ref.offset + index * PathSeg_size);
@@ -63,12 +65,14 @@ PathFillCubic PathFillCubic_read(Alloc a, PathFillCubicRef ref) {
uint raw6 = read_mem(a, ix + 6);
uint raw7 = read_mem(a, ix + 7);
uint raw8 = read_mem(a, ix + 8);
uint raw9 = read_mem(a, ix + 9);
PathFillCubic s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
s.path_ix = raw8;
s.trans_ix = raw9;
return s;
}
@@ -83,6 +87,7 @@ void PathFillCubic_write(Alloc a, PathFillCubicRef ref, PathFillCubic s) {
write_mem(a, ix + 6, floatBitsToUint(s.p3.x));
write_mem(a, ix + 7, floatBitsToUint(s.p3.y));
write_mem(a, ix + 8, s.path_ix);
write_mem(a, ix + 9, s.trans_ix);
}
PathStrokeCubic PathStrokeCubic_read(Alloc a, PathStrokeCubicRef ref) {
@@ -98,13 +103,15 @@ PathStrokeCubic PathStrokeCubic_read(Alloc a, PathStrokeCubicRef ref) {
uint raw8 = read_mem(a, ix + 8);
uint raw9 = read_mem(a, ix + 9);
uint raw10 = read_mem(a, ix + 10);
uint raw11 = read_mem(a, ix + 11);
PathStrokeCubic s;
s.p0 = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));
s.p1 = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.p2 = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
s.p3 = vec2(uintBitsToFloat(raw6), uintBitsToFloat(raw7));
s.path_ix = raw8;
s.stroke = vec2(uintBitsToFloat(raw9), uintBitsToFloat(raw10));
s.trans_ix = raw9;
s.stroke = vec2(uintBitsToFloat(raw10), uintBitsToFloat(raw11));
return s;
}
@@ -119,8 +126,9 @@ void PathStrokeCubic_write(Alloc a, PathStrokeCubicRef ref, PathStrokeCubic s) {
write_mem(a, ix + 6, floatBitsToUint(s.p3.x));
write_mem(a, ix + 7, floatBitsToUint(s.p3.y));
write_mem(a, ix + 8, s.path_ix);
write_mem(a, ix + 9, floatBitsToUint(s.stroke.x));
write_mem(a, ix + 10, floatBitsToUint(s.stroke.y));
write_mem(a, ix + 9, s.trans_ix);
write_mem(a, ix + 10, floatBitsToUint(s.stroke.x));
write_mem(a, ix + 11, floatBitsToUint(s.stroke.y));
}
uint PathSeg_tag(Alloc a, PathSegRef ref) {
+1
View File
@@ -35,4 +35,5 @@ struct Config {
Alloc ptcl_alloc;
Alloc pathseg_alloc;
Alloc anno_alloc;
Alloc trans_alloc;
};
+5 -1
View File
@@ -14,9 +14,10 @@ struct State {
uint flags;
uint path_count;
uint pathseg_count;
uint trans_count;
};
#define State_size 56
#define State_size 60
StateRef State_index(StateRef ref, uint index) {
return StateRef(ref.offset + index * State_size);
@@ -38,6 +39,7 @@ State State_read(StateRef ref) {
uint raw11 = state[ix + 11];
uint raw12 = state[ix + 12];
uint raw13 = state[ix + 13];
uint raw14 = state[ix + 14];
State s;
s.mat = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));
s.translate = vec2(uintBitsToFloat(raw4), uintBitsToFloat(raw5));
@@ -46,6 +48,7 @@ State State_read(StateRef ref) {
s.flags = raw11;
s.path_count = raw12;
s.pathseg_count = raw13;
s.trans_count = raw14;
return s;
}
@@ -65,5 +68,6 @@ void State_write(StateRef ref, State s) {
state[ix + 11] = s.flags;
state[ix + 12] = s.path_count;
state[ix + 13] = s.pathseg_count;
state[ix + 14] = s.trans_count;
}
+39
View File
@@ -14,6 +14,10 @@ struct TileSegRef {
uint offset;
};
// Reference to an encoded TransformSeg inside a memory allocation.
struct TransformSegRef {
// Offset of the referenced TransformSeg. The read/write helpers shift it
// right by 2 to obtain a 32-bit word index, i.e. it is a byte offset.
uint offset;
};
struct Path {
uvec4 bbox;
TileRef tiles;
@@ -49,6 +53,17 @@ TileSegRef TileSeg_index(TileSegRef ref, uint index) {
return TileSegRef(ref.offset + index * TileSeg_size);
}
// A resolved 2D affine transformation, stored as a linear part plus a
// translation.
struct TransformSeg {
// Linear part of the transformation (four float components).
vec4 mat;
// Translation part of the transformation (two float components).
vec2 translate;
};
// Encoded size of a TransformSeg in bytes: six 32-bit words
// (four for mat, two for translate).
#define TransformSeg_size 24
// Returns a reference to the element `index` positions after `ref`,
// treating `ref` as the start of a packed array of TransformSeg values.
TransformSegRef TransformSeg_index(TransformSegRef ref, uint index) {
    uint element_offset = ref.offset + index * TransformSeg_size;
    return TransformSegRef(element_offset);
}
Path Path_read(Alloc a, PathRef ref) {
uint ix = ref.offset >> 2;
uint raw0 = read_mem(a, ix + 0);
@@ -109,3 +124,27 @@ void TileSeg_write(Alloc a, TileSegRef ref, TileSeg s) {
write_mem(a, ix + 5, s.next.offset);
}
// Decodes the TransformSeg referenced by `ref` from allocation `a`.
// Six consecutive 32-bit words are read: words 0-3 hold the matrix
// components and words 4-5 hold the translation.
TransformSeg TransformSeg_read(Alloc a, TransformSegRef ref) {
    // ref.offset is converted to a 32-bit word index.
    uint base = ref.offset >> 2;
    uvec4 mat_bits = uvec4(
        read_mem(a, base + 0),
        read_mem(a, base + 1),
        read_mem(a, base + 2),
        read_mem(a, base + 3)
    );
    uvec2 translate_bits = uvec2(
        read_mem(a, base + 4),
        read_mem(a, base + 5)
    );
    // Reinterpret the raw words as floats, component by component.
    TransformSeg seg;
    seg.mat = uintBitsToFloat(mat_bits);
    seg.translate = uintBitsToFloat(translate_bits);
    return seg;
}
// Encodes `s` into allocation `a` at the location referenced by `ref`.
// Six consecutive 32-bit words are written: words 0-3 receive the matrix
// components and words 4-5 receive the translation.
void TransformSeg_write(Alloc a, TransformSegRef ref, TransformSeg s) {
    // ref.offset is converted to a 32-bit word index.
    uint base = ref.offset >> 2;
    // Reinterpret the float components as raw words before storing them.
    uvec4 mat_bits = floatBitsToUint(s.mat);
    uvec2 translate_bits = floatBitsToUint(s.translate);
    write_mem(a, base + 0, mat_bits.x);
    write_mem(a, base + 1, mat_bits.y);
    write_mem(a, base + 2, mat_bits.z);
    write_mem(a, base + 3, mat_bits.w);
    write_mem(a, base + 4, translate_bits.x);
    write_mem(a, base + 5, translate_bits.y);
}