gpu: eliminate gaps by ensuring consistent transformations

This is another attempt at fixing the issue described in [0]; the
previous attempt was reverted [1].

This change fixes the issue by tracking resolved transformations and
ensuring that all segments within a path share a single transformation.

[0] https://github.com/linebender/piet-gpu/issues/62
[1] https://gioui.org/commit/2b21b48a7c5c4451deb642c164548a134bb9ad06

Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
Elias Naur
2021-03-15 09:55:56 +01:00
parent 9cb9e67a8e
commit 258033d0b0
8 changed files with 116 additions and 77 deletions
+30 -61
View File
@@ -39,6 +39,7 @@ layout(set = 0, binding = 3) volatile buffer StateBuf {
#include "state.h"
#include "annotated.h"
#include "pathseg.h"
#include "tile.h"
#define StateBuf_stride (4 + 2 * State_size)
@@ -91,6 +92,7 @@ State combine_state(State a, State b) {
c.flags |= (a.flags & FLAG_RESET_BBOX) >> 1;
c.path_count = a.path_count + b.path_count;
c.pathseg_count = a.pathseg_count + b.pathseg_count;
c.trans_count = a.trans_count + b.trans_count;
return c;
}
@@ -106,6 +108,7 @@ State map_element(ElementRef ref) {
c.flags = 0;
c.path_count = 0;
c.pathseg_count = 0;
c.trans_count = 0;
switch (tag) {
case Element_FillLine:
case Element_StrokeLine:
@@ -147,6 +150,7 @@ State map_element(ElementRef ref) {
Transform t = Element_Transform_read(ref);
c.mat = t.mat;
c.translate = t.translate;
c.trans_count = 1;
break;
}
return c;
@@ -158,16 +162,7 @@ vec2 get_linewidth(State st) {
return 0.5 * st.linewidth * vec2(length(st.mat.xz), length(st.mat.yw));
}
// We should be able to use an array of structs but the NV shader compiler
// doesn't seem to like it :/
//shared State sh_state[WG_SIZE];
shared vec4 sh_mat[WG_SIZE];
shared vec2 sh_translate[WG_SIZE];
shared vec4 sh_bbox[WG_SIZE];
shared float sh_width[WG_SIZE];
shared uint sh_flags[WG_SIZE];
shared uint sh_path_count[WG_SIZE];
shared uint sh_pathseg_count[WG_SIZE];
shared State sh_state[WG_SIZE];
shared uint sh_part_ix;
shared State sh_prefix;
@@ -196,35 +191,15 @@ void main() {
th_state[i] = combine_state(th_state[i - 1], map_element(Element_index(ref, i)));
}
State agg = th_state[N_ROWS - 1];
sh_mat[gl_LocalInvocationID.x] = agg.mat;
sh_translate[gl_LocalInvocationID.x] = agg.translate;
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
sh_flags[gl_LocalInvocationID.x] = agg.flags;
sh_path_count[gl_LocalInvocationID.x] = agg.path_count;
sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;
sh_state[gl_LocalInvocationID.x] = agg;
for (uint i = 0; i < LG_WG_SIZE; i++) {
barrier();
if (gl_LocalInvocationID.x >= (1 << i)) {
State other;
uint ix = gl_LocalInvocationID.x - (1 << i);
other.mat = sh_mat[ix];
other.translate = sh_translate[ix];
other.bbox = sh_bbox[ix];
other.linewidth = sh_width[ix];
other.flags = sh_flags[ix];
other.path_count = sh_path_count[ix];
other.pathseg_count = sh_pathseg_count[ix];
State other = sh_state[gl_LocalInvocationID.x - (1 << i)];
agg = combine_state(other, agg);
}
barrier();
sh_mat[gl_LocalInvocationID.x] = agg.mat;
sh_translate[gl_LocalInvocationID.x] = agg.translate;
sh_bbox[gl_LocalInvocationID.x] = agg.bbox;
sh_width[gl_LocalInvocationID.x] = agg.linewidth;
sh_flags[gl_LocalInvocationID.x] = agg.flags;
sh_path_count[gl_LocalInvocationID.x] = agg.path_count;
sh_pathseg_count[gl_LocalInvocationID.x] = agg.pathseg_count;
sh_state[gl_LocalInvocationID.x] = agg;
}
State exclusive;
@@ -235,6 +210,7 @@ void main() {
exclusive.flags = 0;
exclusive.path_count = 0;
exclusive.pathseg_count = 0;
exclusive.trans_count = 0;
// Publish aggregate for this partition
if (gl_LocalInvocationID.x == WG_SIZE - 1) {
@@ -305,15 +281,7 @@ void main() {
State row = exclusive;
if (gl_LocalInvocationID.x > 0) {
uint ix = gl_LocalInvocationID.x - 1;
State other;
other.mat = sh_mat[ix];
other.translate = sh_translate[ix];
other.bbox = sh_bbox[ix];
other.linewidth = sh_width[ix];
other.flags = sh_flags[ix];
other.path_count = sh_path_count[ix];
other.pathseg_count = sh_pathseg_count[ix];
State other = sh_state[gl_LocalInvocationID.x - 1];
row = combine_state(row, other);
}
for (uint i = 0; i < N_ROWS; i++) {
@@ -328,14 +296,13 @@ void main() {
case Element_FillLine:
case Element_StrokeLine:
LineSeg line = Element_StrokeLine_read(this_ref);
vec2 p0 = st.mat.xy * line.p0.x + st.mat.zw * line.p0.y + st.translate;
vec2 p1 = st.mat.xy * line.p1.x + st.mat.zw * line.p1.y + st.translate;
PathStrokeCubic path_cubic;
path_cubic.p0 = p0;
path_cubic.p1 = mix(p0, p1, 1.0 / 3.0);
path_cubic.p2 = mix(p1, p0, 1.0 / 3.0);
path_cubic.p3 = p1;
path_cubic.p0 = line.p0;
path_cubic.p1 = mix(line.p0, line.p1, 1.0 / 3.0);
path_cubic.p2 = mix(line.p1, line.p0, 1.0 / 3.0);
path_cubic.p3 = line.p1;
path_cubic.path_ix = st.path_count;
path_cubic.trans_ix = st.trans_count;
if (tag == Element_StrokeLine) {
path_cubic.stroke = get_linewidth(st);
} else {
@@ -351,15 +318,12 @@ void main() {
case Element_FillQuad:
case Element_StrokeQuad:
QuadSeg quad = Element_StrokeQuad_read(this_ref);
p0 = st.mat.xy * quad.p0.x + st.mat.zw * quad.p0.y + st.translate;
p1 = st.mat.xy * quad.p1.x + st.mat.zw * quad.p1.y + st.translate;
vec2 p2 = st.mat.xy * quad.p2.x + st.mat.zw * quad.p2.y + st.translate;
path_cubic;
path_cubic.p0 = p0;
path_cubic.p1 = mix(p1, p0, 1.0 / 3.0);
path_cubic.p2 = mix(p1, p2, 1.0 / 3.0);
path_cubic.p3 = p2;
path_cubic.p0 = quad.p0;
path_cubic.p1 = mix(quad.p1, quad.p0, 1.0 / 3.0);
path_cubic.p2 = mix(quad.p1, quad.p2, 1.0 / 3.0);
path_cubic.p3 = quad.p2;
path_cubic.path_ix = st.path_count;
path_cubic.trans_ix = st.trans_count;
if (tag == Element_StrokeQuad) {
path_cubic.stroke = get_linewidth(st);
} else {
@@ -375,12 +339,12 @@ void main() {
case Element_FillCubic:
case Element_StrokeCubic:
CubicSeg cubic = Element_StrokeCubic_read(this_ref);
path_cubic;
path_cubic.p0 = st.mat.xy * cubic.p0.x + st.mat.zw * cubic.p0.y + st.translate;
path_cubic.p1 = st.mat.xy * cubic.p1.x + st.mat.zw * cubic.p1.y + st.translate;
path_cubic.p2 = st.mat.xy * cubic.p2.x + st.mat.zw * cubic.p2.y + st.translate;
path_cubic.p3 = st.mat.xy * cubic.p3.x + st.mat.zw * cubic.p3.y + st.translate;
path_cubic.p0 = cubic.p0;
path_cubic.p1 = cubic.p1;
path_cubic.p2 = cubic.p2;
path_cubic.p3 = cubic.p3;
path_cubic.path_ix = st.path_count;
path_cubic.trans_ix = st.trans_count;
if (tag == Element_StrokeCubic) {
path_cubic.stroke = get_linewidth(st);
} else {
@@ -435,6 +399,11 @@ void main() {
out_ref = AnnotatedRef(conf.anno_alloc.offset + (st.path_count - 1) * Annotated_size);
Annotated_EndClip_write(conf.anno_alloc, out_ref, anno_end_clip);
break;
case Element_Transform:
TransformSeg transform = TransformSeg(st.mat, st.translate);
TransformSegRef trans_ref = TransformSegRef(conf.trans_alloc.offset + (st.trans_count - 1) * TransformSeg_size);
TransformSeg_write(conf.trans_alloc, trans_ref, transform);
break;
}
}
}