forked from joejulian/gio
gpu/shaders: update piet-gpu
Changes: - faster implementation of RGBA output - fix stroked clips and images Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
+37
-44
@@ -91,6 +91,23 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit
|
||||
return true;
|
||||
}
|
||||
|
||||
// Append the coverage command for one tile to the per-tile command list and
// advance cmd_ref past what was written.
//
// alloc:     allocation the command is written into.
// cmd_ref:   write position; its offset is advanced in place (inout).
// flags:     element flags; fill_mode_from_flags(flags) picks fill vs. stroke.
// tile:      supplies tile.tile.offset and tile.backdrop for the command.
// linewidth: stroke width; only read on the stroke path, where it is halved.
//
// This function does not check for space itself; the call sites visible in
// main() call alloc_cmd() immediately before invoking it.
void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float linewidth) {
|
||||
if (fill_mode_from_flags(flags) == MODE_NONZERO) {
|
||||
// NOTE(review): a non-zero offset presumably means the tile has path segments — confirm.
if (tile.tile.offset != 0) {
|
||||
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
|
||||
Cmd_Fill_write(alloc, cmd_ref, cmd_fill);
|
||||
// NOTE(review): the leading 4 looks like the size of the command tag word — confirm.
cmd_ref.offset += 4 + CmdFill_size;
|
||||
} else {
|
||||
// Offset is zero: emit a Solid command, which has no payload after the tag.
Cmd_Solid_write(alloc, cmd_ref);
|
||||
cmd_ref.offset += 4;
|
||||
}
|
||||
} else {
|
||||
// Any fill mode other than MODE_NONZERO is emitted as a stroke with half the line width.
CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * linewidth);
|
||||
Cmd_Stroke_write(alloc, cmd_ref, cmd_stroke);
|
||||
cmd_ref.offset += 4 + CmdStroke_size;
|
||||
}
|
||||
}
|
||||
|
||||
void main() {
|
||||
if (mem_error != NO_ERROR) {
|
||||
return;
|
||||
@@ -135,6 +152,12 @@ void main() {
|
||||
uint part_start_ix = 0;
|
||||
uint ready_ix = 0;
|
||||
|
||||
// Leave room for the fine rasterizer scratch allocation.
|
||||
Alloc scratch_alloc = slice_mem(cmd_alloc, 0, Alloc_size);
|
||||
cmd_ref.offset += Alloc_size;
|
||||
|
||||
uint num_begin_slots = 0;
|
||||
uint begin_slot = 0;
|
||||
while (true) {
|
||||
for (uint i = 0; i < N_SLICE; i++) {
|
||||
sh_bitmaps[i][th_ix] = 0;
|
||||
@@ -320,20 +343,7 @@ void main() {
|
||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||
break;
|
||||
}
|
||||
if (fill_mode_from_flags(tag.flags) == MODE_NONZERO) {
|
||||
if (tile.tile.offset != 0) {
|
||||
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
|
||||
Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill);
|
||||
cmd_ref.offset += 4 + CmdFill_size;
|
||||
} else {
|
||||
Cmd_Solid_write(cmd_alloc, cmd_ref);
|
||||
cmd_ref.offset += 4;
|
||||
}
|
||||
} else {
|
||||
CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * fill.linewidth);
|
||||
Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke);
|
||||
cmd_ref.offset += 4 + CmdStroke_size;
|
||||
}
|
||||
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill.linewidth);
|
||||
Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(fill.rgba_color));
|
||||
cmd_ref.offset += 4 + CmdColor_size;
|
||||
break;
|
||||
@@ -344,20 +354,7 @@ void main() {
|
||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||
break;
|
||||
}
|
||||
if (fill_mode_from_flags(tag.flags) == MODE_NONZERO) {
|
||||
if (tile.tile.offset != 0) {
|
||||
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
|
||||
Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill);
|
||||
cmd_ref.offset += 4 + CmdFill_size;
|
||||
} else {
|
||||
Cmd_Solid_write(cmd_alloc, cmd_ref);
|
||||
cmd_ref.offset += 4;
|
||||
}
|
||||
} else {
|
||||
CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * fill_img.linewidth);
|
||||
Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke);
|
||||
cmd_ref.offset += 4 + CmdStroke_size;
|
||||
}
|
||||
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill_img.linewidth);
|
||||
Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(fill_img.index, fill_img.offset));
|
||||
cmd_ref.offset += 4 + CmdImage_size;
|
||||
break;
|
||||
@@ -373,27 +370,14 @@ void main() {
|
||||
if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) {
|
||||
break;
|
||||
}
|
||||
if (fill_mode_from_flags(tag.flags) == MODE_NONZERO) {
|
||||
if (tile.tile.offset != 0) {
|
||||
CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);
|
||||
Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill);
|
||||
cmd_ref.offset += 4 + CmdFill_size;
|
||||
} else {
|
||||
// TODO: here is where a bunch of optimization magic should happen
|
||||
float alpha = tile.backdrop == 0 ? 0.0 : 1.0;
|
||||
Cmd_Alpha_write(cmd_alloc, cmd_ref, CmdAlpha(alpha));
|
||||
cmd_ref.offset += 4 + CmdAlpha_size;
|
||||
}
|
||||
} else {
|
||||
CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * begin_clip.linewidth);
|
||||
Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke);
|
||||
cmd_ref.offset += 4 + CmdStroke_size;
|
||||
}
|
||||
write_fill(cmd_alloc, cmd_ref, tag.flags, tile, begin_clip.linewidth);
|
||||
Cmd_BeginClip_write(cmd_alloc, cmd_ref);
|
||||
cmd_ref.offset += 4;
|
||||
if (clip_depth < 32) {
|
||||
clip_one_mask &= ~(1 << clip_depth);
|
||||
}
|
||||
begin_slot++;
|
||||
num_begin_slots = max(num_begin_slots, begin_slot);
|
||||
}
|
||||
clip_depth++;
|
||||
break;
|
||||
@@ -405,6 +389,7 @@ void main() {
|
||||
}
|
||||
Cmd_Solid_write(cmd_alloc, cmd_ref);
|
||||
cmd_ref.offset += 4;
|
||||
begin_slot--;
|
||||
Cmd_EndClip_write(cmd_alloc, cmd_ref);
|
||||
cmd_ref.offset += 4;
|
||||
}
|
||||
@@ -432,5 +417,13 @@ void main() {
|
||||
}
|
||||
if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) {
|
||||
Cmd_End_write(cmd_alloc, cmd_ref);
|
||||
if (num_begin_slots > 0) {
|
||||
// Write scratch allocation: one state per BeginClip per rasterizer chunk.
|
||||
uint scratch_size = num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4;
|
||||
MallocResult scratch = malloc(scratch_size);
|
||||
// Ignore scratch.failed; we don't use the allocation and kernel4
|
||||
// checks for memory overflow before using it.
|
||||
alloc_write(scratch_alloc, scratch_alloc.offset, scratch.alloc);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user