mirror of
https://git.sr.ht/~eliasnaur/gio
synced 2026-07-01 15:45:38 +00:00
0218546161
The piet-gpu project is dual licensed under the Apache 2.0 and MIT, and the shaders themselves are also offered under the UNLICENSE terms. See https://github.com/linebender/piet-gpu#license-and-contributions, as of commit 72e2dfab3da8ae1adf7a0fb056b71ccbc4cfa29a: "The piet-gpu project is dual-licensed under both Apache 2.0 and MIT licenses. In addition, the shaders are provided under the terms of the Unlicense. The intent is for this research to be used in as broad a context as possible." Signed-off-by: Elias Naur <mail@eliasnaur.com>
110 lines
3.5 KiB
Plaintext
110 lines
3.5 KiB
Plaintext
// SPDX-License-Identifier: Apache-2.0 OR MIT OR Unlicense
|
|
|
|
// Allocation and initialization of tiles for paths.
|
|
|
|
#version 450
|
|
#extension GL_GOOGLE_include_directive : enable
|
|
|
|
#include "mem.h"
|
|
#include "setup.h"
|
|
|
|
// Base-2 logarithm of the workgroup size: 7 plus LG_WG_FACTOR, which is not
// defined in this file (presumably it comes from one of the includes above).
#define LG_TILE_ALLOC_WG (7 + LG_WG_FACTOR)
|
|
// Number of invocations per workgroup. main() handles one element per
// invocation and runs LG_TILE_ALLOC_WG rounds of its prefix sum.
#define TILE_ALLOC_WG (1 << LG_TILE_ALLOC_WG)
|
|
|
|
// One-dimensional workgroup of TILE_ALLOC_WG invocations.
layout(local_size_x = TILE_ALLOC_WG, local_size_y = 1) in;
|
|
|
|
// Read-only configuration buffer bound at set 0, binding 1. main() reads
// conf.tile_alloc, conf.anno_alloc, conf.n_elements, conf.width_in_tiles and
// conf.height_in_tiles from it.
layout(set = 0, binding = 1) readonly buffer ConfigBuf {
|
|
Config conf;
|
|
};
|
|
|
|
#include "annotated.h"
|
|
#include "tile.h"
|
|
|
|
// Scale factors for converting coordinates to tile indices: multiplying by
// SX (resp. SY) divides by the tile width (resp. height) in pixels.
|
|
#define SX (1.0 / float(TILE_WIDTH_PX))
|
|
#define SY (1.0 / float(TILE_HEIGHT_PX))
|
|
|
|
// Per-invocation tile counts; after the scan in main() each slot holds the
// inclusive prefix sum of the counts up to and including that invocation.
shared uint sh_tile_count[TILE_ALLOC_WG];
// Result of the single per-workgroup tile allocation, published by the last
// invocation of the workgroup.
shared MallocResult sh_tile_alloc;
// Workgroup-wide snapshot of "mem_error != NO_ERROR", sampled by invocation 0
// so that every invocation takes the same early-exit decision.
shared bool sh_mem_error;

// Allocates and zero-initializes the tiles for each path element.
//
// Each invocation handles one element: it converts the element's bounding box
// to tile coordinates, the workgroup computes a prefix sum of the per-element
// tile counts, one invocation allocates the tiles for the whole workgroup,
// each invocation writes its Path record, and finally all invocations
// cooperate to clear the freshly allocated tile memory.
void main() {
    uint th_ix = gl_LocalInvocationID.x;
    uint element_ix = gl_GlobalInvocationID.x;

    // barrier() may only be executed in uniform control flow. The error flag is
    // read from memory declared outside this file; if it can change while the
    // workgroup is running, a per-invocation check could let some invocations
    // return while others go on to the barriers below. Sample it once and
    // share the result so the whole workgroup exits (or continues) together.
    if (th_ix == 0) {
        sh_mem_error = mem_error != NO_ERROR;
    }
    barrier();
    if (sh_mem_error) {
        return;
    }

    PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size);
    AnnotatedRef ref = AnnotatedRef(conf.anno_alloc.offset + element_ix * Annotated_size);

    // Invocations past the end of the element list behave like a no-op element
    // so that they still take part in the prefix sum and the barriers.
    uint tag = Annotated_Nop;
    if (element_ix < conf.n_elements) {
        tag = Annotated_tag(conf.anno_alloc, ref);
    }

    // Bounding box in tile coordinates; stays empty for tags not listed below.
    int x0 = 0, y0 = 0, x1 = 0, y1 = 0;
    switch (tag) {
    case Annotated_Fill:
    case Annotated_FillTexture:
    case Annotated_Stroke:
    case Annotated_BeginClip:
    case Annotated_EndClip:
        // Note: we take advantage of the fact that fills, strokes, and
        // clips have compatible layout.
        AnnoFill fill = Annotated_Fill_read(conf.anno_alloc, ref);
        // Round outwards so that partially covered tiles are included.
        x0 = int(floor(fill.bbox.x * SX));
        y0 = int(floor(fill.bbox.y * SY));
        x1 = int(ceil(fill.bbox.z * SX));
        y1 = int(ceil(fill.bbox.w * SY));
        break;
    }

    // Restrict the box to the tile grid described by the configuration.
    x0 = clamp(x0, 0, int(conf.width_in_tiles));
    y0 = clamp(y0, 0, int(conf.height_in_tiles));
    x1 = clamp(x1, 0, int(conf.width_in_tiles));
    y1 = clamp(y1, 0, int(conf.height_in_tiles));

    Path path;
    path.bbox = uvec4(x0, y0, x1, y1);
    // NOTE(review): if a bounding box can arrive inverted on exactly one axis
    // (z < x or w < y), this signed product is negative and wraps to a very
    // large value when converted to uint. Confirm that earlier stages
    // guarantee ordered bounding boxes.
    uint tile_count = (x1 - x0) * (y1 - y0);
    if (tag == Annotated_EndClip) {
        // Don't actually allocate tiles for an end clip, but we do want
        // the path structure (especially bbox) allocated for it.
        tile_count = 0;
    }

    sh_tile_count[th_ix] = tile_count;
    uint total_tile_count = tile_count;
    // Inclusive prefix sum of sh_tile_count. In round i every invocation adds
    // the partial sum held (1 << i) slots to its left. The first barrier makes
    // the previous round's writes visible before reading; the second keeps any
    // invocation from overwriting a slot that a neighbour is still reading.
    for (uint i = 0; i < LG_TILE_ALLOC_WG; i++) {
        barrier();
        if (th_ix >= (1 << i)) {
            total_tile_count += sh_tile_count[th_ix - (1 << i)];
        }
        barrier();
        sh_tile_count[th_ix] = total_tile_count;
    }

    // The last invocation holds the workgroup total; it performs one
    // allocation on behalf of everyone and publishes it through shared memory.
    if (th_ix == TILE_ALLOC_WG - 1) {
        sh_tile_alloc = malloc(total_tile_count * Tile_size);
    }
    barrier();
    MallocResult alloc_start = sh_tile_alloc;
    // The result comes from shared memory, so this exit is taken by all
    // invocations of the workgroup or by none.
    if (alloc_start.failed) {
        return;
    }

    if (element_ix < conf.n_elements) {
        // The exclusive prefix (the left neighbour's inclusive sum) is this
        // element's first tile index within the workgroup allocation.
        uint tile_subix = th_ix > 0 ? sh_tile_count[th_ix - 1] : 0;
        Alloc tiles_alloc = slice_mem(alloc_start.alloc, Tile_size * tile_subix, Tile_size * tile_count);
        path.tiles = TileRef(tiles_alloc.offset);
        Path_write(conf.tile_alloc, path_ref, path);
    }

    // Zero out allocated tiles efficiently. total_count is the number of
    // 32-bit words in the workgroup's allocation; start_ix is its first word.
    uint total_count = sh_tile_count[TILE_ALLOC_WG - 1] * (Tile_size / 4);
    uint start_ix = alloc_start.alloc.offset >> 2;
    for (uint i = th_ix; i < total_count; i += TILE_ALLOC_WG) {
        // Note: this interleaving is faster than using Tile_write
        // by a significant amount.
        write_mem(alloc_start.alloc, start_ix + i, 0);
    }
}
|