From ebf2dcea507e0478efbb6118f3d55b84f7a7e024 Mon Sep 17 00:00:00 2001 From: Elias Naur Date: Wed, 31 Mar 2021 19:55:29 +0200 Subject: [PATCH] gpu/shaders: update piet-gpu Changes: - faster implementation of RGBA output - fix stroked clips and images Signed-off-by: Elias Naur --- gpu/shaders.go | 6 +- gpu/shaders/backdrop.comp | 4 +- gpu/shaders/coarse.comp | 81 +++++++++++++-------------- gpu/shaders/kernel4.comp | 114 +++++++++++++------------------------- gpu/shaders/mem.h | 24 +++++++- gpu/shaders/setup.h | 5 +- 6 files changed, 106 insertions(+), 128 deletions(-) diff --git a/gpu/shaders.go b/gpu/shaders.go index bf9f084a..1080fc5b 100644 --- a/gpu/shaders.go +++ b/gpu/shaders.go @@ -7,7 +7,7 @@ import "gioui.org/gpu/internal/driver" var ( shader_backdrop_comp = driver.ShaderSources{ Name: "backdrop.comp", - GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n uint offset;\n};\n\nstruct AnnotatedRef\n{\n uint offset;\n};\n\nstruct AnnotatedTag\n{\n uint tag;\n uint flags;\n};\n\nstruct PathRef\n{\n uint offset;\n};\n\nstruct TileRef\n{\n uint offset;\n};\n\nstruct Path\n{\n uvec4 bbox;\n TileRef tiles;\n};\n\nstruct Config\n{\n uint n_elements;\n uint n_pathseg;\n uint width_in_tiles;\n uint height_in_tiles;\n Alloc tile_alloc;\n Alloc bin_alloc;\n Alloc ptcl_alloc;\n Alloc pathseg_alloc;\n Alloc anno_alloc;\n Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n uint mem_offset;\n uint mem_error;\n uint memory[];\n} _77;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n Config conf;\n} _191;\n\nshared uint sh_row_width[128];\nshared Alloc sh_row_alloc[128];\nshared uint sh_row_count[128];\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return 0u;\n }\n uint v = _77.memory[offset];\n return v;\n}\n\nAnnotatedTag 
Annotated_tag(Alloc a, AnnotatedRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint tag_and_flags = read_mem(param, param_1);\n return AnnotatedTag(tag_and_flags & 65535u, tag_and_flags >> uint(16));\n}\n\nuint fill_mode_from_flags(uint flags)\n{\n return flags & 1u;\n}\n\nPath Path_read(Alloc a, PathRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Path s;\n s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n s.tiles = TileRef(raw2);\n return s;\n}\n\nAlloc new_alloc(uint offset, uint size)\n{\n Alloc a;\n a.offset = offset;\n return a;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return;\n }\n _77.memory[offset] = val;\n}\n\nvoid main()\n{\n if (_77.mem_error != 0u)\n {\n return;\n }\n uint th_ix = gl_LocalInvocationID.x;\n uint element_ix = gl_GlobalInvocationID.x;\n AnnotatedRef ref = AnnotatedRef(_191.conf.anno_alloc.offset + (element_ix * 32u));\n uint row_count = 0u;\n if (element_ix < _191.conf.n_elements)\n {\n Alloc param;\n param.offset = _191.conf.anno_alloc.offset;\n AnnotatedRef param_1 = ref;\n AnnotatedTag tag = Annotated_tag(param, param_1);\n switch (tag.tag)\n {\n case 1u:\n {\n uint param_2 = tag.flags;\n if (fill_mode_from_flags(param_2) != 0u)\n {\n break;\n }\n uint _237 = element_ix * 12u;\n uint _238 = _191.conf.tile_alloc.offset + _237;\n PathRef _239 = PathRef(_238);\n PathRef path_ref = _239;\n Alloc param_3;\n param_3.offset = _191.conf.tile_alloc.offset;\n PathRef _247 = path_ref;\n PathRef param_4 = _247;\n Path _248 = Path_read(param_3, param_4);\n Path path = _248;\n uint _255 = path.bbox.z;\n uint _257 = 
path.bbox.x;\n uint _258 = _255 - _257;\n sh_row_width[th_ix] = _258;\n uint _263 = path.bbox.w;\n uint _265 = path.bbox.y;\n uint _266 = _263 - _265;\n row_count = _266;\n uint _267 = row_count;\n bool _268 = _267 == 1u;\n bool _274;\n if (_268)\n {\n uint _272 = path.bbox.y;\n bool _273 = _272 > 0u;\n _274 = _273;\n }\n else\n {\n _274 = _268;\n }\n if (_274)\n {\n row_count = 0u;\n }\n uint _279 = path.bbox.z;\n uint _281 = path.bbox.x;\n uint _282 = _279 - _281;\n uint _284 = path.bbox.w;\n uint _286 = path.bbox.y;\n uint _287 = _284 - _286;\n uint _288 = _282 * _287;\n uint _290 = _288 * 8u;\n uint _293 = path.tiles.offset;\n uint param_5 = _293;\n uint param_6 = _290;\n Alloc _295 = new_alloc(param_5, param_6);\n Alloc path_alloc = _295;\n Alloc _300 = path_alloc;\n sh_row_alloc[th_ix] = _300;\n break;\n }\n case 2u:\n case 3u:\n {\n uint _237 = element_ix * 12u;\n uint _238 = _191.conf.tile_alloc.offset + _237;\n PathRef _239 = PathRef(_238);\n PathRef path_ref = _239;\n Alloc param_3;\n param_3.offset = _191.conf.tile_alloc.offset;\n PathRef _247 = path_ref;\n PathRef param_4 = _247;\n Path _248 = Path_read(param_3, param_4);\n Path path = _248;\n uint _255 = path.bbox.z;\n uint _257 = path.bbox.x;\n uint _258 = _255 - _257;\n sh_row_width[th_ix] = _258;\n uint _263 = path.bbox.w;\n uint _265 = path.bbox.y;\n uint _266 = _263 - _265;\n row_count = _266;\n uint _267 = row_count;\n bool _268 = _267 == 1u;\n bool _274;\n if (_268)\n {\n uint _272 = path.bbox.y;\n bool _273 = _272 > 0u;\n _274 = _273;\n }\n else\n {\n _274 = _268;\n }\n if (_274)\n {\n row_count = 0u;\n }\n uint _279 = path.bbox.z;\n uint _281 = path.bbox.x;\n uint _282 = _279 - _281;\n uint _284 = path.bbox.w;\n uint _286 = path.bbox.y;\n uint _287 = _284 - _286;\n uint _288 = _282 * _287;\n uint _290 = _288 * 8u;\n uint _293 = path.tiles.offset;\n uint param_5 = _293;\n uint param_6 = _290;\n Alloc _295 = new_alloc(param_5, param_6);\n Alloc path_alloc = _295;\n Alloc _300 = path_alloc;\n 
sh_row_alloc[th_ix] = _300;\n break;\n }\n }\n }\n sh_row_count[th_ix] = row_count;\n for (uint i = 0u; i < 7u; i++)\n {\n barrier();\n if (th_ix >= uint(1 << int(i)))\n {\n row_count += sh_row_count[th_ix - uint(1 << int(i))];\n }\n barrier();\n sh_row_count[th_ix] = row_count;\n }\n barrier();\n uint total_rows = sh_row_count[127];\n uint _396;\n for (uint row = th_ix; row < total_rows; row += 128u)\n {\n uint el_ix = 0u;\n for (uint i_1 = 0u; i_1 < 7u; i_1++)\n {\n uint probe = el_ix + uint(64 >> int(i_1));\n if (row >= sh_row_count[probe - 1u])\n {\n el_ix = probe;\n }\n }\n uint width = sh_row_width[el_ix];\n if (width > 0u)\n {\n Alloc tiles_alloc = sh_row_alloc[el_ix];\n if (el_ix > 0u)\n {\n _396 = sh_row_count[el_ix - 1u];\n }\n else\n {\n _396 = 0u;\n }\n uint seq_ix = row - _396;\n uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);\n Alloc param_7 = tiles_alloc;\n uint param_8 = tile_el_ix;\n uint sum = read_mem(param_7, param_8);\n for (uint x = 1u; x < width; x++)\n {\n tile_el_ix += 2u;\n Alloc param_9 = tiles_alloc;\n uint param_10 = tile_el_ix;\n sum += read_mem(param_9, param_10);\n Alloc param_11 = tiles_alloc;\n uint param_12 = tile_el_ix;\n uint param_13 = sum;\n write_mem(param_11, param_12, param_13);\n }\n }\n }\n}\n\n", + GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n uint offset;\n};\n\nstruct AnnotatedRef\n{\n uint offset;\n};\n\nstruct AnnotatedTag\n{\n uint tag;\n uint flags;\n};\n\nstruct PathRef\n{\n uint offset;\n};\n\nstruct TileRef\n{\n uint offset;\n};\n\nstruct Path\n{\n uvec4 bbox;\n TileRef tiles;\n};\n\nstruct Config\n{\n uint n_elements;\n uint n_pathseg;\n uint width_in_tiles;\n uint height_in_tiles;\n Alloc tile_alloc;\n Alloc bin_alloc;\n Alloc ptcl_alloc;\n Alloc pathseg_alloc;\n Alloc anno_alloc;\n Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n uint mem_offset;\n uint mem_error;\n uint 
memory[];\n} _77;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n Config conf;\n} _191;\n\nshared uint sh_row_width[128];\nshared Alloc sh_row_alloc[128];\nshared uint sh_row_count[128];\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return 0u;\n }\n uint v = _77.memory[offset];\n return v;\n}\n\nAnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint tag_and_flags = read_mem(param, param_1);\n return AnnotatedTag(tag_and_flags & 65535u, tag_and_flags >> uint(16));\n}\n\nuint fill_mode_from_flags(uint flags)\n{\n return flags & 1u;\n}\n\nPath Path_read(Alloc a, PathRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Path s;\n s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n s.tiles = TileRef(raw2);\n return s;\n}\n\nAlloc new_alloc(uint offset, uint size)\n{\n Alloc a;\n a.offset = offset;\n return a;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return;\n }\n _77.memory[offset] = val;\n}\n\nvoid main()\n{\n if (_77.mem_error != 0u)\n {\n return;\n }\n uint th_ix = gl_LocalInvocationID.x;\n uint element_ix = gl_GlobalInvocationID.x;\n AnnotatedRef ref = AnnotatedRef(_191.conf.anno_alloc.offset + (element_ix * 32u));\n uint row_count = 0u;\n if (element_ix < _191.conf.n_elements)\n {\n Alloc param;\n param.offset = _191.conf.anno_alloc.offset;\n AnnotatedRef param_1 = ref;\n AnnotatedTag tag = Annotated_tag(param, param_1);\n switch (tag.tag)\n {\n 
case 2u:\n case 3u:\n case 1u:\n {\n uint param_2 = tag.flags;\n if (fill_mode_from_flags(param_2) != 0u)\n {\n break;\n }\n PathRef path_ref = PathRef(_191.conf.tile_alloc.offset + (element_ix * 12u));\n Alloc param_3;\n param_3.offset = _191.conf.tile_alloc.offset;\n PathRef param_4 = path_ref;\n Path path = Path_read(param_3, param_4);\n sh_row_width[th_ix] = path.bbox.z - path.bbox.x;\n row_count = path.bbox.w - path.bbox.y;\n bool _267 = row_count == 1u;\n bool _273;\n if (_267)\n {\n _273 = path.bbox.y > 0u;\n }\n else\n {\n _273 = _267;\n }\n if (_273)\n {\n row_count = 0u;\n }\n uint param_5 = path.tiles.offset;\n uint param_6 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;\n Alloc path_alloc = new_alloc(param_5, param_6);\n sh_row_alloc[th_ix] = path_alloc;\n break;\n }\n }\n }\n sh_row_count[th_ix] = row_count;\n for (uint i = 0u; i < 7u; i++)\n {\n barrier();\n if (th_ix >= uint(1 << int(i)))\n {\n row_count += sh_row_count[th_ix - uint(1 << int(i))];\n }\n barrier();\n sh_row_count[th_ix] = row_count;\n }\n barrier();\n uint total_rows = sh_row_count[127];\n uint _395;\n for (uint row = th_ix; row < total_rows; row += 128u)\n {\n uint el_ix = 0u;\n for (uint i_1 = 0u; i_1 < 7u; i_1++)\n {\n uint probe = el_ix + uint(64 >> int(i_1));\n if (row >= sh_row_count[probe - 1u])\n {\n el_ix = probe;\n }\n }\n uint width = sh_row_width[el_ix];\n if (width > 0u)\n {\n Alloc tiles_alloc = sh_row_alloc[el_ix];\n if (el_ix > 0u)\n {\n _395 = sh_row_count[el_ix - 1u];\n }\n else\n {\n _395 = 0u;\n }\n uint seq_ix = row - _395;\n uint tile_el_ix = ((tiles_alloc.offset >> uint(2)) + 1u) + ((seq_ix * 2u) * width);\n Alloc param_7 = tiles_alloc;\n uint param_8 = tile_el_ix;\n uint sum = read_mem(param_7, param_8);\n for (uint x = 1u; x < width; x++)\n {\n tile_el_ix += 2u;\n Alloc param_9 = tiles_alloc;\n uint param_10 = tile_el_ix;\n sum += read_mem(param_9, param_10);\n Alloc param_11 = tiles_alloc;\n uint param_12 = tile_el_ix;\n uint param_13 = 
sum;\n write_mem(param_11, param_12, param_13);\n }\n }\n }\n}\n\n", } shader_binning_comp = driver.ShaderSources{ Name: "binning.comp", @@ -66,7 +66,7 @@ var ( } shader_coarse_comp = driver.ShaderSources{ Name: "coarse.comp", - GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n uint offset;\n};\n\nstruct MallocResult\n{\n Alloc alloc;\n bool failed;\n};\n\nstruct AnnoImageRef\n{\n uint offset;\n};\n\nstruct AnnoImage\n{\n vec4 bbox;\n float linewidth;\n uint index;\n ivec2 offset;\n};\n\nstruct AnnoColorRef\n{\n uint offset;\n};\n\nstruct AnnoColor\n{\n vec4 bbox;\n float linewidth;\n uint rgba_color;\n};\n\nstruct AnnoBeginClipRef\n{\n uint offset;\n};\n\nstruct AnnoBeginClip\n{\n vec4 bbox;\n float linewidth;\n};\n\nstruct AnnotatedRef\n{\n uint offset;\n};\n\nstruct AnnotatedTag\n{\n uint tag;\n uint flags;\n};\n\nstruct BinInstanceRef\n{\n uint offset;\n};\n\nstruct BinInstance\n{\n uint element_ix;\n};\n\nstruct PathRef\n{\n uint offset;\n};\n\nstruct TileRef\n{\n uint offset;\n};\n\nstruct Path\n{\n uvec4 bbox;\n TileRef tiles;\n};\n\nstruct TileSegRef\n{\n uint offset;\n};\n\nstruct Tile\n{\n TileSegRef tile;\n int backdrop;\n};\n\nstruct CmdStrokeRef\n{\n uint offset;\n};\n\nstruct CmdStroke\n{\n uint tile_ref;\n float half_width;\n};\n\nstruct CmdFillRef\n{\n uint offset;\n};\n\nstruct CmdFill\n{\n uint tile_ref;\n int backdrop;\n};\n\nstruct CmdColorRef\n{\n uint offset;\n};\n\nstruct CmdColor\n{\n uint rgba_color;\n};\n\nstruct CmdImageRef\n{\n uint offset;\n};\n\nstruct CmdImage\n{\n uint index;\n ivec2 offset;\n};\n\nstruct CmdAlphaRef\n{\n uint offset;\n};\n\nstruct CmdAlpha\n{\n float alpha;\n};\n\nstruct CmdJumpRef\n{\n uint offset;\n};\n\nstruct CmdJump\n{\n uint new_ref;\n};\n\nstruct CmdRef\n{\n uint offset;\n};\n\nstruct Config\n{\n uint n_elements;\n uint n_pathseg;\n uint width_in_tiles;\n uint height_in_tiles;\n Alloc tile_alloc;\n Alloc bin_alloc;\n Alloc ptcl_alloc;\n Alloc 
pathseg_alloc;\n Alloc anno_alloc;\n Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n uint mem_offset;\n uint mem_error;\n uint memory[];\n} _276;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n Config conf;\n} _1033;\n\nshared uint sh_bitmaps[4][128];\nshared Alloc sh_part_elements[128];\nshared uint sh_part_count[128];\nshared uint sh_elements[128];\nshared uint sh_tile_stride[128];\nshared uint sh_tile_width[128];\nshared uint sh_tile_x0[128];\nshared uint sh_tile_y0[128];\nshared uint sh_tile_base[128];\nshared uint sh_tile_count[128];\n\nAlloc new_alloc(uint offset, uint size)\n{\n Alloc a;\n a.offset = offset;\n return a;\n}\n\nAlloc slice_mem(Alloc a, uint offset, uint size)\n{\n uint param = a.offset + offset;\n uint param_1 = size;\n return new_alloc(param, param_1);\n}\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return 0u;\n }\n uint v = _276.memory[offset];\n return v;\n}\n\nBinInstanceRef BinInstance_index(BinInstanceRef ref, uint index)\n{\n return BinInstanceRef(ref.offset + (index * 4u));\n}\n\nBinInstance BinInstance_read(Alloc a, BinInstanceRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n BinInstance s;\n s.element_ix = raw0;\n return s;\n}\n\nAnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint tag_and_flags = read_mem(param, param_1);\n return AnnotatedTag(tag_and_flags & 65535u, tag_and_flags >> uint(16));\n}\n\nPath Path_read(Alloc a, PathRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n 
uint raw2 = read_mem(param_4, param_5);\n Path s;\n s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n s.tiles = TileRef(raw2);\n return s;\n}\n\nvoid write_tile_alloc(uint el_ix, Alloc a)\n{\n}\n\nAlloc read_tile_alloc(uint el_ix)\n{\n uint param = 0u;\n uint param_1 = uint(int(uint(_276.memory.length())) * 4);\n return new_alloc(param, param_1);\n}\n\nTile Tile_read(Alloc a, TileRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Tile s;\n s.tile = TileSegRef(raw0);\n s.backdrop = int(raw1);\n return s;\n}\n\nAnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Alloc param_6 = a;\n uint param_7 = ix + 3u;\n uint raw3 = read_mem(param_6, param_7);\n Alloc param_8 = a;\n uint param_9 = ix + 4u;\n uint raw4 = read_mem(param_8, param_9);\n Alloc param_10 = a;\n uint param_11 = ix + 5u;\n uint raw5 = read_mem(param_10, param_11);\n AnnoColor s;\n s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n s.linewidth = uintBitsToFloat(raw4);\n s.rgba_color = raw5;\n return s;\n}\n\nAnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref)\n{\n Alloc param = a;\n AnnoColorRef param_1 = AnnoColorRef(ref.offset + 4u);\n return AnnoColor_read(param, param_1);\n}\n\nMallocResult malloc(uint size)\n{\n MallocResult r;\n r.failed = false;\n uint _282 = atomicAdd(_276.mem_offset, size);\n uint offset = _282;\n uint param = offset;\n uint param_1 = size;\n r.alloc = new_alloc(param, param_1);\n if ((offset + size) > 
uint(int(uint(_276.memory.length())) * 4))\n {\n r.failed = true;\n uint _303 = atomicMax(_276.mem_error, 1u);\n return r;\n }\n return r;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return;\n }\n _276.memory[offset] = val;\n}\n\nvoid CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint param_2 = s.new_ref;\n write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 9u;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n CmdJumpRef param_4 = CmdJumpRef(ref.offset + 4u);\n CmdJump param_5 = s;\n CmdJump_write(param_3, param_4, param_5);\n}\n\nbool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit)\n{\n if (cmd_ref.offset < cmd_limit)\n {\n return true;\n }\n uint param = 1024u;\n MallocResult _993 = malloc(param);\n MallocResult new_cmd = _993;\n if (new_cmd.failed)\n {\n return false;\n }\n CmdJump jump = CmdJump(new_cmd.alloc.offset);\n Alloc param_1 = cmd_alloc;\n CmdRef param_2 = cmd_ref;\n CmdJump param_3 = jump;\n Cmd_Jump_write(param_1, param_2, param_3);\n cmd_alloc = new_cmd.alloc;\n cmd_ref = CmdRef(cmd_alloc.offset);\n cmd_limit = (cmd_alloc.offset + 1024u) - 36u;\n return true;\n}\n\nuint fill_mode_from_flags(uint flags)\n{\n return flags & 1u;\n}\n\nvoid CmdFill_write(Alloc a, CmdFillRef ref, CmdFill s)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint param_2 = s.tile_ref;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n uint param_4 = ix + 1u;\n uint param_5 = uint(s.backdrop);\n write_mem(param_3, param_4, param_5);\n}\n\nvoid Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 1u;\n 
write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n CmdFillRef param_4 = CmdFillRef(ref.offset + 4u);\n CmdFill param_5 = s;\n CmdFill_write(param_3, param_4, param_5);\n}\n\nvoid Cmd_Solid_write(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 3u;\n write_mem(param, param_1, param_2);\n}\n\nvoid CmdStroke_write(Alloc a, CmdStrokeRef ref, CmdStroke s)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint param_2 = s.tile_ref;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n uint param_4 = ix + 1u;\n uint param_5 = floatBitsToUint(s.half_width);\n write_mem(param_3, param_4, param_5);\n}\n\nvoid Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 2u;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n CmdStrokeRef param_4 = CmdStrokeRef(ref.offset + 4u);\n CmdStroke param_5 = s;\n CmdStroke_write(param_3, param_4, param_5);\n}\n\nvoid CmdColor_write(Alloc a, CmdColorRef ref, CmdColor s)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint param_2 = s.rgba_color;\n write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 5u;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n CmdColorRef param_4 = CmdColorRef(ref.offset + 4u);\n CmdColor param_5 = s;\n CmdColor_write(param_3, param_4, param_5);\n}\n\nAnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Alloc param_6 = a;\n uint param_7 = ix + 3u;\n uint raw3 = 
read_mem(param_6, param_7);\n Alloc param_8 = a;\n uint param_9 = ix + 4u;\n uint raw4 = read_mem(param_8, param_9);\n Alloc param_10 = a;\n uint param_11 = ix + 5u;\n uint raw5 = read_mem(param_10, param_11);\n Alloc param_12 = a;\n uint param_13 = ix + 6u;\n uint raw6 = read_mem(param_12, param_13);\n AnnoImage s;\n s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n s.linewidth = uintBitsToFloat(raw4);\n s.index = raw5;\n s.offset = ivec2(int(raw6 << uint(16)) >> 16, int(raw6) >> 16);\n return s;\n}\n\nAnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref)\n{\n Alloc param = a;\n AnnoImageRef param_1 = AnnoImageRef(ref.offset + 4u);\n return AnnoImage_read(param, param_1);\n}\n\nvoid CmdImage_write(Alloc a, CmdImageRef ref, CmdImage s)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint param_2 = s.index;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n uint param_4 = ix + 1u;\n uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));\n write_mem(param_3, param_4, param_5);\n}\n\nvoid Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 6u;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n CmdImageRef param_4 = CmdImageRef(ref.offset + 4u);\n CmdImage param_5 = s;\n CmdImage_write(param_3, param_4, param_5);\n}\n\nAnnoBeginClip AnnoBeginClip_read(Alloc a, AnnoBeginClipRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Alloc param_6 = a;\n uint param_7 = ix + 3u;\n uint raw3 = read_mem(param_6, param_7);\n Alloc param_8 = a;\n uint param_9 = ix + 4u;\n uint raw4 = read_mem(param_8, param_9);\n 
AnnoBeginClip s;\n s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n s.linewidth = uintBitsToFloat(raw4);\n return s;\n}\n\nAnnoBeginClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref)\n{\n Alloc param = a;\n AnnoBeginClipRef param_1 = AnnoBeginClipRef(ref.offset + 4u);\n return AnnoBeginClip_read(param, param_1);\n}\n\nvoid CmdAlpha_write(Alloc a, CmdAlphaRef ref, CmdAlpha s)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint param_2 = floatBitsToUint(s.alpha);\n write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_Alpha_write(Alloc a, CmdRef ref, CmdAlpha s)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 4u;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n CmdAlphaRef param_4 = CmdAlphaRef(ref.offset + 4u);\n CmdAlpha param_5 = s;\n CmdAlpha_write(param_3, param_4, param_5);\n}\n\nvoid Cmd_BeginClip_write(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 7u;\n write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_EndClip_write(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 8u;\n write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_End_write(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 0u;\n write_mem(param, param_1, param_2);\n}\n\nvoid main()\n{\n if (_276.mem_error != 0u)\n {\n return;\n }\n uint width_in_bins = ((_1033.conf.width_in_tiles + 16u) - 1u) / 16u;\n uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x;\n uint partition_ix = 0u;\n uint n_partitions = ((_1033.conf.n_elements + 128u) - 1u) / 128u;\n uint th_ix = gl_LocalInvocationID.x;\n uint bin_tile_x = 16u * gl_WorkGroupID.x;\n uint bin_tile_y = 8u * gl_WorkGroupID.y;\n uint tile_x = gl_LocalInvocationID.x % 16u;\n uint tile_y = gl_LocalInvocationID.x / 16u;\n uint this_tile_ix = 
(((bin_tile_y + tile_y) * _1033.conf.width_in_tiles) + bin_tile_x) + tile_x;\n Alloc param;\n param.offset = _1033.conf.ptcl_alloc.offset;\n uint param_1 = this_tile_ix * 1024u;\n uint param_2 = 1024u;\n Alloc cmd_alloc = slice_mem(param, param_1, param_2);\n CmdRef cmd_ref = CmdRef(cmd_alloc.offset);\n uint cmd_limit = (cmd_ref.offset + 1024u) - 36u;\n uint clip_depth = 0u;\n uint clip_zero_depth = 0u;\n uint clip_one_mask = 0u;\n uint rd_ix = 0u;\n uint wr_ix = 0u;\n uint part_start_ix = 0u;\n uint ready_ix = 0u;\n Alloc param_3;\n Alloc param_5;\n uint _1309;\n uint element_ix;\n AnnotatedRef ref;\n Alloc param_13;\n Alloc param_15;\n uint tile_count;\n Alloc param_21;\n uint _1623;\n bool include_tile;\n Alloc param_26;\n Tile tile_1;\n Alloc param_31;\n Alloc param_51;\n Alloc param_71;\n while (true)\n {\n for (uint i = 0u; i < 4u; i++)\n {\n sh_bitmaps[i][th_ix] = 0u;\n }\n bool _1361;\n for (;;)\n {\n if ((ready_ix == wr_ix) && (partition_ix < n_partitions))\n {\n part_start_ix = ready_ix;\n uint count = 0u;\n bool _1159 = th_ix < 128u;\n bool _1167;\n if (_1159)\n {\n _1167 = (partition_ix + th_ix) < n_partitions;\n }\n else\n {\n _1167 = _1159;\n }\n if (_1167)\n {\n uint in_ix = (_1033.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 128u) + bin_ix) * 2u);\n param_3.offset = _1033.conf.bin_alloc.offset;\n uint param_4 = in_ix;\n count = read_mem(param_3, param_4);\n param_5.offset = _1033.conf.bin_alloc.offset;\n uint param_6 = in_ix + 1u;\n uint offset = read_mem(param_5, param_6);\n uint param_7 = offset;\n uint param_8 = count * 4u;\n sh_part_elements[th_ix] = new_alloc(param_7, param_8);\n }\n for (uint i_1 = 0u; i_1 < 7u; i_1++)\n {\n if (th_ix < 128u)\n {\n sh_part_count[th_ix] = count;\n }\n barrier();\n if (th_ix < 128u)\n {\n if (th_ix >= uint(1 << int(i_1)))\n {\n count += sh_part_count[th_ix - uint(1 << int(i_1))];\n }\n }\n barrier();\n }\n if (th_ix < 128u)\n {\n sh_part_count[th_ix] = part_start_ix + count;\n }\n barrier();\n 
ready_ix = sh_part_count[127];\n partition_ix += 128u;\n }\n uint ix = rd_ix + th_ix;\n if ((ix >= wr_ix) && (ix < ready_ix))\n {\n uint part_ix = 0u;\n for (uint i_2 = 0u; i_2 < 7u; i_2++)\n {\n uint probe = part_ix + uint(64 >> int(i_2));\n if (ix >= sh_part_count[probe - 1u])\n {\n part_ix = probe;\n }\n }\n if (part_ix > 0u)\n {\n _1309 = sh_part_count[part_ix - 1u];\n }\n else\n {\n _1309 = part_start_ix;\n }\n ix -= _1309;\n Alloc bin_alloc = sh_part_elements[part_ix];\n BinInstanceRef inst_ref = BinInstanceRef(bin_alloc.offset);\n BinInstanceRef param_9 = inst_ref;\n uint param_10 = ix;\n Alloc param_11 = bin_alloc;\n BinInstanceRef param_12 = BinInstance_index(param_9, param_10);\n BinInstance inst = BinInstance_read(param_11, param_12);\n sh_elements[th_ix] = inst.element_ix;\n }\n barrier();\n wr_ix = min((rd_ix + 128u), ready_ix);\n bool _1351 = (wr_ix - rd_ix) < 128u;\n if (_1351)\n {\n _1361 = (wr_ix < ready_ix) || (partition_ix < n_partitions);\n }\n else\n {\n _1361 = _1351;\n }\n if (_1361)\n {\n continue;\n }\n else\n {\n break;\n }\n }\n uint tag = 0u;\n if ((th_ix + rd_ix) < wr_ix)\n {\n element_ix = sh_elements[th_ix];\n ref = AnnotatedRef(_1033.conf.anno_alloc.offset + (element_ix * 32u));\n param_13.offset = _1033.conf.anno_alloc.offset;\n AnnotatedRef param_14 = ref;\n tag = Annotated_tag(param_13, param_14).tag;\n }\n switch (tag)\n {\n case 1u:\n case 2u:\n case 3u:\n case 4u:\n {\n uint path_ix = element_ix;\n param_15.offset = _1033.conf.tile_alloc.offset;\n PathRef param_16 = PathRef(_1033.conf.tile_alloc.offset + (path_ix * 12u));\n Path path = Path_read(param_15, param_16);\n uint stride = path.bbox.z - path.bbox.x;\n sh_tile_stride[th_ix] = stride;\n int dx = int(path.bbox.x) - int(bin_tile_x);\n int dy = int(path.bbox.y) - int(bin_tile_y);\n int x0 = clamp(dx, 0, 16);\n int y0 = clamp(dy, 0, 8);\n int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16);\n int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 8);\n 
sh_tile_width[th_ix] = uint(x1 - x0);\n sh_tile_x0[th_ix] = uint(x0);\n sh_tile_y0[th_ix] = uint(y0);\n tile_count = uint(x1 - x0) * uint(y1 - y0);\n uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u);\n sh_tile_base[th_ix] = base;\n uint param_17 = path.tiles.offset;\n uint param_18 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;\n Alloc path_alloc = new_alloc(param_17, param_18);\n uint param_19 = th_ix;\n Alloc param_20 = path_alloc;\n write_tile_alloc(param_19, param_20);\n break;\n }\n default:\n {\n tile_count = 0u;\n break;\n }\n }\n sh_tile_count[th_ix] = tile_count;\n for (uint i_3 = 0u; i_3 < 7u; i_3++)\n {\n barrier();\n if (th_ix >= uint(1 << int(i_3)))\n {\n tile_count += sh_tile_count[th_ix - uint(1 << int(i_3))];\n }\n barrier();\n sh_tile_count[th_ix] = tile_count;\n }\n barrier();\n uint total_tile_count = sh_tile_count[127];\n for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 128u)\n {\n uint el_ix = 0u;\n for (uint i_4 = 0u; i_4 < 7u; i_4++)\n {\n uint probe_1 = el_ix + uint(64 >> int(i_4));\n if (ix_1 >= sh_tile_count[probe_1 - 1u])\n {\n el_ix = probe_1;\n }\n }\n AnnotatedRef ref_1 = AnnotatedRef(_1033.conf.anno_alloc.offset + (sh_elements[el_ix] * 32u));\n param_21.offset = _1033.conf.anno_alloc.offset;\n AnnotatedRef param_22 = ref_1;\n uint tag_1 = Annotated_tag(param_21, param_22).tag;\n if (el_ix > 0u)\n {\n _1623 = sh_tile_count[el_ix - 1u];\n }\n else\n {\n _1623 = 0u;\n }\n uint seq_ix = ix_1 - _1623;\n uint width = sh_tile_width[el_ix];\n uint x = sh_tile_x0[el_ix] + (seq_ix % width);\n uint y = sh_tile_y0[el_ix] + (seq_ix / width);\n if ((tag_1 == 3u) || (tag_1 == 4u))\n {\n include_tile = true;\n }\n else\n {\n uint param_23 = el_ix;\n Alloc param_24 = read_tile_alloc(param_23);\n TileRef param_25 = TileRef(sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u));\n Tile tile = Tile_read(param_24, param_25);\n bool _1684 = tile.tile.offset != 0u;\n bool _1691;\n if (!_1684)\n 
{\n _1691 = tile.backdrop != 0;\n }\n else\n {\n _1691 = _1684;\n }\n include_tile = _1691;\n }\n if (include_tile)\n {\n uint el_slice = el_ix / 32u;\n uint el_mask = uint(1 << int(el_ix & 31u));\n uint _1711 = atomicOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask);\n }\n }\n barrier();\n uint slice_ix = 0u;\n uint bitmap = sh_bitmaps[0][th_ix];\n while (true)\n {\n if (bitmap == 0u)\n {\n slice_ix++;\n if (slice_ix == 4u)\n {\n break;\n }\n bitmap = sh_bitmaps[slice_ix][th_ix];\n if (bitmap == 0u)\n {\n continue;\n }\n }\n uint element_ref_ix = (slice_ix * 32u) + uint(findLSB(bitmap));\n uint element_ix_1 = sh_elements[element_ref_ix];\n bitmap &= (bitmap - 1u);\n ref = AnnotatedRef(_1033.conf.anno_alloc.offset + (element_ix_1 * 32u));\n param_26.offset = _1033.conf.anno_alloc.offset;\n AnnotatedRef param_27 = ref;\n AnnotatedTag tag_2 = Annotated_tag(param_26, param_27);\n if (clip_zero_depth == 0u)\n {\n switch (tag_2.tag)\n {\n case 1u:\n {\n uint param_28 = element_ref_ix;\n Alloc param_29 = read_tile_alloc(param_28);\n TileRef param_30 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n tile_1 = Tile_read(param_29, param_30);\n param_31.offset = _1033.conf.anno_alloc.offset;\n AnnotatedRef param_32 = ref;\n AnnoColor fill = Annotated_Color_read(param_31, param_32);\n Alloc param_33 = cmd_alloc;\n CmdRef param_34 = cmd_ref;\n uint param_35 = cmd_limit;\n bool _1821 = alloc_cmd(param_33, param_34, param_35);\n cmd_alloc = param_33;\n cmd_ref = param_34;\n cmd_limit = param_35;\n if (!_1821)\n {\n break;\n }\n uint param_36 = tag_2.flags;\n if (fill_mode_from_flags(param_36) == 0u)\n {\n if (tile_1.tile.offset != 0u)\n {\n CmdFill cmd_fill = CmdFill(tile_1.tile.offset, tile_1.backdrop);\n Alloc param_37 = cmd_alloc;\n CmdRef param_38 = cmd_ref;\n CmdFill param_39 = cmd_fill;\n Cmd_Fill_write(param_37, param_38, param_39);\n cmd_ref.offset += 12u;\n }\n else\n {\n Alloc param_40 = cmd_alloc;\n CmdRef param_41 
= cmd_ref;\n Cmd_Solid_write(param_40, param_41);\n cmd_ref.offset += 4u;\n }\n }\n else\n {\n CmdStroke cmd_stroke = CmdStroke(tile_1.tile.offset, 0.5 * fill.linewidth);\n Alloc param_42 = cmd_alloc;\n CmdRef param_43 = cmd_ref;\n CmdStroke param_44 = cmd_stroke;\n Cmd_Stroke_write(param_42, param_43, param_44);\n cmd_ref.offset += 12u;\n }\n Alloc param_45 = cmd_alloc;\n CmdRef param_46 = cmd_ref;\n CmdColor param_47 = CmdColor(fill.rgba_color);\n Cmd_Color_write(param_45, param_46, param_47);\n cmd_ref.offset += 8u;\n break;\n }\n case 2u:\n {\n uint param_48 = element_ref_ix;\n Alloc param_49 = read_tile_alloc(param_48);\n TileRef param_50 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n tile_1 = Tile_read(param_49, param_50);\n param_51.offset = _1033.conf.anno_alloc.offset;\n AnnotatedRef param_52 = ref;\n AnnoImage fill_img = Annotated_Image_read(param_51, param_52);\n Alloc param_53 = cmd_alloc;\n CmdRef param_54 = cmd_ref;\n uint param_55 = cmd_limit;\n bool _1936 = alloc_cmd(param_53, param_54, param_55);\n cmd_alloc = param_53;\n cmd_ref = param_54;\n cmd_limit = param_55;\n if (!_1936)\n {\n break;\n }\n uint param_56 = tag_2.flags;\n if (fill_mode_from_flags(param_56) == 0u)\n {\n if (tile_1.tile.offset != 0u)\n {\n CmdFill cmd_fill_1 = CmdFill(tile_1.tile.offset, tile_1.backdrop);\n Alloc param_57 = cmd_alloc;\n CmdRef param_58 = cmd_ref;\n CmdFill param_59 = cmd_fill_1;\n Cmd_Fill_write(param_57, param_58, param_59);\n cmd_ref.offset += 12u;\n }\n else\n {\n Alloc param_60 = cmd_alloc;\n CmdRef param_61 = cmd_ref;\n Cmd_Solid_write(param_60, param_61);\n cmd_ref.offset += 4u;\n }\n }\n else\n {\n CmdStroke cmd_stroke_1 = CmdStroke(tile_1.tile.offset, 0.5 * fill_img.linewidth);\n Alloc param_62 = cmd_alloc;\n CmdRef param_63 = cmd_ref;\n CmdStroke param_64 = cmd_stroke_1;\n Cmd_Stroke_write(param_62, param_63, param_64);\n cmd_ref.offset += 12u;\n }\n Alloc param_65 = cmd_alloc;\n CmdRef param_66 
= cmd_ref;\n CmdImage param_67 = CmdImage(fill_img.index, fill_img.offset);\n Cmd_Image_write(param_65, param_66, param_67);\n cmd_ref.offset += 12u;\n break;\n }\n case 3u:\n {\n uint param_68 = element_ref_ix;\n Alloc param_69 = read_tile_alloc(param_68);\n TileRef param_70 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n tile_1 = Tile_read(param_69, param_70);\n bool _2039 = tile_1.tile.offset == 0u;\n bool _2045;\n if (_2039)\n {\n _2045 = tile_1.backdrop == 0;\n }\n else\n {\n _2045 = _2039;\n }\n if (_2045)\n {\n clip_zero_depth = clip_depth + 1u;\n }\n else\n {\n if ((tile_1.tile.offset == 0u) && (clip_depth < 32u))\n {\n clip_one_mask |= uint(1 << int(clip_depth));\n }\n else\n {\n param_71.offset = _1033.conf.anno_alloc.offset;\n AnnotatedRef param_72 = ref;\n AnnoBeginClip begin_clip = Annotated_BeginClip_read(param_71, param_72);\n Alloc param_73 = cmd_alloc;\n CmdRef param_74 = cmd_ref;\n uint param_75 = cmd_limit;\n bool _2080 = alloc_cmd(param_73, param_74, param_75);\n cmd_alloc = param_73;\n cmd_ref = param_74;\n cmd_limit = param_75;\n if (!_2080)\n {\n break;\n }\n uint param_76 = tag_2.flags;\n if (fill_mode_from_flags(param_76) == 0u)\n {\n if (tile_1.tile.offset != 0u)\n {\n CmdFill cmd_fill_2 = CmdFill(tile_1.tile.offset, tile_1.backdrop);\n Alloc param_77 = cmd_alloc;\n CmdRef param_78 = cmd_ref;\n CmdFill param_79 = cmd_fill_2;\n Cmd_Fill_write(param_77, param_78, param_79);\n cmd_ref.offset += 12u;\n }\n else\n {\n float alpha = (tile_1.backdrop == 0) ? 
0.0 : 1.0;\n Alloc param_80 = cmd_alloc;\n CmdRef param_81 = cmd_ref;\n CmdAlpha param_82 = CmdAlpha(alpha);\n Cmd_Alpha_write(param_80, param_81, param_82);\n cmd_ref.offset += 8u;\n }\n }\n else\n {\n CmdStroke cmd_stroke_2 = CmdStroke(tile_1.tile.offset, 0.5 * begin_clip.linewidth);\n Alloc param_83 = cmd_alloc;\n CmdRef param_84 = cmd_ref;\n CmdStroke param_85 = cmd_stroke_2;\n Cmd_Stroke_write(param_83, param_84, param_85);\n cmd_ref.offset += 12u;\n }\n Alloc param_86 = cmd_alloc;\n CmdRef param_87 = cmd_ref;\n Cmd_BeginClip_write(param_86, param_87);\n cmd_ref.offset += 4u;\n if (clip_depth < 32u)\n {\n clip_one_mask &= uint(~(1 << int(clip_depth)));\n }\n }\n }\n clip_depth++;\n break;\n }\n case 4u:\n {\n clip_depth--;\n bool _2181 = clip_depth >= 32u;\n bool _2191;\n if (!_2181)\n {\n _2191 = (clip_one_mask & uint(1 << int(clip_depth))) == 0u;\n }\n else\n {\n _2191 = _2181;\n }\n if (_2191)\n {\n Alloc param_88 = cmd_alloc;\n CmdRef param_89 = cmd_ref;\n uint param_90 = cmd_limit;\n bool _2200 = alloc_cmd(param_88, param_89, param_90);\n cmd_alloc = param_88;\n cmd_ref = param_89;\n cmd_limit = param_90;\n if (!_2200)\n {\n break;\n }\n Alloc param_91 = cmd_alloc;\n CmdRef param_92 = cmd_ref;\n Cmd_Solid_write(param_91, param_92);\n cmd_ref.offset += 4u;\n Alloc param_93 = cmd_alloc;\n CmdRef param_94 = cmd_ref;\n Cmd_EndClip_write(param_93, param_94);\n cmd_ref.offset += 4u;\n }\n break;\n }\n }\n }\n else\n {\n switch (tag_2.tag)\n {\n case 3u:\n {\n clip_depth++;\n break;\n }\n case 4u:\n {\n if (clip_depth == clip_zero_depth)\n {\n clip_zero_depth = 0u;\n }\n clip_depth--;\n break;\n }\n }\n }\n }\n barrier();\n rd_ix += 128u;\n if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions))\n {\n break;\n }\n }\n bool _2263 = (bin_tile_x + tile_x) < _1033.conf.width_in_tiles;\n bool _2272;\n if (_2263)\n {\n _2272 = (bin_tile_y + tile_y) < _1033.conf.height_in_tiles;\n }\n else\n {\n _2272 = _2263;\n }\n if (_2272)\n {\n Alloc param_95 = cmd_alloc;\n 
CmdRef param_96 = cmd_ref;\n Cmd_End_write(param_95, param_96);\n }\n}\n\n", + GLSL310ES: "#version 310 es\nlayout(local_size_x = 128, local_size_y = 1, local_size_z = 1) in;\n\nstruct Alloc\n{\n uint offset;\n};\n\nstruct MallocResult\n{\n Alloc alloc;\n bool failed;\n};\n\nstruct AnnoImageRef\n{\n uint offset;\n};\n\nstruct AnnoImage\n{\n vec4 bbox;\n float linewidth;\n uint index;\n ivec2 offset;\n};\n\nstruct AnnoColorRef\n{\n uint offset;\n};\n\nstruct AnnoColor\n{\n vec4 bbox;\n float linewidth;\n uint rgba_color;\n};\n\nstruct AnnoBeginClipRef\n{\n uint offset;\n};\n\nstruct AnnoBeginClip\n{\n vec4 bbox;\n float linewidth;\n};\n\nstruct AnnotatedRef\n{\n uint offset;\n};\n\nstruct AnnotatedTag\n{\n uint tag;\n uint flags;\n};\n\nstruct BinInstanceRef\n{\n uint offset;\n};\n\nstruct BinInstance\n{\n uint element_ix;\n};\n\nstruct PathRef\n{\n uint offset;\n};\n\nstruct TileRef\n{\n uint offset;\n};\n\nstruct Path\n{\n uvec4 bbox;\n TileRef tiles;\n};\n\nstruct TileSegRef\n{\n uint offset;\n};\n\nstruct Tile\n{\n TileSegRef tile;\n int backdrop;\n};\n\nstruct CmdStrokeRef\n{\n uint offset;\n};\n\nstruct CmdStroke\n{\n uint tile_ref;\n float half_width;\n};\n\nstruct CmdFillRef\n{\n uint offset;\n};\n\nstruct CmdFill\n{\n uint tile_ref;\n int backdrop;\n};\n\nstruct CmdColorRef\n{\n uint offset;\n};\n\nstruct CmdColor\n{\n uint rgba_color;\n};\n\nstruct CmdImageRef\n{\n uint offset;\n};\n\nstruct CmdImage\n{\n uint index;\n ivec2 offset;\n};\n\nstruct CmdJumpRef\n{\n uint offset;\n};\n\nstruct CmdJump\n{\n uint new_ref;\n};\n\nstruct CmdRef\n{\n uint offset;\n};\n\nstruct Config\n{\n uint n_elements;\n uint n_pathseg;\n uint width_in_tiles;\n uint height_in_tiles;\n Alloc tile_alloc;\n Alloc bin_alloc;\n Alloc ptcl_alloc;\n Alloc pathseg_alloc;\n Alloc anno_alloc;\n Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n uint mem_offset;\n uint mem_error;\n uint memory[];\n} _276;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n 
Config conf;\n} _1066;\n\nshared uint sh_bitmaps[4][128];\nshared Alloc sh_part_elements[128];\nshared uint sh_part_count[128];\nshared uint sh_elements[128];\nshared uint sh_tile_stride[128];\nshared uint sh_tile_width[128];\nshared uint sh_tile_x0[128];\nshared uint sh_tile_y0[128];\nshared uint sh_tile_base[128];\nshared uint sh_tile_count[128];\n\nAlloc new_alloc(uint offset, uint size)\n{\n Alloc a;\n a.offset = offset;\n return a;\n}\n\nAlloc slice_mem(Alloc a, uint offset, uint size)\n{\n uint param = a.offset + offset;\n uint param_1 = size;\n return new_alloc(param, param_1);\n}\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return 0u;\n }\n uint v = _276.memory[offset];\n return v;\n}\n\nBinInstanceRef BinInstance_index(BinInstanceRef ref, uint index)\n{\n return BinInstanceRef(ref.offset + (index * 4u));\n}\n\nBinInstance BinInstance_read(Alloc a, BinInstanceRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n BinInstance s;\n s.element_ix = raw0;\n return s;\n}\n\nAnnotatedTag Annotated_tag(Alloc a, AnnotatedRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint tag_and_flags = read_mem(param, param_1);\n return AnnotatedTag(tag_and_flags & 65535u, tag_and_flags >> uint(16));\n}\n\nPath Path_read(Alloc a, PathRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Path s;\n s.bbox = uvec4(raw0 & 65535u, raw0 >> uint(16), raw1 & 65535u, raw1 >> uint(16));\n s.tiles = TileRef(raw2);\n return s;\n}\n\nvoid write_tile_alloc(uint el_ix, Alloc 
a)\n{\n}\n\nAlloc read_tile_alloc(uint el_ix)\n{\n uint param = 0u;\n uint param_1 = uint(int(uint(_276.memory.length())) * 4);\n return new_alloc(param, param_1);\n}\n\nTile Tile_read(Alloc a, TileRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Tile s;\n s.tile = TileSegRef(raw0);\n s.backdrop = int(raw1);\n return s;\n}\n\nAnnoColor AnnoColor_read(Alloc a, AnnoColorRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Alloc param_6 = a;\n uint param_7 = ix + 3u;\n uint raw3 = read_mem(param_6, param_7);\n Alloc param_8 = a;\n uint param_9 = ix + 4u;\n uint raw4 = read_mem(param_8, param_9);\n Alloc param_10 = a;\n uint param_11 = ix + 5u;\n uint raw5 = read_mem(param_10, param_11);\n AnnoColor s;\n s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n s.linewidth = uintBitsToFloat(raw4);\n s.rgba_color = raw5;\n return s;\n}\n\nAnnoColor Annotated_Color_read(Alloc a, AnnotatedRef ref)\n{\n Alloc param = a;\n AnnoColorRef param_1 = AnnoColorRef(ref.offset + 4u);\n return AnnoColor_read(param, param_1);\n}\n\nMallocResult malloc(uint size)\n{\n MallocResult r;\n r.failed = false;\n uint _282 = atomicAdd(_276.mem_offset, size);\n uint offset = _282;\n uint param = offset;\n uint param_1 = size;\n r.alloc = new_alloc(param, param_1);\n if ((offset + size) > uint(int(uint(_276.memory.length())) * 4))\n {\n r.failed = true;\n uint _303 = atomicMax(_276.mem_error, 1u);\n return r;\n }\n return r;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n Alloc param = alloc;\n uint 
param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return;\n }\n _276.memory[offset] = val;\n}\n\nvoid CmdJump_write(Alloc a, CmdJumpRef ref, CmdJump s)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint param_2 = s.new_ref;\n write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_Jump_write(Alloc a, CmdRef ref, CmdJump s)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 9u;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n CmdJumpRef param_4 = CmdJumpRef(ref.offset + 4u);\n CmdJump param_5 = s;\n CmdJump_write(param_3, param_4, param_5);\n}\n\nbool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit)\n{\n if (cmd_ref.offset < cmd_limit)\n {\n return true;\n }\n uint param = 1024u;\n MallocResult _968 = malloc(param);\n MallocResult new_cmd = _968;\n if (new_cmd.failed)\n {\n return false;\n }\n CmdJump jump = CmdJump(new_cmd.alloc.offset);\n Alloc param_1 = cmd_alloc;\n CmdRef param_2 = cmd_ref;\n CmdJump param_3 = jump;\n Cmd_Jump_write(param_1, param_2, param_3);\n cmd_alloc = new_cmd.alloc;\n cmd_ref = CmdRef(cmd_alloc.offset);\n cmd_limit = (cmd_alloc.offset + 1024u) - 36u;\n return true;\n}\n\nuint fill_mode_from_flags(uint flags)\n{\n return flags & 1u;\n}\n\nvoid CmdFill_write(Alloc a, CmdFillRef ref, CmdFill s)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint param_2 = s.tile_ref;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n uint param_4 = ix + 1u;\n uint param_5 = uint(s.backdrop);\n write_mem(param_3, param_4, param_5);\n}\n\nvoid Cmd_Fill_write(Alloc a, CmdRef ref, CmdFill s)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 1u;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n CmdFillRef param_4 = CmdFillRef(ref.offset + 4u);\n CmdFill param_5 = s;\n CmdFill_write(param_3, param_4, param_5);\n}\n\nvoid Cmd_Solid_write(Alloc a, CmdRef ref)\n{\n Alloc 
param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 3u;\n write_mem(param, param_1, param_2);\n}\n\nvoid CmdStroke_write(Alloc a, CmdStrokeRef ref, CmdStroke s)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint param_2 = s.tile_ref;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n uint param_4 = ix + 1u;\n uint param_5 = floatBitsToUint(s.half_width);\n write_mem(param_3, param_4, param_5);\n}\n\nvoid Cmd_Stroke_write(Alloc a, CmdRef ref, CmdStroke s)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 2u;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n CmdStrokeRef param_4 = CmdStrokeRef(ref.offset + 4u);\n CmdStroke param_5 = s;\n CmdStroke_write(param_3, param_4, param_5);\n}\n\nvoid write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float linewidth)\n{\n uint param = flags;\n if (fill_mode_from_flags(param) == 0u)\n {\n if (tile.tile.offset != 0u)\n {\n CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop);\n Alloc param_1 = alloc;\n CmdRef param_2 = cmd_ref;\n CmdFill param_3 = cmd_fill;\n Cmd_Fill_write(param_1, param_2, param_3);\n cmd_ref.offset += 12u;\n }\n else\n {\n Alloc param_4 = alloc;\n CmdRef param_5 = cmd_ref;\n Cmd_Solid_write(param_4, param_5);\n cmd_ref.offset += 4u;\n }\n }\n else\n {\n CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * linewidth);\n Alloc param_6 = alloc;\n CmdRef param_7 = cmd_ref;\n CmdStroke param_8 = cmd_stroke;\n Cmd_Stroke_write(param_6, param_7, param_8);\n cmd_ref.offset += 12u;\n }\n}\n\nvoid CmdColor_write(Alloc a, CmdColorRef ref, CmdColor s)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint param_2 = s.rgba_color;\n write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_Color_write(Alloc a, CmdRef ref, CmdColor s)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 5u;\n write_mem(param, param_1, param_2);\n Alloc 
param_3 = a;\n CmdColorRef param_4 = CmdColorRef(ref.offset + 4u);\n CmdColor param_5 = s;\n CmdColor_write(param_3, param_4, param_5);\n}\n\nAnnoImage AnnoImage_read(Alloc a, AnnoImageRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Alloc param_6 = a;\n uint param_7 = ix + 3u;\n uint raw3 = read_mem(param_6, param_7);\n Alloc param_8 = a;\n uint param_9 = ix + 4u;\n uint raw4 = read_mem(param_8, param_9);\n Alloc param_10 = a;\n uint param_11 = ix + 5u;\n uint raw5 = read_mem(param_10, param_11);\n Alloc param_12 = a;\n uint param_13 = ix + 6u;\n uint raw6 = read_mem(param_12, param_13);\n AnnoImage s;\n s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n s.linewidth = uintBitsToFloat(raw4);\n s.index = raw5;\n s.offset = ivec2(int(raw6 << uint(16)) >> 16, int(raw6) >> 16);\n return s;\n}\n\nAnnoImage Annotated_Image_read(Alloc a, AnnotatedRef ref)\n{\n Alloc param = a;\n AnnoImageRef param_1 = AnnoImageRef(ref.offset + 4u);\n return AnnoImage_read(param, param_1);\n}\n\nvoid CmdImage_write(Alloc a, CmdImageRef ref, CmdImage s)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint param_2 = s.index;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n uint param_4 = ix + 1u;\n uint param_5 = (uint(s.offset.x) & 65535u) | (uint(s.offset.y) << uint(16));\n write_mem(param_3, param_4, param_5);\n}\n\nvoid Cmd_Image_write(Alloc a, CmdRef ref, CmdImage s)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 6u;\n write_mem(param, param_1, param_2);\n Alloc param_3 = a;\n CmdImageRef param_4 = CmdImageRef(ref.offset + 4u);\n CmdImage param_5 = s;\n CmdImage_write(param_3, param_4, 
param_5);\n}\n\nAnnoBeginClip AnnoBeginClip_read(Alloc a, AnnoBeginClipRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Alloc param_6 = a;\n uint param_7 = ix + 3u;\n uint raw3 = read_mem(param_6, param_7);\n Alloc param_8 = a;\n uint param_9 = ix + 4u;\n uint raw4 = read_mem(param_8, param_9);\n AnnoBeginClip s;\n s.bbox = vec4(uintBitsToFloat(raw0), uintBitsToFloat(raw1), uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n s.linewidth = uintBitsToFloat(raw4);\n return s;\n}\n\nAnnoBeginClip Annotated_BeginClip_read(Alloc a, AnnotatedRef ref)\n{\n Alloc param = a;\n AnnoBeginClipRef param_1 = AnnoBeginClipRef(ref.offset + 4u);\n return AnnoBeginClip_read(param, param_1);\n}\n\nvoid Cmd_BeginClip_write(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 7u;\n write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_EndClip_write(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 8u;\n write_mem(param, param_1, param_2);\n}\n\nvoid Cmd_End_write(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint param_2 = 0u;\n write_mem(param, param_1, param_2);\n}\n\nvoid alloc_write(Alloc a, uint offset, Alloc alloc)\n{\n Alloc param = a;\n uint param_1 = offset >> uint(2);\n uint param_2 = alloc.offset;\n write_mem(param, param_1, param_2);\n}\n\nvoid main()\n{\n if (_276.mem_error != 0u)\n {\n return;\n }\n uint width_in_bins = ((_1066.conf.width_in_tiles + 16u) - 1u) / 16u;\n uint bin_ix = (width_in_bins * gl_WorkGroupID.y) + gl_WorkGroupID.x;\n uint partition_ix = 0u;\n uint n_partitions = ((_1066.conf.n_elements + 128u) - 1u) / 128u;\n uint th_ix = gl_LocalInvocationID.x;\n uint bin_tile_x = 
16u * gl_WorkGroupID.x;\n uint bin_tile_y = 8u * gl_WorkGroupID.y;\n uint tile_x = gl_LocalInvocationID.x % 16u;\n uint tile_y = gl_LocalInvocationID.x / 16u;\n uint this_tile_ix = (((bin_tile_y + tile_y) * _1066.conf.width_in_tiles) + bin_tile_x) + tile_x;\n Alloc param;\n param.offset = _1066.conf.ptcl_alloc.offset;\n uint param_1 = this_tile_ix * 1024u;\n uint param_2 = 1024u;\n Alloc cmd_alloc = slice_mem(param, param_1, param_2);\n CmdRef cmd_ref = CmdRef(cmd_alloc.offset);\n uint cmd_limit = (cmd_ref.offset + 1024u) - 36u;\n uint clip_depth = 0u;\n uint clip_zero_depth = 0u;\n uint clip_one_mask = 0u;\n uint rd_ix = 0u;\n uint wr_ix = 0u;\n uint part_start_ix = 0u;\n uint ready_ix = 0u;\n Alloc param_3 = cmd_alloc;\n uint param_4 = 0u;\n uint param_5 = 8u;\n Alloc scratch_alloc = slice_mem(param_3, param_4, param_5);\n cmd_ref.offset += 8u;\n uint num_begin_slots = 0u;\n uint begin_slot = 0u;\n Alloc param_6;\n Alloc param_8;\n uint _1354;\n uint element_ix;\n AnnotatedRef ref;\n Alloc param_16;\n Alloc param_18;\n uint tile_count;\n Alloc param_24;\n uint _1667;\n bool include_tile;\n Alloc param_29;\n Tile tile_1;\n Alloc param_34;\n Alloc param_50;\n Alloc param_66;\n while (true)\n {\n for (uint i = 0u; i < 4u; i++)\n {\n sh_bitmaps[i][th_ix] = 0u;\n }\n bool _1406;\n for (;;)\n {\n if ((ready_ix == wr_ix) && (partition_ix < n_partitions))\n {\n part_start_ix = ready_ix;\n uint count = 0u;\n bool _1204 = th_ix < 128u;\n bool _1212;\n if (_1204)\n {\n _1212 = (partition_ix + th_ix) < n_partitions;\n }\n else\n {\n _1212 = _1204;\n }\n if (_1212)\n {\n uint in_ix = (_1066.conf.bin_alloc.offset >> uint(2)) + ((((partition_ix + th_ix) * 128u) + bin_ix) * 2u);\n param_6.offset = _1066.conf.bin_alloc.offset;\n uint param_7 = in_ix;\n count = read_mem(param_6, param_7);\n param_8.offset = _1066.conf.bin_alloc.offset;\n uint param_9 = in_ix + 1u;\n uint offset = read_mem(param_8, param_9);\n uint param_10 = offset;\n uint param_11 = count * 4u;\n 
sh_part_elements[th_ix] = new_alloc(param_10, param_11);\n }\n for (uint i_1 = 0u; i_1 < 7u; i_1++)\n {\n if (th_ix < 128u)\n {\n sh_part_count[th_ix] = count;\n }\n barrier();\n if (th_ix < 128u)\n {\n if (th_ix >= uint(1 << int(i_1)))\n {\n count += sh_part_count[th_ix - uint(1 << int(i_1))];\n }\n }\n barrier();\n }\n if (th_ix < 128u)\n {\n sh_part_count[th_ix] = part_start_ix + count;\n }\n barrier();\n ready_ix = sh_part_count[127];\n partition_ix += 128u;\n }\n uint ix = rd_ix + th_ix;\n if ((ix >= wr_ix) && (ix < ready_ix))\n {\n uint part_ix = 0u;\n for (uint i_2 = 0u; i_2 < 7u; i_2++)\n {\n uint probe = part_ix + uint(64 >> int(i_2));\n if (ix >= sh_part_count[probe - 1u])\n {\n part_ix = probe;\n }\n }\n if (part_ix > 0u)\n {\n _1354 = sh_part_count[part_ix - 1u];\n }\n else\n {\n _1354 = part_start_ix;\n }\n ix -= _1354;\n Alloc bin_alloc = sh_part_elements[part_ix];\n BinInstanceRef inst_ref = BinInstanceRef(bin_alloc.offset);\n BinInstanceRef param_12 = inst_ref;\n uint param_13 = ix;\n Alloc param_14 = bin_alloc;\n BinInstanceRef param_15 = BinInstance_index(param_12, param_13);\n BinInstance inst = BinInstance_read(param_14, param_15);\n sh_elements[th_ix] = inst.element_ix;\n }\n barrier();\n wr_ix = min((rd_ix + 128u), ready_ix);\n bool _1396 = (wr_ix - rd_ix) < 128u;\n if (_1396)\n {\n _1406 = (wr_ix < ready_ix) || (partition_ix < n_partitions);\n }\n else\n {\n _1406 = _1396;\n }\n if (_1406)\n {\n continue;\n }\n else\n {\n break;\n }\n }\n uint tag = 0u;\n if ((th_ix + rd_ix) < wr_ix)\n {\n element_ix = sh_elements[th_ix];\n ref = AnnotatedRef(_1066.conf.anno_alloc.offset + (element_ix * 32u));\n param_16.offset = _1066.conf.anno_alloc.offset;\n AnnotatedRef param_17 = ref;\n tag = Annotated_tag(param_16, param_17).tag;\n }\n switch (tag)\n {\n case 1u:\n case 2u:\n case 3u:\n case 4u:\n {\n uint path_ix = element_ix;\n param_18.offset = _1066.conf.tile_alloc.offset;\n PathRef param_19 = PathRef(_1066.conf.tile_alloc.offset + (path_ix * 
12u));\n Path path = Path_read(param_18, param_19);\n uint stride = path.bbox.z - path.bbox.x;\n sh_tile_stride[th_ix] = stride;\n int dx = int(path.bbox.x) - int(bin_tile_x);\n int dy = int(path.bbox.y) - int(bin_tile_y);\n int x0 = clamp(dx, 0, 16);\n int y0 = clamp(dy, 0, 8);\n int x1 = clamp(int(path.bbox.z) - int(bin_tile_x), 0, 16);\n int y1 = clamp(int(path.bbox.w) - int(bin_tile_y), 0, 8);\n sh_tile_width[th_ix] = uint(x1 - x0);\n sh_tile_x0[th_ix] = uint(x0);\n sh_tile_y0[th_ix] = uint(y0);\n tile_count = uint(x1 - x0) * uint(y1 - y0);\n uint base = path.tiles.offset - (((uint(dy) * stride) + uint(dx)) * 8u);\n sh_tile_base[th_ix] = base;\n uint param_20 = path.tiles.offset;\n uint param_21 = ((path.bbox.z - path.bbox.x) * (path.bbox.w - path.bbox.y)) * 8u;\n Alloc path_alloc = new_alloc(param_20, param_21);\n uint param_22 = th_ix;\n Alloc param_23 = path_alloc;\n write_tile_alloc(param_22, param_23);\n break;\n }\n default:\n {\n tile_count = 0u;\n break;\n }\n }\n sh_tile_count[th_ix] = tile_count;\n for (uint i_3 = 0u; i_3 < 7u; i_3++)\n {\n barrier();\n if (th_ix >= uint(1 << int(i_3)))\n {\n tile_count += sh_tile_count[th_ix - uint(1 << int(i_3))];\n }\n barrier();\n sh_tile_count[th_ix] = tile_count;\n }\n barrier();\n uint total_tile_count = sh_tile_count[127];\n for (uint ix_1 = th_ix; ix_1 < total_tile_count; ix_1 += 128u)\n {\n uint el_ix = 0u;\n for (uint i_4 = 0u; i_4 < 7u; i_4++)\n {\n uint probe_1 = el_ix + uint(64 >> int(i_4));\n if (ix_1 >= sh_tile_count[probe_1 - 1u])\n {\n el_ix = probe_1;\n }\n }\n AnnotatedRef ref_1 = AnnotatedRef(_1066.conf.anno_alloc.offset + (sh_elements[el_ix] * 32u));\n param_24.offset = _1066.conf.anno_alloc.offset;\n AnnotatedRef param_25 = ref_1;\n uint tag_1 = Annotated_tag(param_24, param_25).tag;\n if (el_ix > 0u)\n {\n _1667 = sh_tile_count[el_ix - 1u];\n }\n else\n {\n _1667 = 0u;\n }\n uint seq_ix = ix_1 - _1667;\n uint width = sh_tile_width[el_ix];\n uint x = sh_tile_x0[el_ix] + (seq_ix % width);\n uint 
y = sh_tile_y0[el_ix] + (seq_ix / width);\n if ((tag_1 == 3u) || (tag_1 == 4u))\n {\n include_tile = true;\n }\n else\n {\n uint param_26 = el_ix;\n Alloc param_27 = read_tile_alloc(param_26);\n TileRef param_28 = TileRef(sh_tile_base[el_ix] + (((sh_tile_stride[el_ix] * y) + x) * 8u));\n Tile tile = Tile_read(param_27, param_28);\n bool _1728 = tile.tile.offset != 0u;\n bool _1735;\n if (!_1728)\n {\n _1735 = tile.backdrop != 0;\n }\n else\n {\n _1735 = _1728;\n }\n include_tile = _1735;\n }\n if (include_tile)\n {\n uint el_slice = el_ix / 32u;\n uint el_mask = uint(1 << int(el_ix & 31u));\n uint _1755 = atomicOr(sh_bitmaps[el_slice][(y * 16u) + x], el_mask);\n }\n }\n barrier();\n uint slice_ix = 0u;\n uint bitmap = sh_bitmaps[0][th_ix];\n while (true)\n {\n if (bitmap == 0u)\n {\n slice_ix++;\n if (slice_ix == 4u)\n {\n break;\n }\n bitmap = sh_bitmaps[slice_ix][th_ix];\n if (bitmap == 0u)\n {\n continue;\n }\n }\n uint element_ref_ix = (slice_ix * 32u) + uint(findLSB(bitmap));\n uint element_ix_1 = sh_elements[element_ref_ix];\n bitmap &= (bitmap - 1u);\n ref = AnnotatedRef(_1066.conf.anno_alloc.offset + (element_ix_1 * 32u));\n param_29.offset = _1066.conf.anno_alloc.offset;\n AnnotatedRef param_30 = ref;\n AnnotatedTag tag_2 = Annotated_tag(param_29, param_30);\n if (clip_zero_depth == 0u)\n {\n switch (tag_2.tag)\n {\n case 1u:\n {\n uint param_31 = element_ref_ix;\n Alloc param_32 = read_tile_alloc(param_31);\n TileRef param_33 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n tile_1 = Tile_read(param_32, param_33);\n param_34.offset = _1066.conf.anno_alloc.offset;\n AnnotatedRef param_35 = ref;\n AnnoColor fill = Annotated_Color_read(param_34, param_35);\n Alloc param_36 = cmd_alloc;\n CmdRef param_37 = cmd_ref;\n uint param_38 = cmd_limit;\n bool _1865 = alloc_cmd(param_36, param_37, param_38);\n cmd_alloc = param_36;\n cmd_ref = param_37;\n cmd_limit = param_38;\n if (!_1865)\n {\n break;\n }\n Alloc 
param_39 = cmd_alloc;\n CmdRef param_40 = cmd_ref;\n uint param_41 = tag_2.flags;\n Tile param_42 = tile_1;\n float param_43 = fill.linewidth;\n write_fill(param_39, param_40, param_41, param_42, param_43);\n cmd_ref = param_40;\n Alloc param_44 = cmd_alloc;\n CmdRef param_45 = cmd_ref;\n CmdColor param_46 = CmdColor(fill.rgba_color);\n Cmd_Color_write(param_44, param_45, param_46);\n cmd_ref.offset += 8u;\n break;\n }\n case 2u:\n {\n uint param_47 = element_ref_ix;\n Alloc param_48 = read_tile_alloc(param_47);\n TileRef param_49 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n tile_1 = Tile_read(param_48, param_49);\n param_50.offset = _1066.conf.anno_alloc.offset;\n AnnotatedRef param_51 = ref;\n AnnoImage fill_img = Annotated_Image_read(param_50, param_51);\n Alloc param_52 = cmd_alloc;\n CmdRef param_53 = cmd_ref;\n uint param_54 = cmd_limit;\n bool _1935 = alloc_cmd(param_52, param_53, param_54);\n cmd_alloc = param_52;\n cmd_ref = param_53;\n cmd_limit = param_54;\n if (!_1935)\n {\n break;\n }\n Alloc param_55 = cmd_alloc;\n CmdRef param_56 = cmd_ref;\n uint param_57 = tag_2.flags;\n Tile param_58 = tile_1;\n float param_59 = fill_img.linewidth;\n write_fill(param_55, param_56, param_57, param_58, param_59);\n cmd_ref = param_56;\n Alloc param_60 = cmd_alloc;\n CmdRef param_61 = cmd_ref;\n CmdImage param_62 = CmdImage(fill_img.index, fill_img.offset);\n Cmd_Image_write(param_60, param_61, param_62);\n cmd_ref.offset += 12u;\n break;\n }\n case 3u:\n {\n uint param_63 = element_ref_ix;\n Alloc param_64 = read_tile_alloc(param_63);\n TileRef param_65 = TileRef(sh_tile_base[element_ref_ix] + (((sh_tile_stride[element_ref_ix] * tile_y) + tile_x) * 8u));\n tile_1 = Tile_read(param_64, param_65);\n bool _1994 = tile_1.tile.offset == 0u;\n bool _2000;\n if (_1994)\n {\n _2000 = tile_1.backdrop == 0;\n }\n else\n {\n _2000 = _1994;\n }\n if (_2000)\n {\n clip_zero_depth = clip_depth + 1u;\n }\n else\n {\n if 
((tile_1.tile.offset == 0u) && (clip_depth < 32u))\n {\n clip_one_mask |= uint(1 << int(clip_depth));\n }\n else\n {\n param_66.offset = _1066.conf.anno_alloc.offset;\n AnnotatedRef param_67 = ref;\n AnnoBeginClip begin_clip = Annotated_BeginClip_read(param_66, param_67);\n Alloc param_68 = cmd_alloc;\n CmdRef param_69 = cmd_ref;\n uint param_70 = cmd_limit;\n bool _2035 = alloc_cmd(param_68, param_69, param_70);\n cmd_alloc = param_68;\n cmd_ref = param_69;\n cmd_limit = param_70;\n if (!_2035)\n {\n break;\n }\n Alloc param_71 = cmd_alloc;\n CmdRef param_72 = cmd_ref;\n uint param_73 = tag_2.flags;\n Tile param_74 = tile_1;\n float param_75 = begin_clip.linewidth;\n write_fill(param_71, param_72, param_73, param_74, param_75);\n cmd_ref = param_72;\n Alloc param_76 = cmd_alloc;\n CmdRef param_77 = cmd_ref;\n Cmd_BeginClip_write(param_76, param_77);\n cmd_ref.offset += 4u;\n if (clip_depth < 32u)\n {\n clip_one_mask &= uint(~(1 << int(clip_depth)));\n }\n begin_slot++;\n num_begin_slots = max(num_begin_slots, begin_slot);\n }\n }\n clip_depth++;\n break;\n }\n case 4u:\n {\n clip_depth--;\n bool _2087 = clip_depth >= 32u;\n bool _2097;\n if (!_2087)\n {\n _2097 = (clip_one_mask & uint(1 << int(clip_depth))) == 0u;\n }\n else\n {\n _2097 = _2087;\n }\n if (_2097)\n {\n Alloc param_78 = cmd_alloc;\n CmdRef param_79 = cmd_ref;\n uint param_80 = cmd_limit;\n bool _2106 = alloc_cmd(param_78, param_79, param_80);\n cmd_alloc = param_78;\n cmd_ref = param_79;\n cmd_limit = param_80;\n if (!_2106)\n {\n break;\n }\n Alloc param_81 = cmd_alloc;\n CmdRef param_82 = cmd_ref;\n Cmd_Solid_write(param_81, param_82);\n cmd_ref.offset += 4u;\n begin_slot--;\n Alloc param_83 = cmd_alloc;\n CmdRef param_84 = cmd_ref;\n Cmd_EndClip_write(param_83, param_84);\n cmd_ref.offset += 4u;\n }\n break;\n }\n }\n }\n else\n {\n switch (tag_2.tag)\n {\n case 3u:\n {\n clip_depth++;\n break;\n }\n case 4u:\n {\n if (clip_depth == clip_zero_depth)\n {\n clip_zero_depth = 0u;\n }\n 
clip_depth--;\n break;\n }\n }\n }\n }\n barrier();\n rd_ix += 128u;\n if ((rd_ix >= ready_ix) && (partition_ix >= n_partitions))\n {\n break;\n }\n }\n bool _2171 = (bin_tile_x + tile_x) < _1066.conf.width_in_tiles;\n bool _2180;\n if (_2171)\n {\n _2180 = (bin_tile_y + tile_y) < _1066.conf.height_in_tiles;\n }\n else\n {\n _2180 = _2171;\n }\n if (_2180)\n {\n Alloc param_85 = cmd_alloc;\n CmdRef param_86 = cmd_ref;\n Cmd_End_write(param_85, param_86);\n if (num_begin_slots > 0u)\n {\n uint scratch_size = (((num_begin_slots * 32u) * 32u) * 2u) * 4u;\n uint param_87 = scratch_size;\n MallocResult _2201 = malloc(param_87);\n MallocResult scratch = _2201;\n Alloc param_88 = scratch_alloc;\n uint param_89 = scratch_alloc.offset;\n Alloc param_90 = scratch.alloc;\n alloc_write(param_88, param_89, param_90);\n }\n }\n}\n\n", } shader_copy_frag = driver.ShaderSources{ Name: "copy.frag", @@ -166,7 +166,7 @@ var ( } shader_kernel4_comp = driver.ShaderSources{ Name: "kernel4.comp", - GLSL310ES: "#version 310 es\nlayout(local_size_x = 32, local_size_y = 4, local_size_z = 1) in;\n\nstruct Alloc\n{\n uint offset;\n};\n\nstruct MallocResult\n{\n Alloc alloc;\n bool failed;\n};\n\nstruct CmdStrokeRef\n{\n uint offset;\n};\n\nstruct CmdStroke\n{\n uint tile_ref;\n float half_width;\n};\n\nstruct CmdFillRef\n{\n uint offset;\n};\n\nstruct CmdFill\n{\n uint tile_ref;\n int backdrop;\n};\n\nstruct CmdColorRef\n{\n uint offset;\n};\n\nstruct CmdColor\n{\n uint rgba_color;\n};\n\nstruct CmdImageRef\n{\n uint offset;\n};\n\nstruct CmdImage\n{\n uint index;\n ivec2 offset;\n};\n\nstruct CmdAlphaRef\n{\n uint offset;\n};\n\nstruct CmdAlpha\n{\n float alpha;\n};\n\nstruct CmdJumpRef\n{\n uint offset;\n};\n\nstruct CmdJump\n{\n uint new_ref;\n};\n\nstruct CmdRef\n{\n uint offset;\n};\n\nstruct CmdTag\n{\n uint tag;\n uint flags;\n};\n\nstruct TileSegRef\n{\n uint offset;\n};\n\nstruct TileSeg\n{\n vec2 origin;\n vec2 vector;\n float y_edge;\n TileSegRef next;\n};\n\nstruct Config\n{\n 
uint n_elements;\n uint n_pathseg;\n uint width_in_tiles;\n uint height_in_tiles;\n Alloc tile_alloc;\n Alloc bin_alloc;\n Alloc ptcl_alloc;\n Alloc pathseg_alloc;\n Alloc anno_alloc;\n Alloc trans_alloc;\n};\n\nstruct ClipState\n{\n uint srgb;\n float area;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n uint mem_offset;\n uint mem_error;\n uint memory[];\n} _190;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n Config conf;\n} _749;\n\nlayout(binding = 3, rgba8) uniform readonly highp image2D images[1];\nlayout(binding = 2, rgba8) uniform writeonly highp image2D image;\n\nshared MallocResult sh_clip_alloc;\n\nAlloc new_alloc(uint offset, uint size)\n{\n Alloc a;\n a.offset = offset;\n return a;\n}\n\nAlloc slice_mem(Alloc a, uint offset, uint size)\n{\n uint param = a.offset + offset;\n uint param_1 = size;\n return new_alloc(param, param_1);\n}\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return 0u;\n }\n uint v = _190.memory[offset];\n return v;\n}\n\nCmdTag Cmd_tag(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint tag_and_flags = read_mem(param, param_1);\n return CmdTag(tag_and_flags & 65535u, tag_and_flags >> uint(16));\n}\n\nCmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n CmdStroke s;\n s.tile_ref = raw0;\n s.half_width = uintBitsToFloat(raw1);\n return s;\n}\n\nCmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdStrokeRef param_1 = CmdStrokeRef(ref.offset + 4u);\n return CmdStroke_read(param, param_1);\n}\n\nTileSeg TileSeg_read(Alloc a, TileSegRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n 
uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Alloc param_6 = a;\n uint param_7 = ix + 3u;\n uint raw3 = read_mem(param_6, param_7);\n Alloc param_8 = a;\n uint param_9 = ix + 4u;\n uint raw4 = read_mem(param_8, param_9);\n Alloc param_10 = a;\n uint param_11 = ix + 5u;\n uint raw5 = read_mem(param_10, param_11);\n TileSeg s;\n s.origin = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n s.vector = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n s.y_edge = uintBitsToFloat(raw4);\n s.next = TileSegRef(raw5);\n return s;\n}\n\nCmdFill CmdFill_read(Alloc a, CmdFillRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n CmdFill s;\n s.tile_ref = raw0;\n s.backdrop = int(raw1);\n return s;\n}\n\nCmdFill Cmd_Fill_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdFillRef param_1 = CmdFillRef(ref.offset + 4u);\n return CmdFill_read(param, param_1);\n}\n\nCmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n CmdAlpha s;\n s.alpha = uintBitsToFloat(raw0);\n return s;\n}\n\nCmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdAlphaRef param_1 = CmdAlphaRef(ref.offset + 4u);\n return CmdAlpha_read(param, param_1);\n}\n\nCmdColor CmdColor_read(Alloc a, CmdColorRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n CmdColor s;\n s.rgba_color = raw0;\n return s;\n}\n\nCmdColor Cmd_Color_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdColorRef param_1 = CmdColorRef(ref.offset + 4u);\n return 
CmdColor_read(param, param_1);\n}\n\nvec3 fromsRGB(vec3 srgb)\n{\n bvec3 cutoff = greaterThanEqual(srgb, vec3(0.040449999272823333740234375));\n vec3 below = srgb / vec3(12.9200000762939453125);\n vec3 above = pow((srgb + vec3(0.054999999701976776123046875)) / vec3(1.05499994754791259765625), vec3(2.400000095367431640625));\n return mix(below, above, cutoff);\n}\n\nvec4 unpacksRGB(uint srgba)\n{\n vec4 color = unpackUnorm4x8(srgba).wzyx;\n vec3 param = color.xyz;\n return vec4(fromsRGB(param), color.w);\n}\n\nCmdImage CmdImage_read(Alloc a, CmdImageRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n CmdImage s;\n s.index = raw0;\n s.offset = ivec2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);\n return s;\n}\n\nCmdImage Cmd_Image_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdImageRef param_1 = CmdImageRef(ref.offset + 4u);\n return CmdImage_read(param, param_1);\n}\n\nvec4[8] fillImage(uvec2 xy, CmdImage cmd_img)\n{\n vec4 rgba[8];\n for (uint i = 0u; i < 8u; i++)\n {\n ivec2 uv = ivec2(int(xy.x), int(xy.y + (i * 4u))) + cmd_img.offset;\n vec4 fg_rgba = imageLoad(images[0], uv);\n vec3 param = fg_rgba.xyz;\n vec3 _722 = fromsRGB(param);\n fg_rgba = vec4(_722.x, _722.y, _722.z, fg_rgba.w);\n rgba[i] = fg_rgba;\n }\n return rgba;\n}\n\nMallocResult malloc(uint size)\n{\n MallocResult r;\n r.failed = false;\n uint _196 = atomicAdd(_190.mem_offset, size);\n uint offset = _196;\n uint param = offset;\n uint param_1 = size;\n r.alloc = new_alloc(param, param_1);\n if ((offset + size) > uint(int(uint(_190.memory.length())) * 4))\n {\n r.failed = true;\n uint _217 = atomicMax(_190.mem_error, 1u);\n return r;\n }\n return r;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return;\n }\n 
_190.memory[offset] = val;\n}\n\nMallocResult alloc_clip_buf(uint link)\n{\n bool _569 = gl_LocalInvocationID.x == 0u;\n bool _575;\n if (_569)\n {\n _575 = gl_LocalInvocationID.y == 0u;\n }\n else\n {\n _575 = _569;\n }\n if (_575)\n {\n uint param = 8196u;\n MallocResult _581 = malloc(param);\n MallocResult m = _581;\n if (!m.failed)\n {\n Alloc param_1 = m.alloc;\n uint param_2 = (m.alloc.offset >> uint(2)) + 2048u;\n uint param_3 = link;\n write_mem(param_1, param_2, param_3);\n }\n sh_clip_alloc = m;\n }\n barrier();\n return sh_clip_alloc;\n}\n\nvec3 tosRGB(vec3 rgb)\n{\n bvec3 cutoff = greaterThanEqual(rgb, vec3(0.003130800090730190277099609375));\n vec3 below = vec3(12.9200000762939453125) * rgb;\n vec3 above = (vec3(1.05499994754791259765625) * pow(rgb, vec3(0.416660010814666748046875))) - vec3(0.054999999701976776123046875);\n return mix(below, above, cutoff);\n}\n\nuint packsRGB(inout vec4 rgba)\n{\n vec3 param = rgba.xyz;\n rgba = vec4(tosRGB(param), rgba.w);\n return packUnorm4x8(rgba.wzyx);\n}\n\nCmdJump CmdJump_read(Alloc a, CmdJumpRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n CmdJump s;\n s.new_ref = raw0;\n return s;\n}\n\nCmdJump Cmd_Jump_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdJumpRef param_1 = CmdJumpRef(ref.offset + 4u);\n return CmdJump_read(param, param_1);\n}\n\nvoid main()\n{\n if (_190.mem_error != 0u)\n {\n return;\n }\n uint tile_ix = (gl_WorkGroupID.y * _749.conf.width_in_tiles) + gl_WorkGroupID.x;\n Alloc param;\n param.offset = _749.conf.ptcl_alloc.offset;\n uint param_1 = tile_ix * 1024u;\n uint param_2 = 1024u;\n Alloc cmd_alloc = slice_mem(param, param_1, param_2);\n CmdRef cmd_ref = CmdRef(cmd_alloc.offset);\n uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + (32u * gl_WorkGroupID.y));\n vec2 xy = vec2(xy_uint);\n uint blend_spill = 0u;\n uint blend_sp = 0u;\n uint param_3 = 0u;\n uint param_4 = 0u;\n Alloc 
clip_tos = new_alloc(param_3, param_4);\n vec4 rgba[8];\n for (uint i = 0u; i < 8u; i++)\n {\n rgba[i] = vec4(0.0);\n }\n float df[8];\n TileSegRef tile_seg_ref;\n float area[8];\n uint blend_slot;\n ClipState blend_stack[4][8];\n while (true)\n {\n Alloc param_5 = cmd_alloc;\n CmdRef param_6 = cmd_ref;\n uint tag = Cmd_tag(param_5, param_6).tag;\n if (tag == 0u)\n {\n break;\n }\n switch (tag)\n {\n case 2u:\n {\n Alloc param_7 = cmd_alloc;\n CmdRef param_8 = cmd_ref;\n CmdStroke stroke = Cmd_Stroke_read(param_7, param_8);\n for (uint k = 0u; k < 8u; k++)\n {\n df[k] = 1000000000.0;\n }\n tile_seg_ref = TileSegRef(stroke.tile_ref);\n do\n {\n uint param_9 = tile_seg_ref.offset;\n uint param_10 = 24u;\n Alloc param_11 = new_alloc(param_9, param_10);\n TileSegRef param_12 = tile_seg_ref;\n TileSeg seg = TileSeg_read(param_11, param_12);\n vec2 line_vec = seg.vector;\n for (uint k_1 = 0u; k_1 < 8u; k_1++)\n {\n vec2 dpos = (xy + vec2(0.5)) - seg.origin;\n dpos.y += float(k_1 * 4u);\n float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);\n df[k_1] = min(df[k_1], length((line_vec * t) - dpos));\n }\n tile_seg_ref = seg.next;\n } while (tile_seg_ref.offset != 0u);\n for (uint k_2 = 0u; k_2 < 8u; k_2++)\n {\n area[k_2] = clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0);\n }\n cmd_ref.offset += 12u;\n break;\n }\n case 1u:\n {\n Alloc param_13 = cmd_alloc;\n CmdRef param_14 = cmd_ref;\n CmdFill fill = Cmd_Fill_read(param_13, param_14);\n for (uint k_3 = 0u; k_3 < 8u; k_3++)\n {\n area[k_3] = float(fill.backdrop);\n }\n tile_seg_ref = TileSegRef(fill.tile_ref);\n do\n {\n uint param_15 = tile_seg_ref.offset;\n uint param_16 = 24u;\n Alloc param_17 = new_alloc(param_15, param_16);\n TileSegRef param_18 = tile_seg_ref;\n TileSeg seg_1 = TileSeg_read(param_17, param_18);\n for (uint k_4 = 0u; k_4 < 8u; k_4++)\n {\n vec2 my_xy = vec2(xy.x, xy.y + float(k_4 * 4u));\n vec2 start = seg_1.origin - my_xy;\n vec2 end = start + seg_1.vector;\n vec2 window = 
clamp(vec2(start.y, end.y), vec2(0.0), vec2(1.0));\n if (!(window.x == window.y))\n {\n vec2 t_1 = (window - vec2(start.y)) / vec2(seg_1.vector.y);\n vec2 xs = vec2(mix(start.x, end.x, t_1.x), mix(start.x, end.x, t_1.y));\n float xmin = min(min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07;\n float xmax = max(xs.x, xs.y);\n float b = min(xmax, 1.0);\n float c = max(b, 0.0);\n float d = max(xmin, 0.0);\n float a = ((b + (0.5 * ((d * d) - (c * c)))) - xmin) / (xmax - xmin);\n area[k_4] += (a * (window.x - window.y));\n }\n area[k_4] += (sign(seg_1.vector.x) * clamp((my_xy.y - seg_1.y_edge) + 1.0, 0.0, 1.0));\n }\n tile_seg_ref = seg_1.next;\n } while (tile_seg_ref.offset != 0u);\n for (uint k_5 = 0u; k_5 < 8u; k_5++)\n {\n area[k_5] = min(abs(area[k_5]), 1.0);\n }\n cmd_ref.offset += 12u;\n break;\n }\n case 3u:\n {\n for (uint k_6 = 0u; k_6 < 8u; k_6++)\n {\n area[k_6] = 1.0;\n }\n cmd_ref.offset += 4u;\n break;\n }\n case 4u:\n {\n Alloc param_19 = cmd_alloc;\n CmdRef param_20 = cmd_ref;\n CmdAlpha alpha = Cmd_Alpha_read(param_19, param_20);\n for (uint k_7 = 0u; k_7 < 8u; k_7++)\n {\n area[k_7] = alpha.alpha;\n }\n cmd_ref.offset += 8u;\n break;\n }\n case 5u:\n {\n Alloc param_21 = cmd_alloc;\n CmdRef param_22 = cmd_ref;\n CmdColor color = Cmd_Color_read(param_21, param_22);\n uint param_23 = color.rgba_color;\n vec4 fg = unpacksRGB(param_23);\n for (uint k_8 = 0u; k_8 < 8u; k_8++)\n {\n vec4 fg_k = fg * area[k_8];\n rgba[k_8] = (rgba[k_8] * (1.0 - fg_k.w)) + fg_k;\n }\n cmd_ref.offset += 8u;\n break;\n }\n case 6u:\n {\n Alloc param_24 = cmd_alloc;\n CmdRef param_25 = cmd_ref;\n CmdImage fill_img = Cmd_Image_read(param_24, param_25);\n uvec2 param_26 = xy_uint;\n CmdImage param_27 = fill_img;\n vec4 img[8] = fillImage(param_26, param_27);\n for (uint k_9 = 0u; k_9 < 8u; k_9++)\n {\n vec4 fg_k_1 = img[k_9] * area[k_9];\n rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;\n }\n cmd_ref.offset += 12u;\n break;\n }\n case 7u:\n {\n blend_slot = 
blend_sp % 4u;\n if (blend_sp == (blend_spill + 4u))\n {\n uint param_28 = clip_tos.offset;\n MallocResult _1311 = alloc_clip_buf(param_28);\n MallocResult m = _1311;\n if (m.failed)\n {\n return;\n }\n clip_tos = m.alloc;\n uint base_ix = (clip_tos.offset >> uint(2)) + (2u * (gl_LocalInvocationID.x + (32u * gl_LocalInvocationID.y)));\n for (uint k_10 = 0u; k_10 < 8u; k_10++)\n {\n uint srgb = blend_stack[blend_slot][k_10].srgb;\n uint area_1 = floatBitsToUint(blend_stack[blend_slot][k_10].area);\n Alloc param_29 = clip_tos;\n uint param_30 = (base_ix + 0u) + (((k_10 * 2u) * 32u) * 4u);\n uint param_31 = srgb;\n write_mem(param_29, param_30, param_31);\n Alloc param_32 = clip_tos;\n uint param_33 = (base_ix + 1u) + (((k_10 * 2u) * 32u) * 4u);\n uint param_34 = area_1;\n write_mem(param_32, param_33, param_34);\n }\n blend_spill++;\n }\n for (uint k_11 = 0u; k_11 < 8u; k_11++)\n {\n vec4 param_35 = rgba[k_11];\n uint _1399 = packsRGB(param_35);\n blend_stack[blend_slot][k_11] = ClipState(_1399, clamp(abs(area[k_11]), 0.0, 1.0));\n rgba[k_11] = vec4(0.0);\n }\n blend_sp++;\n cmd_ref.offset += 4u;\n break;\n }\n case 8u:\n {\n blend_slot = (blend_sp - 1u) % 4u;\n if (blend_sp == blend_spill)\n {\n uint base_ix_1 = (clip_tos.offset >> uint(2)) + (2u * (gl_LocalInvocationID.x + (32u * gl_LocalInvocationID.y)));\n for (uint k_12 = 0u; k_12 < 8u; k_12++)\n {\n Alloc param_36 = clip_tos;\n uint param_37 = (base_ix_1 + 0u) + (((k_12 * 2u) * 32u) * 4u);\n uint srgb_1 = read_mem(param_36, param_37);\n Alloc param_38 = clip_tos;\n uint param_39 = (base_ix_1 + 1u) + (((k_12 * 2u) * 32u) * 4u);\n uint area_2 = read_mem(param_38, param_39);\n ClipState state = ClipState(srgb_1, uintBitsToFloat(area_2));\n blend_stack[blend_slot][k_12] = state;\n }\n Alloc param_40 = clip_tos;\n uint param_41 = (clip_tos.offset >> uint(2)) + 2048u;\n clip_tos.offset = read_mem(param_40, param_41);\n blend_spill--;\n }\n blend_sp--;\n for (uint k_13 = 0u; k_13 < 8u; k_13++)\n {\n uint param_42 = 
blend_stack[blend_slot][k_13].srgb;\n vec4 bg = unpacksRGB(param_42);\n vec4 fg_1 = (rgba[k_13] * area[k_13]) * blend_stack[blend_slot][k_13].area;\n rgba[k_13] = (bg * (1.0 - fg_1.w)) + fg_1;\n }\n cmd_ref.offset += 4u;\n break;\n }\n case 9u:\n {\n Alloc param_43 = cmd_alloc;\n CmdRef param_44 = cmd_ref;\n cmd_ref = CmdRef(Cmd_Jump_read(param_43, param_44).new_ref);\n cmd_alloc.offset = cmd_ref.offset;\n continue;\n }\n }\n }\n for (uint i_1 = 0u; i_1 < 8u; i_1++)\n {\n vec3 param_45 = rgba[i_1].xyz;\n imageStore(image, ivec2(int(xy_uint.x), int(xy_uint.y + (4u * i_1))), vec4(tosRGB(param_45), rgba[i_1].w));\n }\n}\n\n", + GLSL310ES: "#version 310 es\nlayout(local_size_x = 16, local_size_y = 8, local_size_z = 1) in;\n\nstruct Alloc\n{\n uint offset;\n};\n\nstruct CmdStrokeRef\n{\n uint offset;\n};\n\nstruct CmdStroke\n{\n uint tile_ref;\n float half_width;\n};\n\nstruct CmdFillRef\n{\n uint offset;\n};\n\nstruct CmdFill\n{\n uint tile_ref;\n int backdrop;\n};\n\nstruct CmdColorRef\n{\n uint offset;\n};\n\nstruct CmdColor\n{\n uint rgba_color;\n};\n\nstruct CmdImageRef\n{\n uint offset;\n};\n\nstruct CmdImage\n{\n uint index;\n ivec2 offset;\n};\n\nstruct CmdAlphaRef\n{\n uint offset;\n};\n\nstruct CmdAlpha\n{\n float alpha;\n};\n\nstruct CmdJumpRef\n{\n uint offset;\n};\n\nstruct CmdJump\n{\n uint new_ref;\n};\n\nstruct CmdRef\n{\n uint offset;\n};\n\nstruct CmdTag\n{\n uint tag;\n uint flags;\n};\n\nstruct TileSegRef\n{\n uint offset;\n};\n\nstruct TileSeg\n{\n vec2 origin;\n vec2 vector;\n float y_edge;\n TileSegRef next;\n};\n\nstruct Config\n{\n uint n_elements;\n uint n_pathseg;\n uint width_in_tiles;\n uint height_in_tiles;\n Alloc tile_alloc;\n Alloc bin_alloc;\n Alloc ptcl_alloc;\n Alloc pathseg_alloc;\n Alloc anno_alloc;\n Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n uint mem_offset;\n uint mem_error;\n uint memory[];\n} _196;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n Config conf;\n} _693;\n\nlayout(binding 
= 3, rgba8) uniform readonly highp image2D images[1];\nlayout(binding = 2, rgba8) uniform writeonly highp image2D image;\n\nAlloc new_alloc(uint offset, uint size)\n{\n Alloc a;\n a.offset = offset;\n return a;\n}\n\nAlloc slice_mem(Alloc a, uint offset, uint size)\n{\n uint param = a.offset + offset;\n uint param_1 = size;\n return new_alloc(param, param_1);\n}\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return 0u;\n }\n uint v = _196.memory[offset];\n return v;\n}\n\nAlloc alloc_read(Alloc a, uint offset)\n{\n Alloc param = a;\n uint param_1 = offset >> uint(2);\n Alloc alloc;\n alloc.offset = read_mem(param, param_1);\n return alloc;\n}\n\nCmdTag Cmd_tag(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint tag_and_flags = read_mem(param, param_1);\n return CmdTag(tag_and_flags & 65535u, tag_and_flags >> uint(16));\n}\n\nCmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n CmdStroke s;\n s.tile_ref = raw0;\n s.half_width = uintBitsToFloat(raw1);\n return s;\n}\n\nCmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdStrokeRef param_1 = CmdStrokeRef(ref.offset + 4u);\n return CmdStroke_read(param, param_1);\n}\n\nTileSeg TileSeg_read(Alloc a, TileSegRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Alloc param_6 = a;\n uint param_7 = ix + 3u;\n uint raw3 = read_mem(param_6, param_7);\n 
Alloc param_8 = a;\n uint param_9 = ix + 4u;\n uint raw4 = read_mem(param_8, param_9);\n Alloc param_10 = a;\n uint param_11 = ix + 5u;\n uint raw5 = read_mem(param_10, param_11);\n TileSeg s;\n s.origin = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n s.vector = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n s.y_edge = uintBitsToFloat(raw4);\n s.next = TileSegRef(raw5);\n return s;\n}\n\nuvec2 chunk_offset(uint i)\n{\n return uvec2((i % 2u) * 16u, (i / 2u) * 8u);\n}\n\nCmdFill CmdFill_read(Alloc a, CmdFillRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n CmdFill s;\n s.tile_ref = raw0;\n s.backdrop = int(raw1);\n return s;\n}\n\nCmdFill Cmd_Fill_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdFillRef param_1 = CmdFillRef(ref.offset + 4u);\n return CmdFill_read(param, param_1);\n}\n\nCmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n CmdAlpha s;\n s.alpha = uintBitsToFloat(raw0);\n return s;\n}\n\nCmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdAlphaRef param_1 = CmdAlphaRef(ref.offset + 4u);\n return CmdAlpha_read(param, param_1);\n}\n\nCmdColor CmdColor_read(Alloc a, CmdColorRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n CmdColor s;\n s.rgba_color = raw0;\n return s;\n}\n\nCmdColor Cmd_Color_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdColorRef param_1 = CmdColorRef(ref.offset + 4u);\n return CmdColor_read(param, param_1);\n}\n\nvec3 fromsRGB(vec3 srgb)\n{\n bvec3 cutoff = greaterThanEqual(srgb, vec3(0.040449999272823333740234375));\n vec3 below = srgb / vec3(12.9200000762939453125);\n vec3 above = pow((srgb + 
vec3(0.054999999701976776123046875)) / vec3(1.05499994754791259765625), vec3(2.400000095367431640625));\n return mix(below, above, cutoff);\n}\n\nvec4 unpacksRGB(uint srgba)\n{\n vec4 color = unpackUnorm4x8(srgba).wzyx;\n vec3 param = color.xyz;\n return vec4(fromsRGB(param), color.w);\n}\n\nCmdImage CmdImage_read(Alloc a, CmdImageRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n CmdImage s;\n s.index = raw0;\n s.offset = ivec2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);\n return s;\n}\n\nCmdImage Cmd_Image_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdImageRef param_1 = CmdImageRef(ref.offset + 4u);\n return CmdImage_read(param, param_1);\n}\n\nvec4[8] fillImage(uvec2 xy, CmdImage cmd_img)\n{\n vec4 rgba[8];\n for (uint i = 0u; i < 8u; i++)\n {\n uint param = i;\n ivec2 uv = ivec2(xy + chunk_offset(param)) + cmd_img.offset;\n vec4 fg_rgba = imageLoad(images[0], uv);\n vec3 param_1 = fg_rgba.xyz;\n vec3 _663 = fromsRGB(param_1);\n fg_rgba = vec4(_663.x, _663.y, _663.z, fg_rgba.w);\n rgba[i] = fg_rgba;\n }\n return rgba;\n}\n\nvec3 tosRGB(vec3 rgb)\n{\n bvec3 cutoff = greaterThanEqual(rgb, vec3(0.003130800090730190277099609375));\n vec3 below = vec3(12.9200000762939453125) * rgb;\n vec3 above = (vec3(1.05499994754791259765625) * pow(rgb, vec3(0.416660010814666748046875))) - vec3(0.054999999701976776123046875);\n return mix(below, above, cutoff);\n}\n\nuint packsRGB(inout vec4 rgba)\n{\n vec3 param = rgba.xyz;\n rgba = vec4(tosRGB(param), rgba.w);\n return packUnorm4x8(rgba.wzyx);\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return;\n }\n _196.memory[offset] = val;\n}\n\nCmdJump CmdJump_read(Alloc a, CmdJumpRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 
= ix + 0u;\n uint raw0 = read_mem(param, param_1);\n CmdJump s;\n s.new_ref = raw0;\n return s;\n}\n\nCmdJump Cmd_Jump_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdJumpRef param_1 = CmdJumpRef(ref.offset + 4u);\n return CmdJump_read(param, param_1);\n}\n\nvoid main()\n{\n if (_196.mem_error != 0u)\n {\n return;\n }\n uint tile_ix = (gl_WorkGroupID.y * _693.conf.width_in_tiles) + gl_WorkGroupID.x;\n Alloc param;\n param.offset = _693.conf.ptcl_alloc.offset;\n uint param_1 = tile_ix * 1024u;\n uint param_2 = 1024u;\n Alloc cmd_alloc = slice_mem(param, param_1, param_2);\n CmdRef cmd_ref = CmdRef(cmd_alloc.offset);\n Alloc param_3 = cmd_alloc;\n uint param_4 = cmd_ref.offset;\n Alloc scratch_alloc = alloc_read(param_3, param_4);\n cmd_ref.offset += 8u;\n uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + (32u * gl_WorkGroupID.x), gl_LocalInvocationID.y + (32u * gl_WorkGroupID.y));\n vec2 xy = vec2(xy_uint);\n vec4 rgba[8];\n for (uint i = 0u; i < 8u; i++)\n {\n rgba[i] = vec4(0.0);\n }\n uint clip_depth = 0u;\n float df[8];\n TileSegRef tile_seg_ref;\n float area[8];\n uint base_ix;\n while (true)\n {\n Alloc param_5 = cmd_alloc;\n CmdRef param_6 = cmd_ref;\n uint tag = Cmd_tag(param_5, param_6).tag;\n if (tag == 0u)\n {\n break;\n }\n switch (tag)\n {\n case 2u:\n {\n Alloc param_7 = cmd_alloc;\n CmdRef param_8 = cmd_ref;\n CmdStroke stroke = Cmd_Stroke_read(param_7, param_8);\n for (uint k = 0u; k < 8u; k++)\n {\n df[k] = 1000000000.0;\n }\n tile_seg_ref = TileSegRef(stroke.tile_ref);\n do\n {\n uint param_9 = tile_seg_ref.offset;\n uint param_10 = 24u;\n Alloc param_11 = new_alloc(param_9, param_10);\n TileSegRef param_12 = tile_seg_ref;\n TileSeg seg = TileSeg_read(param_11, param_12);\n vec2 line_vec = seg.vector;\n for (uint k_1 = 0u; k_1 < 8u; k_1++)\n {\n vec2 dpos = (xy + vec2(0.5)) - seg.origin;\n uint param_13 = k_1;\n dpos += vec2(chunk_offset(param_13));\n float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);\n df[k_1] = 
min(df[k_1], length((line_vec * t) - dpos));\n }\n tile_seg_ref = seg.next;\n } while (tile_seg_ref.offset != 0u);\n for (uint k_2 = 0u; k_2 < 8u; k_2++)\n {\n area[k_2] = clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0);\n }\n cmd_ref.offset += 12u;\n break;\n }\n case 1u:\n {\n Alloc param_14 = cmd_alloc;\n CmdRef param_15 = cmd_ref;\n CmdFill fill = Cmd_Fill_read(param_14, param_15);\n for (uint k_3 = 0u; k_3 < 8u; k_3++)\n {\n area[k_3] = float(fill.backdrop);\n }\n tile_seg_ref = TileSegRef(fill.tile_ref);\n do\n {\n uint param_16 = tile_seg_ref.offset;\n uint param_17 = 24u;\n Alloc param_18 = new_alloc(param_16, param_17);\n TileSegRef param_19 = tile_seg_ref;\n TileSeg seg_1 = TileSeg_read(param_18, param_19);\n for (uint k_4 = 0u; k_4 < 8u; k_4++)\n {\n uint param_20 = k_4;\n vec2 my_xy = xy + vec2(chunk_offset(param_20));\n vec2 start = seg_1.origin - my_xy;\n vec2 end = start + seg_1.vector;\n vec2 window = clamp(vec2(start.y, end.y), vec2(0.0), vec2(1.0));\n if (!(window.x == window.y))\n {\n vec2 t_1 = (window - vec2(start.y)) / vec2(seg_1.vector.y);\n vec2 xs = vec2(mix(start.x, end.x, t_1.x), mix(start.x, end.x, t_1.y));\n float xmin = min(min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07;\n float xmax = max(xs.x, xs.y);\n float b = min(xmax, 1.0);\n float c = max(b, 0.0);\n float d = max(xmin, 0.0);\n float a = ((b + (0.5 * ((d * d) - (c * c)))) - xmin) / (xmax - xmin);\n area[k_4] += (a * (window.x - window.y));\n }\n area[k_4] += (sign(seg_1.vector.x) * clamp((my_xy.y - seg_1.y_edge) + 1.0, 0.0, 1.0));\n }\n tile_seg_ref = seg_1.next;\n } while (tile_seg_ref.offset != 0u);\n for (uint k_5 = 0u; k_5 < 8u; k_5++)\n {\n area[k_5] = min(abs(area[k_5]), 1.0);\n }\n cmd_ref.offset += 12u;\n break;\n }\n case 3u:\n {\n for (uint k_6 = 0u; k_6 < 8u; k_6++)\n {\n area[k_6] = 1.0;\n }\n cmd_ref.offset += 4u;\n break;\n }\n case 4u:\n {\n Alloc param_21 = cmd_alloc;\n CmdRef param_22 = cmd_ref;\n CmdAlpha alpha = Cmd_Alpha_read(param_21, 
param_22);\n for (uint k_7 = 0u; k_7 < 8u; k_7++)\n {\n area[k_7] = alpha.alpha;\n }\n cmd_ref.offset += 8u;\n break;\n }\n case 5u:\n {\n Alloc param_23 = cmd_alloc;\n CmdRef param_24 = cmd_ref;\n CmdColor color = Cmd_Color_read(param_23, param_24);\n uint param_25 = color.rgba_color;\n vec4 fg = unpacksRGB(param_25);\n for (uint k_8 = 0u; k_8 < 8u; k_8++)\n {\n vec4 fg_k = fg * area[k_8];\n rgba[k_8] = (rgba[k_8] * (1.0 - fg_k.w)) + fg_k;\n }\n cmd_ref.offset += 8u;\n break;\n }\n case 6u:\n {\n Alloc param_26 = cmd_alloc;\n CmdRef param_27 = cmd_ref;\n CmdImage fill_img = Cmd_Image_read(param_26, param_27);\n uvec2 param_28 = xy_uint;\n CmdImage param_29 = fill_img;\n vec4 img[8] = fillImage(param_28, param_29);\n for (uint k_9 = 0u; k_9 < 8u; k_9++)\n {\n vec4 fg_k_1 = img[k_9] * area[k_9];\n rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;\n }\n cmd_ref.offset += 12u;\n break;\n }\n case 7u:\n {\n base_ix = (scratch_alloc.offset >> uint(2)) + (2u * ((((clip_depth * 32u) * 32u) + gl_LocalInvocationID.x) + (32u * gl_LocalInvocationID.y)));\n for (uint k_10 = 0u; k_10 < 8u; k_10++)\n {\n uint param_30 = k_10;\n uvec2 offset = chunk_offset(param_30);\n vec4 param_31 = vec4(rgba[k_10]);\n uint _1286 = packsRGB(param_31);\n uint srgb = _1286;\n float alpha_1 = clamp(abs(area[k_10]), 0.0, 1.0);\n Alloc param_32 = scratch_alloc;\n uint param_33 = (base_ix + 0u) + (2u * (offset.x + (offset.y * 32u)));\n uint param_34 = srgb;\n write_mem(param_32, param_33, param_34);\n Alloc param_35 = scratch_alloc;\n uint param_36 = (base_ix + 1u) + (2u * (offset.x + (offset.y * 32u)));\n uint param_37 = floatBitsToUint(alpha_1);\n write_mem(param_35, param_36, param_37);\n rgba[k_10] = vec4(0.0);\n }\n clip_depth++;\n cmd_ref.offset += 4u;\n break;\n }\n case 8u:\n {\n clip_depth--;\n base_ix = (scratch_alloc.offset >> uint(2)) + (2u * ((((clip_depth * 32u) * 32u) + gl_LocalInvocationID.x) + (32u * gl_LocalInvocationID.y)));\n for (uint k_11 = 0u; k_11 < 8u; k_11++)\n {\n uint 
param_38 = k_11;\n uvec2 offset_1 = chunk_offset(param_38);\n Alloc param_39 = scratch_alloc;\n uint param_40 = (base_ix + 0u) + (2u * (offset_1.x + (offset_1.y * 32u)));\n uint srgb_1 = read_mem(param_39, param_40);\n Alloc param_41 = scratch_alloc;\n uint param_42 = (base_ix + 1u) + (2u * (offset_1.x + (offset_1.y * 32u)));\n uint alpha_2 = read_mem(param_41, param_42);\n uint param_43 = srgb_1;\n vec4 bg = unpacksRGB(param_43);\n vec4 fg_1 = (rgba[k_11] * area[k_11]) * uintBitsToFloat(alpha_2);\n rgba[k_11] = (bg * (1.0 - fg_1.w)) + fg_1;\n }\n cmd_ref.offset += 4u;\n break;\n }\n case 9u:\n {\n Alloc param_44 = cmd_alloc;\n CmdRef param_45 = cmd_ref;\n cmd_ref = CmdRef(Cmd_Jump_read(param_44, param_45).new_ref);\n cmd_alloc.offset = cmd_ref.offset;\n continue;\n }\n }\n }\n for (uint i_1 = 0u; i_1 < 8u; i_1++)\n {\n uint param_46 = i_1;\n vec3 param_47 = rgba[i_1].xyz;\n imageStore(image, ivec2(xy_uint + chunk_offset(param_46)), vec4(tosRGB(param_47), rgba[i_1].w));\n }\n}\n\n", } shader_material_frag = driver.ShaderSources{ Name: "material.frag", diff --git a/gpu/shaders/backdrop.comp b/gpu/shaders/backdrop.comp index 84612752..b640d14b 100644 --- a/gpu/shaders/backdrop.comp +++ b/gpu/shaders/backdrop.comp @@ -48,13 +48,13 @@ void main() { if (element_ix < conf.n_elements) { AnnotatedTag tag = Annotated_tag(conf.anno_alloc, ref); switch (tag.tag) { + case Annotated_Image: + case Annotated_BeginClip: case Annotated_Color: if (fill_mode_from_flags(tag.flags) != MODE_NONZERO) { break; } // Fall through. 
- case Annotated_Image: - case Annotated_BeginClip: PathRef path_ref = PathRef(conf.tile_alloc.offset + element_ix * Path_size); Path path = Path_read(conf.tile_alloc, path_ref); sh_row_width[th_ix] = path.bbox.z - path.bbox.x; diff --git a/gpu/shaders/coarse.comp b/gpu/shaders/coarse.comp index a4837bd5..76d7fc69 100644 --- a/gpu/shaders/coarse.comp +++ b/gpu/shaders/coarse.comp @@ -91,6 +91,23 @@ bool alloc_cmd(inout Alloc cmd_alloc, inout CmdRef cmd_ref, inout uint cmd_limit return true; } +void write_fill(Alloc alloc, inout CmdRef cmd_ref, uint flags, Tile tile, float linewidth) { + if (fill_mode_from_flags(flags) == MODE_NONZERO) { + if (tile.tile.offset != 0) { + CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop); + Cmd_Fill_write(alloc, cmd_ref, cmd_fill); + cmd_ref.offset += 4 + CmdFill_size; + } else { + Cmd_Solid_write(alloc, cmd_ref); + cmd_ref.offset += 4; + } + } else { + CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * linewidth); + Cmd_Stroke_write(alloc, cmd_ref, cmd_stroke); + cmd_ref.offset += 4 + CmdStroke_size; + } +} + void main() { if (mem_error != NO_ERROR) { return; @@ -135,6 +152,12 @@ void main() { uint part_start_ix = 0; uint ready_ix = 0; + // Leave room for the fine rasterizer scratch allocation. 
+ Alloc scratch_alloc = slice_mem(cmd_alloc, 0, Alloc_size); + cmd_ref.offset += Alloc_size; + + uint num_begin_slots = 0; + uint begin_slot = 0; while (true) { for (uint i = 0; i < N_SLICE; i++) { sh_bitmaps[i][th_ix] = 0; @@ -320,20 +343,7 @@ void main() { if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { break; } - if (fill_mode_from_flags(tag.flags) == MODE_NONZERO) { - if (tile.tile.offset != 0) { - CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop); - Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill); - cmd_ref.offset += 4 + CmdFill_size; - } else { - Cmd_Solid_write(cmd_alloc, cmd_ref); - cmd_ref.offset += 4; - } - } else { - CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * fill.linewidth); - Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke); - cmd_ref.offset += 4 + CmdStroke_size; - } + write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill.linewidth); Cmd_Color_write(cmd_alloc, cmd_ref, CmdColor(fill.rgba_color)); cmd_ref.offset += 4 + CmdColor_size; break; @@ -344,20 +354,7 @@ void main() { if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { break; } - if (fill_mode_from_flags(tag.flags) == MODE_NONZERO) { - if (tile.tile.offset != 0) { - CmdFill cmd_fill = CmdFill(tile.tile.offset, tile.backdrop); - Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill); - cmd_ref.offset += 4 + CmdFill_size; - } else { - Cmd_Solid_write(cmd_alloc, cmd_ref); - cmd_ref.offset += 4; - } - } else { - CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * fill_img.linewidth); - Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke); - cmd_ref.offset += 4 + CmdStroke_size; - } + write_fill(cmd_alloc, cmd_ref, tag.flags, tile, fill_img.linewidth); Cmd_Image_write(cmd_alloc, cmd_ref, CmdImage(fill_img.index, fill_img.offset)); cmd_ref.offset += 4 + CmdImage_size; break; @@ -373,27 +370,14 @@ void main() { if (!alloc_cmd(cmd_alloc, cmd_ref, cmd_limit)) { break; } - if (fill_mode_from_flags(tag.flags) == MODE_NONZERO) { - if (tile.tile.offset != 0) { - CmdFill cmd_fill = 
CmdFill(tile.tile.offset, tile.backdrop); - Cmd_Fill_write(cmd_alloc, cmd_ref, cmd_fill); - cmd_ref.offset += 4 + CmdFill_size; - } else { - // TODO: here is where a bunch of optimization magic should happen - float alpha = tile.backdrop == 0 ? 0.0 : 1.0; - Cmd_Alpha_write(cmd_alloc, cmd_ref, CmdAlpha(alpha)); - cmd_ref.offset += 4 + CmdAlpha_size; - } - } else { - CmdStroke cmd_stroke = CmdStroke(tile.tile.offset, 0.5 * begin_clip.linewidth); - Cmd_Stroke_write(cmd_alloc, cmd_ref, cmd_stroke); - cmd_ref.offset += 4 + CmdStroke_size; - } + write_fill(cmd_alloc, cmd_ref, tag.flags, tile, begin_clip.linewidth); Cmd_BeginClip_write(cmd_alloc, cmd_ref); cmd_ref.offset += 4; if (clip_depth < 32) { clip_one_mask &= ~(1 << clip_depth); } + begin_slot++; + num_begin_slots = max(num_begin_slots, begin_slot); } clip_depth++; break; @@ -405,6 +389,7 @@ void main() { } Cmd_Solid_write(cmd_alloc, cmd_ref); cmd_ref.offset += 4; + begin_slot--; Cmd_EndClip_write(cmd_alloc, cmd_ref); cmd_ref.offset += 4; } @@ -432,5 +417,13 @@ void main() { } if (bin_tile_x + tile_x < conf.width_in_tiles && bin_tile_y + tile_y < conf.height_in_tiles) { Cmd_End_write(cmd_alloc, cmd_ref); + if (num_begin_slots > 0) { + // Write scratch allocation: one state per BeginClip per rasterizer chunk. + uint scratch_size = num_begin_slots * TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE * 4; + MallocResult scratch = malloc(scratch_size); + // Ignore scratch.failed; we don't use the allocation and kernel4 + // checks for memory overflow before using it. 
+ alloc_write(scratch_alloc, scratch_alloc.offset, scratch.alloc); + } } } diff --git a/gpu/shaders/kernel4.comp b/gpu/shaders/kernel4.comp index 69c10288..b7834cf5 100644 --- a/gpu/shaders/kernel4.comp +++ b/gpu/shaders/kernel4.comp @@ -13,9 +13,12 @@ #include "mem.h" #include "setup.h" -#define CHUNK 8 -#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK) -layout(local_size_x = TILE_WIDTH_PX, local_size_y = CHUNK_DY) in; +#define CHUNK_X 2 +#define CHUNK_Y 4 +#define CHUNK CHUNK_X * CHUNK_Y +#define CHUNK_DX (TILE_WIDTH_PX / CHUNK_X) +#define CHUNK_DY (TILE_HEIGHT_PX / CHUNK_Y) +layout(local_size_x = CHUNK_DX, local_size_y = CHUNK_DY) in; layout(set = 0, binding = 1) readonly buffer ConfigBuf { Config conf; @@ -32,36 +35,6 @@ layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[1]; #include "ptcl.h" #include "tile.h" -#define BLEND_STACK_SIZE 4 - -// Layout of a clip scratch frame: -// Each frame is WIDTH * HEIGHT ClipStates, then a link reference. - -struct ClipState { - uint srgb; - float area; -}; - -// Link offset and frame size in 32-bit words. -#define CLIP_STATE_SIZE 2 -#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE) -#define CLIP_BUF_SIZE (CLIP_LINK_OFFSET + 1) - -shared MallocResult sh_clip_alloc; - -// Allocate a scratch buffer for clipping. 
-MallocResult alloc_clip_buf(uint link) { - if (gl_LocalInvocationID.x == 0 && gl_LocalInvocationID.y == 0) { - MallocResult m = malloc(CLIP_BUF_SIZE * 4); - if (!m.failed) { - write_mem(m.alloc, (m.alloc.offset >> 2) + CLIP_LINK_OFFSET, link); - } - sh_clip_alloc = m; - } - barrier(); - return sh_clip_alloc; -} - vec3 tosRGB(vec3 rgb) { bvec3 cutoff = greaterThanEqual(rgb, vec3(0.0031308)); vec3 below = vec3(12.92)*rgb; @@ -90,10 +63,14 @@ uint packsRGB(vec4 rgba) { return packUnorm4x8(rgba.wzyx); } +uvec2 chunk_offset(uint i) { + return uvec2(i % CHUNK_X * CHUNK_DX, i / CHUNK_X * CHUNK_DY); +} + vec4[CHUNK] fillImage(uvec2 xy, CmdImage cmd_img) { vec4 rgba[CHUNK]; for (uint i = 0; i < CHUNK; i++) { - ivec2 uv = ivec2(xy.x, xy.y + i * CHUNK_DY) + cmd_img.offset; + ivec2 uv = ivec2(xy + chunk_offset(i)) + cmd_img.offset; #ifdef ENABLE_IMAGE_INDICES vec4 fg_rgba = imageLoad(images[cmd_img.index], uv); #else @@ -114,23 +91,24 @@ void main() { Alloc cmd_alloc = slice_mem(conf.ptcl_alloc, tile_ix * PTCL_INITIAL_ALLOC, PTCL_INITIAL_ALLOC); CmdRef cmd_ref = CmdRef(cmd_alloc.offset); - uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y); + // Read scratch space allocation, written first in the command list.
+ Alloc scratch_alloc = alloc_read(cmd_alloc, cmd_ref.offset); + cmd_ref.offset += Alloc_size; + + uvec2 xy_uint = uvec2(gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_WorkGroupID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y); vec2 xy = vec2(xy_uint); vec4 rgba[CHUNK]; - ClipState blend_stack[BLEND_STACK_SIZE][CHUNK]; - uint blend_spill = 0; - uint blend_sp = 0; - Alloc clip_tos = new_alloc(0, 0); for (uint i = 0; i < CHUNK; i++) { rgba[i] = vec4(0.0); #ifdef ENABLE_IMAGE_INDICES if (xy_uint.x < 1024 && xy_uint.y < 1024) { - rgba[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i)/4); + rgba[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint + chunk_offset(i))/4); } #endif } float area[CHUNK]; + uint clip_depth = 0; while (true) { uint tag = Cmd_tag(cmd_alloc, cmd_ref).tag; if (tag == Cmd_End) { @@ -148,7 +126,7 @@ void main() { vec2 line_vec = seg.vector; for (uint k = 0; k < CHUNK; k++) { vec2 dpos = xy + vec2(0.5, 0.5) - seg.origin; - dpos.y += float(k * CHUNK_DY); + dpos += vec2(chunk_offset(k)); float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0); df[k] = min(df[k], length(line_vec * t - dpos)); } @@ -167,7 +145,7 @@ void main() { do { TileSeg seg = TileSeg_read(new_alloc(tile_seg_ref.offset, TileSeg_size), tile_seg_ref); for (uint k = 0; k < CHUNK; k++) { - vec2 my_xy = vec2(xy.x, xy.y + float(k * CHUNK_DY)); + vec2 my_xy = xy + vec2(chunk_offset(k)); vec2 start = seg.origin - my_xy; vec2 end = start + seg.vector; vec2 window = clamp(vec2(start.y, end.y), 0.0, 1.0); @@ -223,47 +201,29 @@ void main() { cmd_ref.offset += 4 + CmdImage_size; break; case Cmd_BeginClip: - uint blend_slot = blend_sp % BLEND_STACK_SIZE; - if (blend_sp == blend_spill + BLEND_STACK_SIZE) { - // spill to scratch buffer - MallocResult m = alloc_clip_buf(clip_tos.offset); - if (m.failed) { - return; - } - clip_tos = m.alloc; - uint base_ix = (clip_tos.offset >> 2) + CLIP_STATE_SIZE * (gl_LocalInvocationID.x + 
TILE_WIDTH_PX * gl_LocalInvocationID.y); - for (uint k = 0; k < CHUNK; k++) { - uint srgb = blend_stack[blend_slot][k].srgb; - uint area = floatBitsToUint(blend_stack[blend_slot][k].area); - write_mem(clip_tos, base_ix + 0 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY, srgb); - write_mem(clip_tos, base_ix + 1 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY, area); - } - blend_spill++; - } + uint base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX + + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y); for (uint k = 0; k < CHUNK; k++) { - blend_stack[blend_slot][k] = ClipState(packsRGB(rgba[k]), clamp(abs(area[k]), 0.0, 1.0)); + uvec2 offset = chunk_offset(k); + uint srgb = packsRGB(vec4(rgba[k])); + float alpha = clamp(abs(area[k]), 0.0, 1.0); + write_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), srgb); + write_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX), floatBitsToUint(alpha)); rgba[k] = vec4(0.0); } - blend_sp++; + clip_depth++; cmd_ref.offset += 4; break; case Cmd_EndClip: - blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE; - if (blend_sp == blend_spill) { - uint base_ix = (clip_tos.offset >> 2) + CLIP_STATE_SIZE * (gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y); - for (uint k = 0; k < CHUNK; k++) { - uint srgb = read_mem(clip_tos, base_ix + 0 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY); - uint area = read_mem(clip_tos, base_ix + 1 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY); - ClipState state = ClipState(srgb, uintBitsToFloat(area)); - blend_stack[blend_slot][k] = state; - } - clip_tos.offset = read_mem(clip_tos, (clip_tos.offset >> 2) + CLIP_LINK_OFFSET); - blend_spill--; - } - blend_sp--; + clip_depth--; + base_ix = (scratch_alloc.offset >> 2) + CLIP_STATE_SIZE * (clip_depth * TILE_WIDTH_PX * TILE_HEIGHT_PX + + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y); 
for (uint k = 0; k < CHUNK; k++) { - vec4 bg = unpacksRGB(blend_stack[blend_slot][k].srgb); - vec4 fg = rgba[k] * area[k] * blend_stack[blend_slot][k].area; + uvec2 offset = chunk_offset(k); + uint srgb = read_mem(scratch_alloc, base_ix + 0 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX)); + uint alpha = read_mem(scratch_alloc, base_ix + 1 + CLIP_STATE_SIZE * (offset.x + offset.y * TILE_WIDTH_PX)); + vec4 bg = unpacksRGB(srgb); + vec4 fg = rgba[k] * area[k] * uintBitsToFloat(alpha); rgba[k] = bg * (1.0 - fg.a) + fg; } cmd_ref.offset += 4; @@ -276,6 +236,6 @@ void main() { } for (uint i = 0; i < CHUNK; i++) { - imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(tosRGB(rgba[i].rgb), rgba[i].a)); + imageStore(image, ivec2(xy_uint + chunk_offset(i)), vec4(tosRGB(rgba[i].rgb), rgba[i].a)); } } diff --git a/gpu/shaders/mem.h b/gpu/shaders/mem.h index 7b2a02a2..6e6e775d 100644 --- a/gpu/shaders/mem.h +++ b/gpu/shaders/mem.h @@ -21,7 +21,11 @@ layout(set = 0, binding = 0) buffer Memory { #define ERR_OUT_OF_BOUNDS 2 #define ERR_UNALIGNED_ACCESS 3 +#ifdef MEM_DEBUG +#define Alloc_size 16 +#else #define Alloc_size 8 +#endif // Alloc represents a memory allocation. struct Alloc { @@ -39,7 +43,7 @@ struct MallocResult { bool failed; }; -// new_alloc synthesizes an Alloc when its offset and size are derived. +// new_alloc synthesizes an Alloc from an offset and size. Alloc new_alloc(uint offset, uint size) { Alloc a; a.offset = offset; @@ -118,3 +122,21 @@ Alloc slice_mem(Alloc a, uint offset, uint size) { #endif return new_alloc(a.offset + offset, size); } + +// alloc_write writes alloc to memory at offset bytes. +void alloc_write(Alloc a, uint offset, Alloc alloc) { + write_mem(a, offset >> 2, alloc.offset); +#ifdef MEM_DEBUG + write_mem(a, (offset >> 2) + 1, alloc.size); +#endif +} + +// alloc_read reads an Alloc from memory at offset bytes. 
+Alloc alloc_read(Alloc a, uint offset) { + Alloc alloc; + alloc.offset = read_mem(a, offset >> 2); +#ifdef MEM_DEBUG + alloc.size = read_mem(a, (offset >> 2) + 1); +#endif + return alloc; +} diff --git a/gpu/shaders/setup.h b/gpu/shaders/setup.h index 5f76cbc7..83b6d1d1 100644 --- a/gpu/shaders/setup.h +++ b/gpu/shaders/setup.h @@ -42,7 +42,10 @@ struct Config { #define MODE_NONZERO 0 #define MODE_STROKE 1 +// Size of kernel4 clip state, in words. +#define CLIP_STATE_SIZE 2 + // fill_mode_from_flags extracts the fill mode from tag flags. uint fill_mode_from_flags(uint flags) { - return flags & 0x1; + return flags & 0x1; }