diff --git a/gpu/compute.go b/gpu/compute.go index 18e8027e..f802d901 100644 --- a/gpu/compute.go +++ b/gpu/compute.go @@ -319,6 +319,7 @@ func (g *compute) Frame() error { ct, k4t = ct.Round(q), k4t.Round(q) t.profile = fmt.Sprintf("ft:%7s et:%7s tat:%7s pct:%7s bbt:%7s ct:%7s k4t:%7s", ft, et, tat, pct, bbt, ct, k4t) } + g.drawOps.clear = false return nil } @@ -331,6 +332,11 @@ func (g *compute) Profile() string { // shader can only write to RGBA textures, but since we actually render in sRGB // format we can't use glBlitFramebuffer, because it does sRGB conversion. func (g *compute) blitOutput(viewport image.Point) { + if !g.drawOps.clear { + g.ctx.BlendFunc(driver.BlendFactorOne, driver.BlendFactorOneMinusSrcAlpha) + g.ctx.SetBlend(true) + defer g.ctx.SetBlend(false) + } g.ctx.Viewport(0, 0, viewport.X, viewport.Y) g.ctx.BindTexture(0, g.output.image) g.ctx.BindProgram(g.output.blitProg) @@ -345,7 +351,6 @@ func (g *compute) encode(viewport image.Point) error { flipY := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(1, -1)).Offset(f32.Pt(0, float32(viewport.Y))) g.enc.transform(flipY) if g.drawOps.clear { - g.drawOps.clear = false g.enc.rect(f32.Rectangle{Max: layout.FPt(viewport)}) g.enc.fillColor(f32color.NRGBAToRGBA(g.drawOps.clearColor.SRGB())) } diff --git a/gpu/shaders.go b/gpu/shaders.go index 17f5f1ee..bf9f084a 100644 --- a/gpu/shaders.go +++ b/gpu/shaders.go @@ -166,7 +166,7 @@ var ( } shader_kernel4_comp = driver.ShaderSources{ Name: "kernel4.comp", - GLSL310ES: "#version 310 es\nlayout(local_size_x = 32, local_size_y = 4, local_size_z = 1) in;\n\nstruct Alloc\n{\n uint offset;\n};\n\nstruct MallocResult\n{\n Alloc alloc;\n bool failed;\n};\n\nstruct CmdStrokeRef\n{\n uint offset;\n};\n\nstruct CmdStroke\n{\n uint tile_ref;\n float half_width;\n};\n\nstruct CmdFillRef\n{\n uint offset;\n};\n\nstruct CmdFill\n{\n uint tile_ref;\n int backdrop;\n};\n\nstruct CmdColorRef\n{\n uint offset;\n};\n\nstruct CmdColor\n{\n uint rgba_color;\n};\n\nstruct CmdImageRef\n{\n uint offset;\n};\n\nstruct CmdImage\n{\n uint index;\n ivec2 offset;\n};\n\nstruct CmdAlphaRef\n{\n uint offset;\n};\n\nstruct CmdAlpha\n{\n float alpha;\n};\n\nstruct CmdJumpRef\n{\n uint offset;\n};\n\nstruct CmdJump\n{\n uint new_ref;\n};\n\nstruct CmdRef\n{\n uint offset;\n};\n\nstruct CmdTag\n{\n uint tag;\n uint flags;\n};\n\nstruct TileSegRef\n{\n uint offset;\n};\n\nstruct TileSeg\n{\n vec2 origin;\n vec2 vector;\n float y_edge;\n TileSegRef next;\n};\n\nstruct Config\n{\n uint n_elements;\n uint n_pathseg;\n uint width_in_tiles;\n uint height_in_tiles;\n Alloc tile_alloc;\n Alloc bin_alloc;\n Alloc ptcl_alloc;\n Alloc pathseg_alloc;\n Alloc anno_alloc;\n Alloc trans_alloc;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n uint mem_offset;\n uint mem_error;\n uint memory[];\n} _190;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n Config conf;\n} _749;\n\nlayout(binding = 3, rgba8) uniform readonly highp image2D images[1];\nlayout(binding = 2, rgba8) uniform writeonly highp image2D image;\n\nshared MallocResult sh_clip_alloc;\n\nAlloc new_alloc(uint offset, uint size)\n{\n Alloc a;\n a.offset = offset;\n return a;\n}\n\nAlloc slice_mem(Alloc a, uint offset, uint size)\n{\n uint param = a.offset + offset;\n uint param_1 = size;\n return new_alloc(param, param_1);\n}\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return 0u;\n }\n uint v = _190.memory[offset];\n return v;\n}\n\nCmdTag Cmd_tag(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint tag_and_flags = read_mem(param, param_1);\n return CmdTag(tag_and_flags & 65535u, tag_and_flags >> uint(16));\n}\n\nCmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n CmdStroke s;\n s.tile_ref = raw0;\n s.half_width = uintBitsToFloat(raw1);\n return s;\n}\n\nCmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdStrokeRef param_1 = CmdStrokeRef(ref.offset + 4u);\n return CmdStroke_read(param, param_1);\n}\n\nTileSeg TileSeg_read(Alloc a, TileSegRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Alloc param_6 = a;\n uint param_7 = ix + 3u;\n uint raw3 = read_mem(param_6, param_7);\n Alloc param_8 = a;\n uint param_9 = ix + 4u;\n uint raw4 = read_mem(param_8, param_9);\n Alloc param_10 = a;\n uint param_11 = ix + 5u;\n uint raw5 = read_mem(param_10, param_11);\n TileSeg s;\n s.origin = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n s.vector = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n s.y_edge = uintBitsToFloat(raw4);\n s.next = TileSegRef(raw5);\n return s;\n}\n\nCmdFill CmdFill_read(Alloc a, CmdFillRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n CmdFill s;\n s.tile_ref = raw0;\n s.backdrop = int(raw1);\n return s;\n}\n\nCmdFill Cmd_Fill_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdFillRef param_1 = CmdFillRef(ref.offset + 4u);\n return CmdFill_read(param, param_1);\n}\n\nCmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n CmdAlpha s;\n s.alpha = uintBitsToFloat(raw0);\n return s;\n}\n\nCmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdAlphaRef param_1 = CmdAlphaRef(ref.offset + 4u);\n return CmdAlpha_read(param, param_1);\n}\n\nCmdColor CmdColor_read(Alloc a, CmdColorRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n CmdColor s;\n s.rgba_color = raw0;\n return s;\n}\n\nCmdColor Cmd_Color_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdColorRef param_1 = CmdColorRef(ref.offset + 4u);\n return CmdColor_read(param, param_1);\n}\n\nvec3 fromsRGB(vec3 srgb)\n{\n bvec3 cutoff = greaterThanEqual(srgb, vec3(0.040449999272823333740234375));\n vec3 below = srgb / vec3(12.9200000762939453125);\n vec3 above = pow((srgb + vec3(0.054999999701976776123046875)) / vec3(1.05499994754791259765625), vec3(2.400000095367431640625));\n return mix(below, above, cutoff);\n}\n\nvec4 unpacksRGB(uint srgba)\n{\n vec4 color = unpackUnorm4x8(srgba).wzyx;\n vec3 param = color.xyz;\n return vec4(fromsRGB(param), color.w);\n}\n\nCmdImage CmdImage_read(Alloc a, CmdImageRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n CmdImage s;\n s.index = raw0;\n s.offset = ivec2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);\n return s;\n}\n\nCmdImage Cmd_Image_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdImageRef param_1 = CmdImageRef(ref.offset + 4u);\n return CmdImage_read(param, param_1);\n}\n\nvec4[8] fillImage(uvec2 xy, CmdImage cmd_img)\n{\n vec4 rgba[8];\n for (uint i = 0u; i < 8u; i++)\n {\n ivec2 uv = ivec2(int(xy.x), int(xy.y + (i * 4u))) + cmd_img.offset;\n vec4 fg_rgba = imageLoad(images[0], uv);\n vec3 param = fg_rgba.xyz;\n vec3 _722 = fromsRGB(param);\n fg_rgba = vec4(_722.x, _722.y, _722.z, fg_rgba.w);\n rgba[i] = fg_rgba;\n }\n return rgba;\n}\n\nMallocResult malloc(uint size)\n{\n MallocResult r;\n r.failed = false;\n uint _196 = atomicAdd(_190.mem_offset, size);\n uint offset = _196;\n uint param = offset;\n uint param_1 = size;\n r.alloc = new_alloc(param, param_1);\n if ((offset + size) > uint(int(uint(_190.memory.length())) * 4))\n {\n r.failed = true;\n uint _217 = atomicMax(_190.mem_error, 1u);\n return r;\n }\n return r;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return;\n }\n _190.memory[offset] = val;\n}\n\nMallocResult alloc_clip_buf(uint link)\n{\n bool _569 = gl_LocalInvocationID.x == 0u;\n bool _575;\n if (_569)\n {\n _575 = gl_LocalInvocationID.y == 0u;\n }\n else\n {\n _575 = _569;\n }\n if (_575)\n {\n uint param = 4100u;\n MallocResult _581 = malloc(param);\n MallocResult m = _581;\n if (!m.failed)\n {\n Alloc param_1 = m.alloc;\n uint param_2 = (m.alloc.offset >> uint(2)) + 1024u;\n uint param_3 = link;\n write_mem(param_1, param_2, param_3);\n }\n sh_clip_alloc = m;\n }\n barrier();\n return sh_clip_alloc;\n}\n\nvec3 tosRGB(vec3 rgb)\n{\n bvec3 cutoff = greaterThanEqual(rgb, vec3(0.003130800090730190277099609375));\n vec3 below = vec3(12.9200000762939453125) * rgb;\n vec3 above = (vec3(1.05499994754791259765625) * pow(rgb, vec3(0.416660010814666748046875))) - vec3(0.054999999701976776123046875);\n return mix(below, above, cutoff);\n}\n\nuint packsRGB(inout vec4 rgba)\n{\n vec3 param = rgba.xyz;\n rgba = vec4(tosRGB(param), rgba.w);\n return packUnorm4x8(rgba.wzyx);\n}\n\nCmdJump CmdJump_read(Alloc a, CmdJumpRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n CmdJump s;\n s.new_ref = raw0;\n return s;\n}\n\nCmdJump Cmd_Jump_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdJumpRef param_1 = CmdJumpRef(ref.offset + 4u);\n return CmdJump_read(param, param_1);\n}\n\nvoid main()\n{\n if (_190.mem_error != 0u)\n {\n return;\n }\n uint tile_ix = (gl_WorkGroupID.y * _749.conf.width_in_tiles) + gl_WorkGroupID.x;\n Alloc param;\n param.offset = _749.conf.ptcl_alloc.offset;\n uint param_1 = tile_ix * 1024u;\n uint param_2 = 1024u;\n Alloc cmd_alloc = slice_mem(param, param_1, param_2);\n CmdRef cmd_ref = CmdRef(cmd_alloc.offset);\n uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + (32u * gl_WorkGroupID.y));\n vec2 xy = vec2(xy_uint);\n uint blend_spill = 0u;\n uint blend_sp = 0u;\n uint param_3 = 0u;\n uint param_4 = 0u;\n Alloc clip_tos = new_alloc(param_3, param_4);\n vec3 rgb[8];\n float mask[8];\n for (uint i = 0u; i < 8u; i++)\n {\n rgb[i] = vec3(0.5);\n mask[i] = 1.0;\n }\n float df[8];\n TileSegRef tile_seg_ref;\n float area[8];\n uint blend_slot;\n uint blend_stack[4][8];\n while (true)\n {\n Alloc param_5 = cmd_alloc;\n CmdRef param_6 = cmd_ref;\n uint tag = Cmd_tag(param_5, param_6).tag;\n if (tag == 0u)\n {\n break;\n }\n switch (tag)\n {\n case 2u:\n {\n Alloc param_7 = cmd_alloc;\n CmdRef param_8 = cmd_ref;\n CmdStroke stroke = Cmd_Stroke_read(param_7, param_8);\n for (uint k = 0u; k < 8u; k++)\n {\n df[k] = 1000000000.0;\n }\n tile_seg_ref = TileSegRef(stroke.tile_ref);\n do\n {\n uint param_9 = tile_seg_ref.offset;\n uint param_10 = 24u;\n Alloc param_11 = new_alloc(param_9, param_10);\n TileSegRef param_12 = tile_seg_ref;\n TileSeg seg = TileSeg_read(param_11, param_12);\n vec2 line_vec = seg.vector;\n for (uint k_1 = 0u; k_1 < 8u; k_1++)\n {\n vec2 dpos = (xy + vec2(0.5)) - seg.origin;\n dpos.y += float(k_1 * 4u);\n float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);\n df[k_1] = min(df[k_1], length((line_vec * t) - dpos));\n }\n tile_seg_ref = seg.next;\n } while (tile_seg_ref.offset != 0u);\n for (uint k_2 = 0u; k_2 < 8u; k_2++)\n {\n area[k_2] = clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0);\n }\n cmd_ref.offset += 12u;\n break;\n }\n case 1u:\n {\n Alloc param_13 = cmd_alloc;\n CmdRef param_14 = cmd_ref;\n CmdFill fill = Cmd_Fill_read(param_13, param_14);\n for (uint k_3 = 0u; k_3 < 8u; k_3++)\n {\n area[k_3] = float(fill.backdrop);\n }\n tile_seg_ref = TileSegRef(fill.tile_ref);\n do\n {\n uint param_15 = tile_seg_ref.offset;\n uint param_16 = 24u;\n Alloc param_17 = new_alloc(param_15, param_16);\n TileSegRef param_18 = tile_seg_ref;\n TileSeg seg_1 = TileSeg_read(param_17, param_18);\n for (uint k_4 = 0u; k_4 < 8u; k_4++)\n {\n vec2 my_xy = vec2(xy.x, xy.y + float(k_4 * 4u));\n vec2 start = seg_1.origin - my_xy;\n vec2 end = start + seg_1.vector;\n vec2 window = clamp(vec2(start.y, end.y), vec2(0.0), vec2(1.0));\n if (!(window.x == window.y))\n {\n vec2 t_1 = (window - vec2(start.y)) / vec2(seg_1.vector.y);\n vec2 xs = vec2(mix(start.x, end.x, t_1.x), mix(start.x, end.x, t_1.y));\n float xmin = min(min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07;\n float xmax = max(xs.x, xs.y);\n float b = min(xmax, 1.0);\n float c = max(b, 0.0);\n float d = max(xmin, 0.0);\n float a = ((b + (0.5 * ((d * d) - (c * c)))) - xmin) / (xmax - xmin);\n area[k_4] += (a * (window.x - window.y));\n }\n area[k_4] += (sign(seg_1.vector.x) * clamp((my_xy.y - seg_1.y_edge) + 1.0, 0.0, 1.0));\n }\n tile_seg_ref = seg_1.next;\n } while (tile_seg_ref.offset != 0u);\n for (uint k_5 = 0u; k_5 < 8u; k_5++)\n {\n area[k_5] = min(abs(area[k_5]), 1.0);\n }\n cmd_ref.offset += 12u;\n break;\n }\n case 3u:\n {\n for (uint k_6 = 0u; k_6 < 8u; k_6++)\n {\n area[k_6] = 1.0;\n }\n cmd_ref.offset += 4u;\n break;\n }\n case 4u:\n {\n Alloc param_19 = cmd_alloc;\n CmdRef param_20 = cmd_ref;\n CmdAlpha alpha = Cmd_Alpha_read(param_19, param_20);\n for (uint k_7 = 0u; k_7 < 8u; k_7++)\n {\n area[k_7] = alpha.alpha;\n }\n cmd_ref.offset += 8u;\n break;\n }\n case 5u:\n {\n Alloc param_21 = cmd_alloc;\n CmdRef param_22 = cmd_ref;\n CmdColor color = Cmd_Color_read(param_21, param_22);\n uint param_23 = color.rgba_color;\n vec4 fg_rgba = unpacksRGB(param_23);\n for (uint k_8 = 0u; k_8 < 8u; k_8++)\n {\n rgb[k_8] = mix(rgb[k_8], fg_rgba.xyz, vec3((mask[k_8] * area[k_8]) * fg_rgba.w));\n }\n cmd_ref.offset += 8u;\n break;\n }\n case 6u:\n {\n Alloc param_24 = cmd_alloc;\n CmdRef param_25 = cmd_ref;\n CmdImage fill_img = Cmd_Image_read(param_24, param_25);\n uvec2 param_26 = xy_uint;\n CmdImage param_27 = fill_img;\n vec4 rgba[8] = fillImage(param_26, param_27);\n for (uint k_9 = 0u; k_9 < 8u; k_9++)\n {\n rgb[k_9] = mix(rgb[k_9], rgba[k_9].xyz, vec3((mask[k_9] * area[k_9]) * rgba[k_9].w));\n }\n cmd_ref.offset += 12u;\n break;\n }\n case 7u:\n {\n blend_slot = blend_sp % 4u;\n if (blend_sp == (blend_spill + 4u))\n {\n uint param_28 = clip_tos.offset;\n MallocResult _1320 = alloc_clip_buf(param_28);\n MallocResult m = _1320;\n if (m.failed)\n {\n return;\n }\n clip_tos = m.alloc;\n uint base_ix = ((clip_tos.offset >> uint(2)) + gl_LocalInvocationID.x) + (32u * gl_LocalInvocationID.y);\n for (uint k_10 = 0u; k_10 < 8u; k_10++)\n {\n Alloc param_29 = clip_tos;\n uint param_30 = base_ix + ((k_10 * 32u) * 4u);\n uint param_31 = blend_stack[blend_slot][k_10];\n write_mem(param_29, param_30, param_31);\n }\n blend_spill++;\n }\n for (uint k_11 = 0u; k_11 < 8u; k_11++)\n {\n vec4 param_32 = vec4(rgb[k_11], clamp(abs(area[k_11]), 0.0, 1.0));\n uint _1392 = packsRGB(param_32);\n blend_stack[blend_slot][k_11] = _1392;\n }\n blend_sp++;\n cmd_ref.offset += 4u;\n break;\n }\n case 8u:\n {\n blend_slot = (blend_sp - 1u) % 4u;\n if (blend_sp == blend_spill)\n {\n uint base_ix_1 = ((clip_tos.offset >> uint(2)) + gl_LocalInvocationID.x) + (32u * gl_LocalInvocationID.y);\n for (uint k_12 = 0u; k_12 < 8u; k_12++)\n {\n Alloc param_33 = clip_tos;\n uint param_34 = base_ix_1 + ((k_12 * 32u) * 4u);\n blend_stack[blend_slot][k_12] = read_mem(param_33, param_34);\n }\n Alloc param_35 = clip_tos;\n uint param_36 = (clip_tos.offset >> uint(2)) + 1024u;\n clip_tos.offset = read_mem(param_35, param_36);\n blend_spill--;\n }\n blend_sp--;\n for (uint k_13 = 0u; k_13 < 8u; k_13++)\n {\n uint param_37 = blend_stack[blend_slot][k_13];\n vec4 rgba_1 = unpacksRGB(param_37);\n rgb[k_13] = mix(rgba_1.xyz, rgb[k_13], vec3(area[k_13] * rgba_1.w));\n }\n cmd_ref.offset += 4u;\n break;\n }\n case 9u:\n {\n Alloc param_38 = cmd_alloc;\n CmdRef param_39 = cmd_ref;\n cmd_ref = CmdRef(Cmd_Jump_read(param_38, param_39).new_ref);\n cmd_alloc.offset = cmd_ref.offset;\n continue;\n }\n }\n }\n for (uint i_1 = 0u; i_1 < 8u; i_1++)\n {\n vec3 param_40 = rgb[i_1];\n imageStore(image, ivec2(int(xy_uint.x), int(xy_uint.y + (4u * i_1))), vec4(tosRGB(param_40), 1.0));\n }\n}\n\n", + GLSL310ES: "#version 310 es\nlayout(local_size_x = 32, local_size_y = 4, local_size_z = 1) in;\n\nstruct Alloc\n{\n uint offset;\n};\n\nstruct MallocResult\n{\n Alloc alloc;\n bool failed;\n};\n\nstruct CmdStrokeRef\n{\n uint offset;\n};\n\nstruct CmdStroke\n{\n uint tile_ref;\n float half_width;\n};\n\nstruct CmdFillRef\n{\n uint offset;\n};\n\nstruct CmdFill\n{\n uint tile_ref;\n int backdrop;\n};\n\nstruct CmdColorRef\n{\n uint offset;\n};\n\nstruct CmdColor\n{\n uint rgba_color;\n};\n\nstruct CmdImageRef\n{\n uint offset;\n};\n\nstruct CmdImage\n{\n uint index;\n ivec2 offset;\n};\n\nstruct CmdAlphaRef\n{\n uint offset;\n};\n\nstruct CmdAlpha\n{\n float alpha;\n};\n\nstruct CmdJumpRef\n{\n uint offset;\n};\n\nstruct CmdJump\n{\n uint new_ref;\n};\n\nstruct CmdRef\n{\n uint offset;\n};\n\nstruct CmdTag\n{\n uint tag;\n uint flags;\n};\n\nstruct TileSegRef\n{\n uint offset;\n};\n\nstruct TileSeg\n{\n vec2 origin;\n vec2 vector;\n float y_edge;\n TileSegRef next;\n};\n\nstruct Config\n{\n uint n_elements;\n uint n_pathseg;\n uint width_in_tiles;\n uint height_in_tiles;\n Alloc tile_alloc;\n Alloc bin_alloc;\n Alloc ptcl_alloc;\n Alloc pathseg_alloc;\n Alloc anno_alloc;\n Alloc trans_alloc;\n};\n\nstruct ClipState\n{\n uint srgb;\n float area;\n};\n\nlayout(binding = 0, std430) buffer Memory\n{\n uint mem_offset;\n uint mem_error;\n uint memory[];\n} _190;\n\nlayout(binding = 1, std430) readonly buffer ConfigBuf\n{\n Config conf;\n} _749;\n\nlayout(binding = 3, rgba8) uniform readonly highp image2D images[1];\nlayout(binding = 2, rgba8) uniform writeonly highp image2D image;\n\nshared MallocResult sh_clip_alloc;\n\nAlloc new_alloc(uint offset, uint size)\n{\n Alloc a;\n a.offset = offset;\n return a;\n}\n\nAlloc slice_mem(Alloc a, uint offset, uint size)\n{\n uint param = a.offset + offset;\n uint param_1 = size;\n return new_alloc(param, param_1);\n}\n\nbool touch_mem(Alloc alloc, uint offset)\n{\n return true;\n}\n\nuint read_mem(Alloc alloc, uint offset)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return 0u;\n }\n uint v = _190.memory[offset];\n return v;\n}\n\nCmdTag Cmd_tag(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n uint param_1 = ref.offset >> uint(2);\n uint tag_and_flags = read_mem(param, param_1);\n return CmdTag(tag_and_flags & 65535u, tag_and_flags >> uint(16));\n}\n\nCmdStroke CmdStroke_read(Alloc a, CmdStrokeRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n CmdStroke s;\n s.tile_ref = raw0;\n s.half_width = uintBitsToFloat(raw1);\n return s;\n}\n\nCmdStroke Cmd_Stroke_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdStrokeRef param_1 = CmdStrokeRef(ref.offset + 4u);\n return CmdStroke_read(param, param_1);\n}\n\nTileSeg TileSeg_read(Alloc a, TileSegRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n Alloc param_4 = a;\n uint param_5 = ix + 2u;\n uint raw2 = read_mem(param_4, param_5);\n Alloc param_6 = a;\n uint param_7 = ix + 3u;\n uint raw3 = read_mem(param_6, param_7);\n Alloc param_8 = a;\n uint param_9 = ix + 4u;\n uint raw4 = read_mem(param_8, param_9);\n Alloc param_10 = a;\n uint param_11 = ix + 5u;\n uint raw5 = read_mem(param_10, param_11);\n TileSeg s;\n s.origin = vec2(uintBitsToFloat(raw0), uintBitsToFloat(raw1));\n s.vector = vec2(uintBitsToFloat(raw2), uintBitsToFloat(raw3));\n s.y_edge = uintBitsToFloat(raw4);\n s.next = TileSegRef(raw5);\n return s;\n}\n\nCmdFill CmdFill_read(Alloc a, CmdFillRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n CmdFill s;\n s.tile_ref = raw0;\n s.backdrop = int(raw1);\n return s;\n}\n\nCmdFill Cmd_Fill_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdFillRef param_1 = CmdFillRef(ref.offset + 4u);\n return CmdFill_read(param, param_1);\n}\n\nCmdAlpha CmdAlpha_read(Alloc a, CmdAlphaRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n CmdAlpha s;\n s.alpha = uintBitsToFloat(raw0);\n return s;\n}\n\nCmdAlpha Cmd_Alpha_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdAlphaRef param_1 = CmdAlphaRef(ref.offset + 4u);\n return CmdAlpha_read(param, param_1);\n}\n\nCmdColor CmdColor_read(Alloc a, CmdColorRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n CmdColor s;\n s.rgba_color = raw0;\n return s;\n}\n\nCmdColor Cmd_Color_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdColorRef param_1 = CmdColorRef(ref.offset + 4u);\n return CmdColor_read(param, param_1);\n}\n\nvec3 fromsRGB(vec3 srgb)\n{\n bvec3 cutoff = greaterThanEqual(srgb, vec3(0.040449999272823333740234375));\n vec3 below = srgb / vec3(12.9200000762939453125);\n vec3 above = pow((srgb + vec3(0.054999999701976776123046875)) / vec3(1.05499994754791259765625), vec3(2.400000095367431640625));\n return mix(below, above, cutoff);\n}\n\nvec4 unpacksRGB(uint srgba)\n{\n vec4 color = unpackUnorm4x8(srgba).wzyx;\n vec3 param = color.xyz;\n return vec4(fromsRGB(param), color.w);\n}\n\nCmdImage CmdImage_read(Alloc a, CmdImageRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n Alloc param_2 = a;\n uint param_3 = ix + 1u;\n uint raw1 = read_mem(param_2, param_3);\n CmdImage s;\n s.index = raw0;\n s.offset = ivec2(int(raw1 << uint(16)) >> 16, int(raw1) >> 16);\n return s;\n}\n\nCmdImage Cmd_Image_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdImageRef param_1 = CmdImageRef(ref.offset + 4u);\n return CmdImage_read(param, param_1);\n}\n\nvec4[8] fillImage(uvec2 xy, CmdImage cmd_img)\n{\n vec4 rgba[8];\n for (uint i = 0u; i < 8u; i++)\n {\n ivec2 uv = ivec2(int(xy.x), int(xy.y + (i * 4u))) + cmd_img.offset;\n vec4 fg_rgba = imageLoad(images[0], uv);\n vec3 param = fg_rgba.xyz;\n vec3 _722 = fromsRGB(param);\n fg_rgba = vec4(_722.x, _722.y, _722.z, fg_rgba.w);\n rgba[i] = fg_rgba;\n }\n return rgba;\n}\n\nMallocResult malloc(uint size)\n{\n MallocResult r;\n r.failed = false;\n uint _196 = atomicAdd(_190.mem_offset, size);\n uint offset = _196;\n uint param = offset;\n uint param_1 = size;\n r.alloc = new_alloc(param, param_1);\n if ((offset + size) > uint(int(uint(_190.memory.length())) * 4))\n {\n r.failed = true;\n uint _217 = atomicMax(_190.mem_error, 1u);\n return r;\n }\n return r;\n}\n\nvoid write_mem(Alloc alloc, uint offset, uint val)\n{\n Alloc param = alloc;\n uint param_1 = offset;\n if (!touch_mem(param, param_1))\n {\n return;\n }\n _190.memory[offset] = val;\n}\n\nMallocResult alloc_clip_buf(uint link)\n{\n bool _569 = gl_LocalInvocationID.x == 0u;\n bool _575;\n if (_569)\n {\n _575 = gl_LocalInvocationID.y == 0u;\n }\n else\n {\n _575 = _569;\n }\n if (_575)\n {\n uint param = 8196u;\n MallocResult _581 = malloc(param);\n MallocResult m = _581;\n if (!m.failed)\n {\n Alloc param_1 = m.alloc;\n uint param_2 = (m.alloc.offset >> uint(2)) + 2048u;\n uint param_3 = link;\n write_mem(param_1, param_2, param_3);\n }\n sh_clip_alloc = m;\n }\n barrier();\n return sh_clip_alloc;\n}\n\nvec3 tosRGB(vec3 rgb)\n{\n bvec3 cutoff = greaterThanEqual(rgb, vec3(0.003130800090730190277099609375));\n vec3 below = vec3(12.9200000762939453125) * rgb;\n vec3 above = (vec3(1.05499994754791259765625) * pow(rgb, vec3(0.416660010814666748046875))) - vec3(0.054999999701976776123046875);\n return mix(below, above, cutoff);\n}\n\nuint packsRGB(inout vec4 rgba)\n{\n vec3 param = rgba.xyz;\n rgba = vec4(tosRGB(param), rgba.w);\n return packUnorm4x8(rgba.wzyx);\n}\n\nCmdJump CmdJump_read(Alloc a, CmdJumpRef ref)\n{\n uint ix = ref.offset >> uint(2);\n Alloc param = a;\n uint param_1 = ix + 0u;\n uint raw0 = read_mem(param, param_1);\n CmdJump s;\n s.new_ref = raw0;\n return s;\n}\n\nCmdJump Cmd_Jump_read(Alloc a, CmdRef ref)\n{\n Alloc param = a;\n CmdJumpRef param_1 = CmdJumpRef(ref.offset + 4u);\n return CmdJump_read(param, param_1);\n}\n\nvoid main()\n{\n if (_190.mem_error != 0u)\n {\n return;\n }\n uint tile_ix = (gl_WorkGroupID.y * _749.conf.width_in_tiles) + gl_WorkGroupID.x;\n Alloc param;\n param.offset = _749.conf.ptcl_alloc.offset;\n uint param_1 = tile_ix * 1024u;\n uint param_2 = 1024u;\n Alloc cmd_alloc = slice_mem(param, param_1, param_2);\n CmdRef cmd_ref = CmdRef(cmd_alloc.offset);\n uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + (32u * gl_WorkGroupID.y));\n vec2 xy = vec2(xy_uint);\n uint blend_spill = 0u;\n uint blend_sp = 0u;\n uint param_3 = 0u;\n uint param_4 = 0u;\n Alloc clip_tos = new_alloc(param_3, param_4);\n vec4 rgba[8];\n for (uint i = 0u; i < 8u; i++)\n {\n rgba[i] = vec4(0.0);\n }\n float df[8];\n TileSegRef tile_seg_ref;\n float area[8];\n uint blend_slot;\n ClipState blend_stack[4][8];\n while (true)\n {\n Alloc param_5 = cmd_alloc;\n CmdRef param_6 = cmd_ref;\n uint tag = Cmd_tag(param_5, param_6).tag;\n if (tag == 0u)\n {\n break;\n }\n switch (tag)\n {\n case 2u:\n {\n Alloc param_7 = cmd_alloc;\n CmdRef param_8 = cmd_ref;\n CmdStroke stroke = Cmd_Stroke_read(param_7, param_8);\n for (uint k = 0u; k < 8u; k++)\n {\n df[k] = 1000000000.0;\n }\n tile_seg_ref = TileSegRef(stroke.tile_ref);\n do\n {\n uint param_9 = tile_seg_ref.offset;\n uint param_10 = 24u;\n Alloc param_11 = new_alloc(param_9, param_10);\n TileSegRef param_12 = tile_seg_ref;\n TileSeg seg = TileSeg_read(param_11, param_12);\n vec2 line_vec = seg.vector;\n for (uint k_1 = 0u; k_1 < 8u; k_1++)\n {\n vec2 dpos = (xy + vec2(0.5)) - seg.origin;\n dpos.y += float(k_1 * 4u);\n float t = clamp(dot(line_vec, dpos) / dot(line_vec, line_vec), 0.0, 1.0);\n df[k_1] = min(df[k_1], length((line_vec * t) - dpos));\n }\n tile_seg_ref = seg.next;\n } while (tile_seg_ref.offset != 0u);\n for (uint k_2 = 0u; k_2 < 8u; k_2++)\n {\n area[k_2] = clamp((stroke.half_width + 0.5) - df[k_2], 0.0, 1.0);\n }\n cmd_ref.offset += 12u;\n break;\n }\n case 1u:\n {\n Alloc param_13 = cmd_alloc;\n CmdRef param_14 = cmd_ref;\n CmdFill fill = Cmd_Fill_read(param_13, param_14);\n for (uint k_3 = 0u; k_3 < 8u; k_3++)\n {\n area[k_3] = float(fill.backdrop);\n }\n tile_seg_ref = TileSegRef(fill.tile_ref);\n do\n {\n uint param_15 = tile_seg_ref.offset;\n uint param_16 = 24u;\n Alloc param_17 = new_alloc(param_15, param_16);\n TileSegRef param_18 = tile_seg_ref;\n TileSeg seg_1 = TileSeg_read(param_17, param_18);\n for (uint k_4 = 0u; k_4 < 8u; k_4++)\n {\n vec2 my_xy = vec2(xy.x, xy.y + float(k_4 * 4u));\n vec2 start = seg_1.origin - my_xy;\n vec2 end = start + seg_1.vector;\n vec2 window = clamp(vec2(start.y, end.y), vec2(0.0), vec2(1.0));\n if (!(window.x == window.y))\n {\n vec2 t_1 = (window - vec2(start.y)) / vec2(seg_1.vector.y);\n vec2 xs = vec2(mix(start.x, end.x, t_1.x), mix(start.x, end.x, t_1.y));\n float xmin = min(min(xs.x, xs.y), 1.0) - 9.9999999747524270787835121154785e-07;\n float xmax = max(xs.x, xs.y);\n float b = min(xmax, 1.0);\n float c = max(b, 0.0);\n float d = max(xmin, 0.0);\n float a = ((b + (0.5 * ((d * d) - (c * c)))) - xmin) / (xmax - xmin);\n area[k_4] += (a * (window.x - window.y));\n }\n area[k_4] += (sign(seg_1.vector.x) * clamp((my_xy.y - seg_1.y_edge) + 1.0, 0.0, 1.0));\n }\n tile_seg_ref = seg_1.next;\n } while (tile_seg_ref.offset != 0u);\n for (uint k_5 = 0u; k_5 < 8u; k_5++)\n {\n area[k_5] = min(abs(area[k_5]), 1.0);\n }\n cmd_ref.offset += 12u;\n break;\n }\n case 3u:\n {\n for (uint k_6 = 0u; k_6 < 8u; k_6++)\n {\n area[k_6] = 1.0;\n }\n cmd_ref.offset += 4u;\n break;\n }\n case 4u:\n {\n Alloc param_19 = cmd_alloc;\n CmdRef param_20 = cmd_ref;\n CmdAlpha alpha = Cmd_Alpha_read(param_19, param_20);\n for (uint k_7 = 0u; k_7 < 8u; k_7++)\n {\n area[k_7] = alpha.alpha;\n }\n cmd_ref.offset += 8u;\n break;\n }\n case 5u:\n {\n Alloc param_21 = cmd_alloc;\n CmdRef param_22 = cmd_ref;\n CmdColor color = Cmd_Color_read(param_21, param_22);\n uint param_23 = color.rgba_color;\n vec4 fg = unpacksRGB(param_23);\n for (uint k_8 = 0u; k_8 < 8u; k_8++)\n {\n vec4 fg_k = fg * area[k_8];\n rgba[k_8] = (rgba[k_8] * (1.0 - fg_k.w)) + fg_k;\n }\n cmd_ref.offset += 8u;\n break;\n }\n case 6u:\n {\n Alloc param_24 = cmd_alloc;\n CmdRef param_25 = cmd_ref;\n CmdImage fill_img = Cmd_Image_read(param_24, param_25);\n uvec2 param_26 = xy_uint;\n CmdImage param_27 = fill_img;\n vec4 img[8] = fillImage(param_26, param_27);\n for (uint k_9 = 0u; k_9 < 8u; k_9++)\n {\n vec4 fg_k_1 = img[k_9] * area[k_9];\n rgba[k_9] = (rgba[k_9] * (1.0 - fg_k_1.w)) + fg_k_1;\n }\n cmd_ref.offset += 12u;\n break;\n }\n case 7u:\n {\n blend_slot = blend_sp % 4u;\n if (blend_sp == (blend_spill + 4u))\n {\n uint param_28 = clip_tos.offset;\n MallocResult _1311 = alloc_clip_buf(param_28);\n MallocResult m = _1311;\n if (m.failed)\n {\n return;\n }\n clip_tos = m.alloc;\n uint base_ix = (clip_tos.offset >> uint(2)) + (2u * (gl_LocalInvocationID.x + (32u * gl_LocalInvocationID.y)));\n for (uint k_10 = 0u; k_10 < 8u; k_10++)\n {\n uint srgb = blend_stack[blend_slot][k_10].srgb;\n uint area_1 = floatBitsToUint(blend_stack[blend_slot][k_10].area);\n Alloc param_29 = clip_tos;\n uint param_30 = (base_ix + 0u) + (((k_10 * 2u) * 32u) * 4u);\n uint param_31 = srgb;\n write_mem(param_29, param_30, param_31);\n Alloc param_32 = clip_tos;\n uint param_33 = (base_ix + 1u) + (((k_10 * 2u) * 32u) * 4u);\n uint param_34 = area_1;\n write_mem(param_32, param_33, param_34);\n }\n blend_spill++;\n }\n for (uint k_11 = 0u; k_11 < 8u; k_11++)\n {\n vec4 param_35 = rgba[k_11];\n uint _1399 = packsRGB(param_35);\n blend_stack[blend_slot][k_11] = ClipState(_1399, clamp(abs(area[k_11]), 0.0, 1.0));\n rgba[k_11] = vec4(0.0);\n }\n blend_sp++;\n cmd_ref.offset += 4u;\n break;\n }\n case 8u:\n {\n blend_slot = (blend_sp - 1u) % 4u;\n if (blend_sp == blend_spill)\n {\n uint base_ix_1 = (clip_tos.offset >> uint(2)) + (2u * (gl_LocalInvocationID.x + (32u * gl_LocalInvocationID.y)));\n for (uint k_12 = 0u; k_12 < 8u; k_12++)\n {\n Alloc param_36 = clip_tos;\n uint param_37 = (base_ix_1 + 0u) + (((k_12 * 2u) * 32u) * 4u);\n uint srgb_1 = read_mem(param_36, param_37);\n Alloc param_38 = clip_tos;\n uint param_39 = (base_ix_1 + 1u) + (((k_12 * 2u) * 32u) * 4u);\n uint area_2 = read_mem(param_38, param_39);\n ClipState state = ClipState(srgb_1, uintBitsToFloat(area_2));\n blend_stack[blend_slot][k_12] = state;\n }\n Alloc param_40 = clip_tos;\n uint param_41 = (clip_tos.offset >> uint(2)) + 2048u;\n clip_tos.offset = read_mem(param_40, param_41);\n blend_spill--;\n }\n blend_sp--;\n for (uint k_13 = 0u; k_13 < 8u; k_13++)\n {\n uint param_42 = blend_stack[blend_slot][k_13].srgb;\n vec4 bg = unpacksRGB(param_42);\n vec4 fg_1 = (rgba[k_13] * area[k_13]) * blend_stack[blend_slot][k_13].area;\n rgba[k_13] = (bg * (1.0 - fg_1.w)) + fg_1;\n }\n cmd_ref.offset += 4u;\n break;\n }\n case 9u:\n {\n Alloc param_43 = cmd_alloc;\n CmdRef param_44 = cmd_ref;\n cmd_ref = CmdRef(Cmd_Jump_read(param_43, param_44).new_ref);\n cmd_alloc.offset = cmd_ref.offset;\n continue;\n }\n }\n }\n for (uint i_1 = 0u; i_1 < 8u; i_1++)\n {\n vec3 param_45 = rgba[i_1].xyz;\n imageStore(image, ivec2(int(xy_uint.x), int(xy_uint.y + (4u * i_1))), vec4(tosRGB(param_45), rgba[i_1].w));\n }\n}\n\n", } shader_material_frag = driver.ShaderSources{ Name: "material.frag", diff --git a/gpu/shaders/kernel4.comp b/gpu/shaders/kernel4.comp index b9f59dc3..69c10288 100644 --- a/gpu/shaders/kernel4.comp +++ b/gpu/shaders/kernel4.comp @@ -35,10 +35,16 @@ layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[1]; #define BLEND_STACK_SIZE 4 // Layout of a clip scratch frame: -// Each frame is WIDTH * HEIGHT 32-bit words, then a link reference. +// Each frame is WIDTH * HEIGHT ClipStates, then a link reference. + +struct ClipState { + uint srgb; + float area; +}; // Link offset and frame size in 32-bit words. -#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX) +#define CLIP_STATE_SIZE 2 +#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE) #define CLIP_BUF_SIZE (CLIP_LINK_OFFSET + 1) shared MallocResult sh_clip_alloc; @@ -110,20 +116,18 @@ void main() { uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y); vec2 xy = vec2(xy_uint); - vec3 rgb[CHUNK]; - float mask[CHUNK]; - uint blend_stack[BLEND_STACK_SIZE][CHUNK]; + vec4 rgba[CHUNK]; + ClipState blend_stack[BLEND_STACK_SIZE][CHUNK]; uint blend_spill = 0; uint blend_sp = 0; Alloc clip_tos = new_alloc(0, 0); for (uint i = 0; i < CHUNK; i++) { - rgb[i] = vec3(0.5); + rgba[i] = vec4(0.0); #ifdef ENABLE_IMAGE_INDICES if (xy_uint.x < 1024 && xy_uint.y < 1024) { - rgb[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i)/4).rgb; + rgba[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i)/4); } #endif - mask[i] = 1.0; } float area[CHUNK]; @@ -202,17 +206,19 @@ void main() { break; case Cmd_Color: CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref); - vec4 fg_rgba = unpacksRGB(color.rgba_color); + vec4 fg = unpacksRGB(color.rgba_color); for (uint k = 0; k < CHUNK; k++) { - rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * area[k] * fg_rgba.a); + vec4 fg_k = fg * area[k]; + rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k; } cmd_ref.offset += 4 + CmdColor_size; break; case Cmd_Image: CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref); - vec4 rgba[CHUNK] = fillImage(xy_uint, fill_img); + vec4 img[CHUNK] = fillImage(xy_uint, fill_img); for (uint k = 0; k < CHUNK; k++) { - rgb[k] = mix(rgb[k], rgba[k].rgb, mask[k] * area[k] * rgba[k].a); + vec4 fg_k = img[k] * area[k]; + rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k; } cmd_ref.offset += 4 + CmdImage_size; break; @@ -225,14 +231,18 @@ void main() { return; } clip_tos = m.alloc; - uint base_ix = (clip_tos.offset >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y; + uint base_ix = (clip_tos.offset >> 2) + CLIP_STATE_SIZE * (gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y); for (uint k = 0; k < CHUNK; k++) { - write_mem(clip_tos, base_ix + k * TILE_WIDTH_PX * CHUNK_DY, blend_stack[blend_slot][k]); + uint srgb = blend_stack[blend_slot][k].srgb; + uint area = floatBitsToUint(blend_stack[blend_slot][k].area); + write_mem(clip_tos, base_ix + 0 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY, srgb); + write_mem(clip_tos, base_ix + 1 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY, area); } blend_spill++; } for (uint k = 0; k < CHUNK; k++) { - blend_stack[blend_slot][k] = packsRGB(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0))); + blend_stack[blend_slot][k] = ClipState(packsRGB(rgba[k]), clamp(abs(area[k]), 0.0, 1.0)); + rgba[k] = vec4(0.0); } blend_sp++; cmd_ref.offset += 4; @@ -240,17 +250,21 @@ void main() { case Cmd_EndClip: blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE; if (blend_sp == blend_spill) { - uint base_ix = (clip_tos.offset >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y; + uint base_ix = (clip_tos.offset >> 2) + CLIP_STATE_SIZE * (gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y); for (uint k = 0; k < CHUNK; k++) { - blend_stack[blend_slot][k] = read_mem(clip_tos, base_ix + k * TILE_WIDTH_PX * CHUNK_DY); + uint srgb = read_mem(clip_tos, base_ix + 0 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY); + uint area = read_mem(clip_tos, base_ix + 1 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY); + ClipState state = ClipState(srgb, uintBitsToFloat(area)); + blend_stack[blend_slot][k] = state; } clip_tos.offset = read_mem(clip_tos, (clip_tos.offset >> 2) + CLIP_LINK_OFFSET); blend_spill--; } blend_sp--; for (uint k = 0; k < CHUNK; k++) { - vec4 rgba = unpacksRGB(blend_stack[blend_slot][k]); - rgb[k] = mix(rgba.rgb, rgb[k], area[k] * rgba.a); + vec4 bg = unpacksRGB(blend_stack[blend_slot][k].srgb); + vec4 fg = rgba[k] * area[k] * blend_stack[blend_slot][k].area; + rgba[k] = bg * (1.0 - fg.a) + fg; } cmd_ref.offset += 4; break; @@ -262,6 +276,6 @@ void main() { } for (uint i = 0; i < CHUNK; i++) { - imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(tosRGB(rgb[i]), 1.0)); + imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(tosRGB(rgba[i].rgb), rgba[i].a)); } }