gpu,gpu/shaders: [compute] add alpha to output

Fixes the glfw example where Gio content is composited (alpha blended)
on top of custom content.

Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
Elias Naur
2021-03-22 16:27:26 +01:00
parent 1dde94d8dd
commit 8750828c69
3 changed files with 41 additions and 22 deletions
+34 -20
View File
@@ -35,10 +35,16 @@ layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[1];
#define BLEND_STACK_SIZE 4
// Layout of a clip scratch frame:
// Each frame is WIDTH * HEIGHT 32-bit words, then a link reference.
// Each frame is WIDTH * HEIGHT ClipStates, then a link reference.
// Per-pixel state pushed onto blend_stack and consumed again by Cmd_EndClip.
// When the stack spills, each ClipState is written to memory as two words:
// srgb first, then the bit pattern of area (floatBitsToUint).
struct ClipState {
// Color accumulated so far, packed into a single uint with packsRGB.
uint srgb;
// Coverage at push time, stored as clamp(abs(area), 0.0, 1.0).
float area;
};
// Link offset and frame size in 32-bit words.
// NOTE(review): CLIP_LINK_OFFSET is defined twice below; this looks like the
// removed and added lines of a diff rendered without +/- markers. Confirm
// which definition is current (the spill code strides by CLIP_STATE_SIZE).
#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX)
// Number of words written per ClipState when a frame is spilled (srgb, area).
#define CLIP_STATE_SIZE 2
#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE)
// One extra word after the per-pixel states holds the link reference.
#define CLIP_BUF_SIZE (CLIP_LINK_OFFSET + 1)
shared MallocResult sh_clip_alloc;
@@ -110,20 +116,18 @@ void main() {
uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
vec2 xy = vec2(xy_uint);
vec3 rgb[CHUNK];
float mask[CHUNK];
uint blend_stack[BLEND_STACK_SIZE][CHUNK];
vec4 rgba[CHUNK];
ClipState blend_stack[BLEND_STACK_SIZE][CHUNK];
uint blend_spill = 0;
uint blend_sp = 0;
Alloc clip_tos = new_alloc(0, 0);
for (uint i = 0; i < CHUNK; i++) {
rgb[i] = vec3(0.5);
rgba[i] = vec4(0.0);
#ifdef ENABLE_IMAGE_INDICES
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
rgb[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i)/4).rgb;
rgba[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i)/4);
}
#endif
mask[i] = 1.0;
}
float area[CHUNK];
@@ -202,17 +206,19 @@ void main() {
break;
case Cmd_Color:
CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref);
vec4 fg_rgba = unpacksRGB(color.rgba_color);
vec4 fg = unpacksRGB(color.rgba_color);
for (uint k = 0; k < CHUNK; k++) {
rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * area[k] * fg_rgba.a);
vec4 fg_k = fg * area[k];
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
}
cmd_ref.offset += 4 + CmdColor_size;
break;
case Cmd_Image:
CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref);
vec4 rgba[CHUNK] = fillImage(xy_uint, fill_img);
vec4 img[CHUNK] = fillImage(xy_uint, fill_img);
for (uint k = 0; k < CHUNK; k++) {
rgb[k] = mix(rgb[k], rgba[k].rgb, mask[k] * area[k] * rgba[k].a);
vec4 fg_k = img[k] * area[k];
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
}
cmd_ref.offset += 4 + CmdImage_size;
break;
@@ -225,14 +231,18 @@ void main() {
return;
}
clip_tos = m.alloc;
uint base_ix = (clip_tos.offset >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
uint base_ix = (clip_tos.offset >> 2) + CLIP_STATE_SIZE * (gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
for (uint k = 0; k < CHUNK; k++) {
write_mem(clip_tos, base_ix + k * TILE_WIDTH_PX * CHUNK_DY, blend_stack[blend_slot][k]);
uint srgb = blend_stack[blend_slot][k].srgb;
uint area = floatBitsToUint(blend_stack[blend_slot][k].area);
write_mem(clip_tos, base_ix + 0 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY, srgb);
write_mem(clip_tos, base_ix + 1 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY, area);
}
blend_spill++;
}
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = packsRGB(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
blend_stack[blend_slot][k] = ClipState(packsRGB(rgba[k]), clamp(abs(area[k]), 0.0, 1.0));
rgba[k] = vec4(0.0);
}
blend_sp++;
cmd_ref.offset += 4;
@@ -240,17 +250,21 @@ void main() {
case Cmd_EndClip:
blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE;
if (blend_sp == blend_spill) {
uint base_ix = (clip_tos.offset >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
uint base_ix = (clip_tos.offset >> 2) + CLIP_STATE_SIZE * (gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = read_mem(clip_tos, base_ix + k * TILE_WIDTH_PX * CHUNK_DY);
uint srgb = read_mem(clip_tos, base_ix + 0 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY);
uint area = read_mem(clip_tos, base_ix + 1 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY);
ClipState state = ClipState(srgb, uintBitsToFloat(area));
blend_stack[blend_slot][k] = state;
}
clip_tos.offset = read_mem(clip_tos, (clip_tos.offset >> 2) + CLIP_LINK_OFFSET);
blend_spill--;
}
blend_sp--;
for (uint k = 0; k < CHUNK; k++) {
vec4 rgba = unpacksRGB(blend_stack[blend_slot][k]);
rgb[k] = mix(rgba.rgb, rgb[k], area[k] * rgba.a);
vec4 bg = unpacksRGB(blend_stack[blend_slot][k].srgb);
vec4 fg = rgba[k] * area[k] * blend_stack[blend_slot][k].area;
rgba[k] = bg * (1.0 - fg.a) + fg;
}
cmd_ref.offset += 4;
break;
@@ -262,6 +276,6 @@ void main() {
}
for (uint i = 0; i < CHUNK; i++) {
imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(tosRGB(rgb[i]), 1.0));
imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(tosRGB(rgba[i].rgb), rgba[i].a));
}
}