gpu,gpu/shaders: [compute] add alpha to output

Fixes the glfw example where Gio content is composited (alpha blended)
on top of custom content.

Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
Elias Naur
2021-03-22 16:27:26 +01:00
parent 1dde94d8dd
commit 8750828c69
3 changed files with 41 additions and 22 deletions
+6 -1
View File
@@ -319,6 +319,7 @@ func (g *compute) Frame() error {
ct, k4t = ct.Round(q), k4t.Round(q)
t.profile = fmt.Sprintf("ft:%7s et:%7s tat:%7s pct:%7s bbt:%7s ct:%7s k4t:%7s", ft, et, tat, pct, bbt, ct, k4t)
}
g.drawOps.clear = false
return nil
}
@@ -331,6 +332,11 @@ func (g *compute) Profile() string {
// shader can only write to RGBA textures, but since we actually render in sRGB
// format we can't use glBlitFramebuffer, because it does sRGB conversion.
func (g *compute) blitOutput(viewport image.Point) {
if !g.drawOps.clear {
g.ctx.BlendFunc(driver.BlendFactorOne, driver.BlendFactorOneMinusSrcAlpha)
g.ctx.SetBlend(true)
defer g.ctx.SetBlend(false)
}
g.ctx.Viewport(0, 0, viewport.X, viewport.Y)
g.ctx.BindTexture(0, g.output.image)
g.ctx.BindProgram(g.output.blitProg)
@@ -345,7 +351,6 @@ func (g *compute) encode(viewport image.Point) error {
flipY := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(1, -1)).Offset(f32.Pt(0, float32(viewport.Y)))
g.enc.transform(flipY)
if g.drawOps.clear {
g.drawOps.clear = false
g.enc.rect(f32.Rectangle{Max: layout.FPt(viewport)})
g.enc.fillColor(f32color.NRGBAToRGBA(g.drawOps.clearColor.SRGB()))
}
+1 -1
View File
File diff suppressed because one or more lines are too long
+34 -20
View File
@@ -35,10 +35,16 @@ layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[1];
#define BLEND_STACK_SIZE 4
// Layout of a clip scratch frame:
// Each frame is WIDTH * HEIGHT 32-bit words, then a link reference.
// Each frame is WIDTH * HEIGHT ClipStates, then a link reference.
struct ClipState {
uint srgb;
float area;
};
// Link offset and frame size in 32-bit words.
#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX)
#define CLIP_STATE_SIZE 2
#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE)
#define CLIP_BUF_SIZE (CLIP_LINK_OFFSET + 1)
shared MallocResult sh_clip_alloc;
@@ -110,20 +116,18 @@ void main() {
uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
vec2 xy = vec2(xy_uint);
vec3 rgb[CHUNK];
float mask[CHUNK];
uint blend_stack[BLEND_STACK_SIZE][CHUNK];
vec4 rgba[CHUNK];
ClipState blend_stack[BLEND_STACK_SIZE][CHUNK];
uint blend_spill = 0;
uint blend_sp = 0;
Alloc clip_tos = new_alloc(0, 0);
for (uint i = 0; i < CHUNK; i++) {
rgb[i] = vec3(0.5);
rgba[i] = vec4(0.0);
#ifdef ENABLE_IMAGE_INDICES
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
rgb[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i)/4).rgb;
rgba[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i)/4);
}
#endif
mask[i] = 1.0;
}
float area[CHUNK];
@@ -202,17 +206,19 @@ void main() {
break;
case Cmd_Color:
CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref);
vec4 fg_rgba = unpacksRGB(color.rgba_color);
vec4 fg = unpacksRGB(color.rgba_color);
for (uint k = 0; k < CHUNK; k++) {
rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * area[k] * fg_rgba.a);
vec4 fg_k = fg * area[k];
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
}
cmd_ref.offset += 4 + CmdColor_size;
break;
case Cmd_Image:
CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref);
vec4 rgba[CHUNK] = fillImage(xy_uint, fill_img);
vec4 img[CHUNK] = fillImage(xy_uint, fill_img);
for (uint k = 0; k < CHUNK; k++) {
rgb[k] = mix(rgb[k], rgba[k].rgb, mask[k] * area[k] * rgba[k].a);
vec4 fg_k = img[k] * area[k];
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
}
cmd_ref.offset += 4 + CmdImage_size;
break;
@@ -225,14 +231,18 @@ void main() {
return;
}
clip_tos = m.alloc;
uint base_ix = (clip_tos.offset >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
uint base_ix = (clip_tos.offset >> 2) + CLIP_STATE_SIZE * (gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
for (uint k = 0; k < CHUNK; k++) {
write_mem(clip_tos, base_ix + k * TILE_WIDTH_PX * CHUNK_DY, blend_stack[blend_slot][k]);
uint srgb = blend_stack[blend_slot][k].srgb;
uint area = floatBitsToUint(blend_stack[blend_slot][k].area);
write_mem(clip_tos, base_ix + 0 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY, srgb);
write_mem(clip_tos, base_ix + 1 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY, area);
}
blend_spill++;
}
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = packsRGB(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
blend_stack[blend_slot][k] = ClipState(packsRGB(rgba[k]), clamp(abs(area[k]), 0.0, 1.0));
rgba[k] = vec4(0.0);
}
blend_sp++;
cmd_ref.offset += 4;
@@ -240,17 +250,21 @@ void main() {
case Cmd_EndClip:
blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE;
if (blend_sp == blend_spill) {
uint base_ix = (clip_tos.offset >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
uint base_ix = (clip_tos.offset >> 2) + CLIP_STATE_SIZE * (gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
for (uint k = 0; k < CHUNK; k++) {
blend_stack[blend_slot][k] = read_mem(clip_tos, base_ix + k * TILE_WIDTH_PX * CHUNK_DY);
uint srgb = read_mem(clip_tos, base_ix + 0 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY);
uint area = read_mem(clip_tos, base_ix + 1 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY);
ClipState state = ClipState(srgb, uintBitsToFloat(area));
blend_stack[blend_slot][k] = state;
}
clip_tos.offset = read_mem(clip_tos, (clip_tos.offset >> 2) + CLIP_LINK_OFFSET);
blend_spill--;
}
blend_sp--;
for (uint k = 0; k < CHUNK; k++) {
vec4 rgba = unpacksRGB(blend_stack[blend_slot][k]);
rgb[k] = mix(rgba.rgb, rgb[k], area[k] * rgba.a);
vec4 bg = unpacksRGB(blend_stack[blend_slot][k].srgb);
vec4 fg = rgba[k] * area[k] * blend_stack[blend_slot][k].area;
rgba[k] = bg * (1.0 - fg.a) + fg;
}
cmd_ref.offset += 4;
break;
@@ -262,6 +276,6 @@ void main() {
}
for (uint i = 0; i < CHUNK; i++) {
imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(tosRGB(rgb[i]), 1.0));
imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(tosRGB(rgba[i].rgb), rgba[i].a));
}
}