forked from joejulian/gio
gpu,gpu/shaders: [compute] add alpha to output
Fixes the glfw example where Gio content is composited (alpha blended) on top of custom content.

Signed-off-by: Elias Naur <mail@eliasnaur.com>
This commit is contained in:
+6
-1
@@ -319,6 +319,7 @@ func (g *compute) Frame() error {
|
||||
ct, k4t = ct.Round(q), k4t.Round(q)
|
||||
t.profile = fmt.Sprintf("ft:%7s et:%7s tat:%7s pct:%7s bbt:%7s ct:%7s k4t:%7s", ft, et, tat, pct, bbt, ct, k4t)
|
||||
}
|
||||
g.drawOps.clear = false
|
||||
return nil
|
||||
}
|
||||
|
||||
@@ -331,6 +332,11 @@ func (g *compute) Profile() string {
|
||||
// shader can only write to RGBA textures, but since we actually render in sRGB
|
||||
// format we can't use glBlitFramebuffer, because it does sRGB conversion.
|
||||
func (g *compute) blitOutput(viewport image.Point) {
|
||||
if !g.drawOps.clear {
|
||||
g.ctx.BlendFunc(driver.BlendFactorOne, driver.BlendFactorOneMinusSrcAlpha)
|
||||
g.ctx.SetBlend(true)
|
||||
defer g.ctx.SetBlend(false)
|
||||
}
|
||||
g.ctx.Viewport(0, 0, viewport.X, viewport.Y)
|
||||
g.ctx.BindTexture(0, g.output.image)
|
||||
g.ctx.BindProgram(g.output.blitProg)
|
||||
@@ -345,7 +351,6 @@ func (g *compute) encode(viewport image.Point) error {
|
||||
flipY := f32.Affine2D{}.Scale(f32.Pt(0, 0), f32.Pt(1, -1)).Offset(f32.Pt(0, float32(viewport.Y)))
|
||||
g.enc.transform(flipY)
|
||||
if g.drawOps.clear {
|
||||
g.drawOps.clear = false
|
||||
g.enc.rect(f32.Rectangle{Max: layout.FPt(viewport)})
|
||||
g.enc.fillColor(f32color.NRGBAToRGBA(g.drawOps.clearColor.SRGB()))
|
||||
}
|
||||
|
||||
+1
-1
File diff suppressed because one or more lines are too long
+34
-20
@@ -35,10 +35,16 @@ layout(rgba8, set = 0, binding = 3) uniform readonly image2D images[1];
|
||||
#define BLEND_STACK_SIZE 4
|
||||
|
||||
// Layout of a clip scratch frame:
|
||||
// Each frame is WIDTH * HEIGHT 32-bit words, then a link reference.
|
||||
// Each frame is WIDTH * HEIGHT ClipStates, then a link reference.
|
||||
|
||||
// ClipState is one per-pixel entry of the blend stack. When an entry is
// spilled to clip scratch memory it occupies CLIP_STATE_SIZE (2) 32-bit
// words: srgb first, then the bit pattern of area.
//   srgb: the accumulated color, packed with packsRGB and read back with
//         unpacksRGB.
//   area: the coverage stored when the entry was pushed, clamped to [0, 1].
struct ClipState {
|
||||
uint srgb;
|
||||
float area;
|
||||
};
|
||||
|
||||
// Link offset and frame size in 32-bit words.
|
||||
#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX)
|
||||
#define CLIP_STATE_SIZE 2
|
||||
#define CLIP_LINK_OFFSET (TILE_WIDTH_PX * TILE_HEIGHT_PX * CLIP_STATE_SIZE)
|
||||
#define CLIP_BUF_SIZE (CLIP_LINK_OFFSET + 1)
|
||||
|
||||
shared MallocResult sh_clip_alloc;
|
||||
@@ -110,20 +116,18 @@ void main() {
|
||||
|
||||
uvec2 xy_uint = uvec2(gl_GlobalInvocationID.x, gl_LocalInvocationID.y + TILE_HEIGHT_PX * gl_WorkGroupID.y);
|
||||
vec2 xy = vec2(xy_uint);
|
||||
vec3 rgb[CHUNK];
|
||||
float mask[CHUNK];
|
||||
uint blend_stack[BLEND_STACK_SIZE][CHUNK];
|
||||
vec4 rgba[CHUNK];
|
||||
ClipState blend_stack[BLEND_STACK_SIZE][CHUNK];
|
||||
uint blend_spill = 0;
|
||||
uint blend_sp = 0;
|
||||
Alloc clip_tos = new_alloc(0, 0);
|
||||
for (uint i = 0; i < CHUNK; i++) {
|
||||
rgb[i] = vec3(0.5);
|
||||
rgba[i] = vec4(0.0);
|
||||
#ifdef ENABLE_IMAGE_INDICES
|
||||
if (xy_uint.x < 1024 && xy_uint.y < 1024) {
|
||||
rgb[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i)/4).rgb;
|
||||
rgba[i] = imageLoad(images[gl_WorkGroupID.x / 64], ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i)/4);
|
||||
}
|
||||
#endif
|
||||
mask[i] = 1.0;
|
||||
}
|
||||
|
||||
float area[CHUNK];
|
||||
@@ -202,17 +206,19 @@ void main() {
|
||||
break;
|
||||
case Cmd_Color:
|
||||
CmdColor color = Cmd_Color_read(cmd_alloc, cmd_ref);
|
||||
vec4 fg_rgba = unpacksRGB(color.rgba_color);
|
||||
vec4 fg = unpacksRGB(color.rgba_color);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
rgb[k] = mix(rgb[k], fg_rgba.rgb, mask[k] * area[k] * fg_rgba.a);
|
||||
vec4 fg_k = fg * area[k];
|
||||
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
|
||||
}
|
||||
cmd_ref.offset += 4 + CmdColor_size;
|
||||
break;
|
||||
case Cmd_Image:
|
||||
CmdImage fill_img = Cmd_Image_read(cmd_alloc, cmd_ref);
|
||||
vec4 rgba[CHUNK] = fillImage(xy_uint, fill_img);
|
||||
vec4 img[CHUNK] = fillImage(xy_uint, fill_img);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
rgb[k] = mix(rgb[k], rgba[k].rgb, mask[k] * area[k] * rgba[k].a);
|
||||
vec4 fg_k = img[k] * area[k];
|
||||
rgba[k] = rgba[k] * (1.0 - fg_k.a) + fg_k;
|
||||
}
|
||||
cmd_ref.offset += 4 + CmdImage_size;
|
||||
break;
|
||||
@@ -225,14 +231,18 @@ void main() {
|
||||
return;
|
||||
}
|
||||
clip_tos = m.alloc;
|
||||
uint base_ix = (clip_tos.offset >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
|
||||
uint base_ix = (clip_tos.offset >> 2) + CLIP_STATE_SIZE * (gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
write_mem(clip_tos, base_ix + k * TILE_WIDTH_PX * CHUNK_DY, blend_stack[blend_slot][k]);
|
||||
uint srgb = blend_stack[blend_slot][k].srgb;
|
||||
uint area = floatBitsToUint(blend_stack[blend_slot][k].area);
|
||||
write_mem(clip_tos, base_ix + 0 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY, srgb);
|
||||
write_mem(clip_tos, base_ix + 1 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY, area);
|
||||
}
|
||||
blend_spill++;
|
||||
}
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
blend_stack[blend_slot][k] = packsRGB(vec4(rgb[k], clamp(abs(area[k]), 0.0, 1.0)));
|
||||
blend_stack[blend_slot][k] = ClipState(packsRGB(rgba[k]), clamp(abs(area[k]), 0.0, 1.0));
|
||||
rgba[k] = vec4(0.0);
|
||||
}
|
||||
blend_sp++;
|
||||
cmd_ref.offset += 4;
|
||||
@@ -240,17 +250,21 @@ void main() {
|
||||
case Cmd_EndClip:
|
||||
blend_slot = (blend_sp - 1) % BLEND_STACK_SIZE;
|
||||
if (blend_sp == blend_spill) {
|
||||
uint base_ix = (clip_tos.offset >> 2) + gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y;
|
||||
uint base_ix = (clip_tos.offset >> 2) + CLIP_STATE_SIZE * (gl_LocalInvocationID.x + TILE_WIDTH_PX * gl_LocalInvocationID.y);
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
blend_stack[blend_slot][k] = read_mem(clip_tos, base_ix + k * TILE_WIDTH_PX * CHUNK_DY);
|
||||
uint srgb = read_mem(clip_tos, base_ix + 0 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY);
|
||||
uint area = read_mem(clip_tos, base_ix + 1 + k * CLIP_STATE_SIZE * TILE_WIDTH_PX * CHUNK_DY);
|
||||
ClipState state = ClipState(srgb, uintBitsToFloat(area));
|
||||
blend_stack[blend_slot][k] = state;
|
||||
}
|
||||
clip_tos.offset = read_mem(clip_tos, (clip_tos.offset >> 2) + CLIP_LINK_OFFSET);
|
||||
blend_spill--;
|
||||
}
|
||||
blend_sp--;
|
||||
for (uint k = 0; k < CHUNK; k++) {
|
||||
vec4 rgba = unpacksRGB(blend_stack[blend_slot][k]);
|
||||
rgb[k] = mix(rgba.rgb, rgb[k], area[k] * rgba.a);
|
||||
vec4 bg = unpacksRGB(blend_stack[blend_slot][k].srgb);
|
||||
vec4 fg = rgba[k] * area[k] * blend_stack[blend_slot][k].area;
|
||||
rgba[k] = bg * (1.0 - fg.a) + fg;
|
||||
}
|
||||
cmd_ref.offset += 4;
|
||||
break;
|
||||
@@ -262,6 +276,6 @@ void main() {
|
||||
}
|
||||
|
||||
for (uint i = 0; i < CHUNK; i++) {
|
||||
imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(tosRGB(rgb[i]), 1.0));
|
||||
imageStore(image, ivec2(xy_uint.x, xy_uint.y + CHUNK_DY * i), vec4(tosRGB(rgba[i].rgb), rgba[i].a));
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user