From 4f40b58e0d14cb8df7372818471e8a1bdb4d7de3 Mon Sep 17 00:00:00 2001 From: Elias Naur Date: Mon, 19 Jul 2021 19:14:50 +0200 Subject: [PATCH] gpu/shaders: fix GPU hang This is a port of https://github.com/linebender/piet-gpu/pull/108. Updates gio#214 Updates gio#219 Signed-off-by: Elias Naur --- gpu/shaders.go | 115 +++++++++++++++++------------------ gpu/shaders/path_coarse.comp | 6 +- 2 files changed, 62 insertions(+), 59 deletions(-) diff --git a/gpu/shaders.go b/gpu/shaders.go index 89caf145..a01a5482 100644 --- a/gpu/shaders.go +++ b/gpu/shaders.go @@ -5672,9 +5672,11 @@ void main() vec2 err_v = (((cubic.p2 - cubic.p1) * 3.0) + cubic.p0) - cubic.p3; float err = (err_v.x * err_v.x) + (err_v.y * err_v.y); uint n_quads = max(uint(ceil(pow(err * 3.7037036418914794921875, 0.16666667163372039794921875))), 1u); + n_quads = min(n_quads, 16u); float val = 0.0; vec2 qp0 = cubic.p0; float _step = 1.0 / float(n_quads); + SubdivResult keep_params[16]; for (uint i = 0u; i < n_quads; i++) { float t = float(i + 1u) * _step; @@ -5696,6 +5698,7 @@ void main() vec2 param_18 = qp2; float param_19 = 0.4743416607379913330078125; SubdivResult params = estimate_subdiv(param_16, param_17, param_18, param_19); + keep_params[i] = params; val += params.val; qp0 = qp2; } @@ -5718,7 +5721,7 @@ void main() int n_out = 1; float val_sum = 0.0; vec2 p1; - float _1314; + float _1318; TileSeg tile_seg; for (uint i_1 = 0u; i_1 < n_quads; i_1++) { @@ -5736,30 +5739,26 @@ void main() float param_35 = t_1 - (0.5 * _step); vec2 qp1_1 = eval_cubic(param_31, param_32, param_33, param_34, param_35); qp1_1 = (qp1_1 * 2.0) - ((qp0 + qp2_1) * 0.5); - vec2 param_36 = qp0; - vec2 param_37 = qp1_1; - vec2 param_38 = qp2_1; - float param_39 = 0.4743416607379913330078125; - SubdivResult params_1 = estimate_subdiv(param_36, param_37, param_38, param_39); - float param_40 = params_1.a0; - float u0 = approx_parabola_inv_integral(param_40); - float param_41 = params_1.a2; - float u2 = approx_parabola_inv_integral(param_41); + SubdivResult params_1 = keep_params[i_1]; + float param_36 = params_1.a0; + float u0 = approx_parabola_inv_integral(param_36); + float param_37 = params_1.a2; + float u2 = approx_parabola_inv_integral(param_37); float uscale = 1.0 / (u2 - u0); float target = float(n_out) * v_step; for (;;) { - bool _1207 = uint(n_out) == n; - bool _1217; - if (!_1207) + bool _1211 = uint(n_out) == n; + bool _1221; + if (!_1211) { - _1217 = target < (val_sum + params_1.val); + _1221 = target < (val_sum + params_1.val); } else { - _1217 = _1207; + _1221 = _1211; } - if (_1217) + if (_1221) { if (uint(n_out) == n) { @@ -5769,14 +5768,14 @@ void main() { float u = (target - val_sum) / params_1.val; float a = mix(params_1.a0, params_1.a2, u); - float param_42 = a; - float au = approx_parabola_inv_integral(param_42); + float param_38 = a; + float au = approx_parabola_inv_integral(param_38); float t_2 = (au - u0) * uscale; - vec2 param_43 = qp0; - vec2 param_44 = qp1_1; - vec2 param_45 = qp2_1; - float param_46 = t_2; - p1 = eval_quad(param_43, param_44, param_45, param_46); + vec2 param_39 = qp0; + vec2 param_40 = qp1_1; + vec2 param_41 = qp2_1; + float param_42 = t_2; + p1 = eval_quad(param_39, param_40, param_41, param_42); } float xmin = min(p0.x, p1.x) - cubic.stroke.x; float xmax = max(p0.x, p1.x) + cubic.stroke.x; @@ -5786,13 +5785,13 @@ void main() float dy = p1.y - p0.y; if (abs(dy) < 9.999999717180685365747194737196e-10) { - _1314 = 1000000000.0; + _1318 = 1000000000.0; } else { - _1314 = dx / dy; + _1318 = dx / dy; } - float invslope = _1314; + float invslope = _1318; float c = (cubic.stroke.x + (abs(invslope) * (16.0 + cubic.stroke.y))) * 0.03125; float b = invslope; float a_1 = (p0.x - ((p0.y - 16.0) * b)) * 0.03125; @@ -5808,9 +5807,9 @@ void main() int stride = bbox.z - bbox.x; int base = ((y0 - bbox.y) * stride) - bbox.x; uint n_tile_alloc = uint((x1 - x0) * (y1 - y0)); - uint param_47 = n_tile_alloc * 24u; - MallocResult _1429 = malloc(param_47); - MallocResult tile_alloc = _1429; + uint param_43 = n_tile_alloc * 24u; + MallocResult _1433 = malloc(param_43); + MallocResult tile_alloc = _1433; if (tile_alloc.failed || (!mem_ok)) { return; @@ -5828,37 +5827,37 @@ void main() { float tile_y0 = float(y * 32); int xbackdrop = max((xray + 1), bbox.x); - bool _1486 = !is_stroke; - bool _1496; - if (_1486) + bool _1490 = !is_stroke; + bool _1500; + if (_1490) { - _1496 = min(p0.y, p1.y) < tile_y0; + _1500 = min(p0.y, p1.y) < tile_y0; } else { - _1496 = _1486; + _1500 = _1490; } - bool _1503; - if (_1496) + bool _1507; + if (_1500) { - _1503 = xbackdrop < bbox.z; + _1507 = xbackdrop < bbox.z; } else { - _1503 = _1496; + _1507 = _1500; } - if (_1503) + if (_1507) { int backdrop = (p1.y < p0.y) ? 1 : (-1); - TileRef param_48 = path.tiles; - uint param_49 = uint(base + xbackdrop); - TileRef tile_ref = Tile_index(param_48, param_49); + TileRef param_44 = path.tiles; + uint param_45 = uint(base + xbackdrop); + TileRef tile_ref = Tile_index(param_44, param_45); uint tile_el = tile_ref.offset >> uint(2); - Alloc param_50 = path_alloc; - uint param_51 = tile_el + 1u; - if (touch_mem(param_50, param_51)) + Alloc param_46 = path_alloc; + uint param_47 = tile_el + 1u; + if (touch_mem(param_46, param_47)) { - uint _1541 = atomicAdd(_145.memory[tile_el + 1u], uint(backdrop)); + uint _1545 = atomicAdd(_145.memory[tile_el + 1u], uint(backdrop)); } } int next_xray = last_xray; @@ -5877,17 +5876,17 @@ void main() for (int x = xx0; x < xx1; x++) { float tile_x0 = float(x * 32); - TileRef param_52 = TileRef(path.tiles.offset); - uint param_53 = uint(base + x); - TileRef tile_ref_1 = Tile_index(param_52, param_53); + TileRef param_48 = TileRef(path.tiles.offset); + uint param_49 = uint(base + x); + TileRef tile_ref_1 = Tile_index(param_48, param_49); uint tile_el_1 = tile_ref_1.offset >> uint(2); uint old = 0u; - Alloc param_54 = path_alloc; - uint param_55 = tile_el_1; - if (touch_mem(param_54, param_55)) + Alloc param_50 = path_alloc; + uint param_51 = tile_el_1; + if (touch_mem(param_50, param_51)) { - uint _1644 = atomicExchange(_145.memory[tile_el_1], tile_offset); - old = _1644; + uint _1648 = atomicExchange(_145.memory[tile_el_1], tile_offset); + old = _1648; } tile_seg.origin = p0; tile_seg.vector = p1 - p0; @@ -5919,10 +5918,10 @@ void main() } tile_seg.y_edge = y_edge; tile_seg.next.offset = old; - Alloc param_56 = tile_alloc.alloc; - TileSegRef param_57 = TileSegRef(tile_offset); - TileSeg param_58 = tile_seg; - TileSeg_write(param_56, param_57, param_58); + Alloc param_52 = tile_alloc.alloc; + TileSegRef param_53 = TileSegRef(tile_offset); + TileSeg param_54 = tile_seg; + TileSeg_write(param_52, param_53, param_54); tile_offset += 24u; } xc += b; diff --git a/gpu/shaders/path_coarse.comp b/gpu/shaders/path_coarse.comp index 147c8a50..ea525f5c 100644 --- a/gpu/shaders/path_coarse.comp +++ b/gpu/shaders/path_coarse.comp @@ -30,6 +30,7 @@ layout(set = 0, binding = 1) readonly buffer ConfigBuf { #define Q_ACCURACY (ACCURACY * 0.1) #define REM_ACCURACY (ACCURACY - Q_ACCURACY) #define MAX_HYPOT2 (432.0 * Q_ACCURACY * Q_ACCURACY) +#define MAX_QUADS 16 vec2 eval_quad(vec2 p0, vec2 p1, vec2 p2, float t) { float mt = 1.0 - t; @@ -113,6 +114,8 @@ void main() { float err = err_v.x * err_v.x + err_v.y * err_v.y; // The number of quadratics. uint n_quads = max(uint(ceil(pow(err * (1.0 / MAX_HYPOT2), 1.0 / 6.0))), 1); + n_quads = min(n_quads, MAX_QUADS); + SubdivResult keep_params[MAX_QUADS]; // Iterate over quadratics and tote up the estimated number of segments. float val = 0.0; vec2 qp0 = cubic.p0; @@ -123,6 +126,7 @@ void main() { vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step); qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2); SubdivResult params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY)); + keep_params[i] = params; val += params.val; qp0 = qp2; @@ -144,7 +148,7 @@ void main() { vec2 qp2 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t); vec2 qp1 = eval_cubic(cubic.p0, cubic.p1, cubic.p2, cubic.p3, t - 0.5 * step); qp1 = 2.0 * qp1 - 0.5 * (qp0 + qp2); - SubdivResult params = estimate_subdiv(qp0, qp1, qp2, sqrt(REM_ACCURACY)); + SubdivResult params = keep_params[i]; float u0 = approx_parabola_inv_integral(params.a0); float u2 = approx_parabola_inv_integral(params.a2); float uscale = 1.0 / (u2 - u0);