From b183774063fcebf1438ffff56a8c3d33a8c9e3bb Mon Sep 17 00:00:00 2001 From: Dominik Honnef Date: Thu, 15 Jun 2023 01:31:40 +0200 Subject: [PATCH] internal/stroke, gpu: reuse slice when splitting cubics When building GPU vertices from paths, we call stroke.SplitCubic once per OpCubic. Before this change, each call to stroke.SplitCubic would allocate a slice, which we would only use to iterate over. This allocation can be easily avoided by reusing the slice. We can conveniently store it in gpu.quadSplitter. In a real application that renders hundreds of paths with tens of rounded rectangles per path, this saved roughly 4500 allocations (or 1 MB worth) per frame. Signed-off-by: Dominik Honnef --- gpu/clip.go | 3 +++ gpu/gpu.go | 4 +++- internal/stroke/stroke.go | 8 +++++--- 3 files changed, 11 insertions(+), 4 deletions(-) diff --git a/gpu/clip.go b/gpu/clip.go index 292a6c04..3ba61d00 100644 --- a/gpu/clip.go +++ b/gpu/clip.go @@ -12,6 +12,9 @@ type quadSplitter struct { bounds f32.Rectangle contour uint32 d *drawOps + + // scratch space used by calls to stroke.SplitCubic + scratch []stroke.QuadSegment } func encodeQuadTo(data []byte, meta uint32, from, ctrl, to f32.Point) { diff --git a/gpu/gpu.go b/gpu/gpu.go index 1b92ccee..ab850fa4 100644 --- a/gpu/gpu.go +++ b/gpu/gpu.go @@ -1327,7 +1327,9 @@ func decodeToOutlineQuads(qs *quadSplitter, tr f32.Affine2D, pathData []byte) { q = q.Transform(tr) qs.splitAndEncode(q) case scene.OpCubic: - for _, q := range stroke.SplitCubic(scene.DecodeCubic(cmd)) { + from, ctrl0, ctrl1, to := scene.DecodeCubic(cmd) + qs.scratch = stroke.SplitCubic(from, ctrl0, ctrl1, to, qs.scratch[:0]) + for _, q := range qs.scratch { q = q.Transform(tr) qs.splitAndEncode(q) } diff --git a/internal/stroke/stroke.go b/internal/stroke/stroke.go index cc18be73..daf472d2 100644 --- a/internal/stroke/stroke.go +++ b/internal/stroke/stroke.go @@ -621,6 +621,7 @@ func StrokePathCommands(style StrokeStyle, scene []byte) StrokeQuads { // decodeToStrokeQuads decodes scene commands to quads ready to stroke. func decodeToStrokeQuads(pathData []byte) StrokeQuads { quads := make(StrokeQuads, 0, 2*len(pathData)/(scene.CommandSize+4)) + scratch := make([]QuadSegment, 0, 10) for len(pathData) >= scene.CommandSize+4 { contour := binary.LittleEndian.Uint32(pathData) cmd := ops.DecodeCommand(pathData[4:]) @@ -645,7 +646,9 @@ func decodeToStrokeQuads(pathData []byte) StrokeQuads { } quads = append(quads, quad) case scene.OpCubic: - for _, q := range SplitCubic(scene.DecodeCubic(cmd)) { + from, ctrl0, ctrl1, to := scene.DecodeCubic(cmd) + scratch = SplitCubic(from, ctrl0, ctrl1, to, scratch[:0]) + for _, q := range scratch { quad := StrokeQuad{ Contour: contour, Quad: q, @@ -660,8 +663,7 @@ func decodeToStrokeQuads(pathData []byte) StrokeQuads { return quads } -func SplitCubic(from, ctrl0, ctrl1, to f32.Point) []QuadSegment { - quads := make([]QuadSegment, 0, 10) +func SplitCubic(from, ctrl0, ctrl1, to f32.Point, quads []QuadSegment) []QuadSegment { // Set the maximum distance proportionally to the longest side // of the bounding rectangle. hull := f32.Rectangle{