gpu: optimize encodeQuadTo

name             old time/op  new time/op  delta
EncodeQuadTo-32  35.4ns ± 1%  11.9ns ± 3%  -66.34%  (p=0.008 n=5+5)

Signed-off-by: Egon Elbre <egonelbre@gmail.com>
This commit is contained in:
Egon Elbre
2023-01-05 14:22:13 +02:00
committed by Elias Naur
parent c81a1f9671
commit 8bc6737dea
3 changed files with 68 additions and 16 deletions
+37 -8
View File
@@ -1,6 +1,9 @@
package gpu
import (
"encoding/binary"
"math"
"gioui.org/internal/f32"
"gioui.org/internal/stroke"
)
@@ -12,16 +15,42 @@ type quadSplitter struct {
}
// encodeQuadTo writes the four corner vertices of the quadratic segment
// from -> ctrl -> to into the first vertStride*4 bytes of data, in the
// order NW, NE, SW, SE. All words are stored little-endian.
//
// The four vertices carry the same meta word and the same three points and
// differ only in their leading corner word. The shared words are therefore
// encoded once into the first vertex slot, replicated into the remaining
// three slots, and the per-vertex corner word is written last.
//
// This is the hand-inlined form of:
//
//	encodeVertex(data, meta, -1, 1, from, ctrl, to)
//	encodeVertex(data[vertStride:], meta, 1, 1, from, ctrl, to)
//	encodeVertex(data[vertStride*2:], meta, -1, -1, from, ctrl, to)
//	encodeVertex(data[vertStride*3:], meta, 1, -1, from, ctrl, to)
//
// and needs to stay in sync with `vertex.encode`.
//
// It panics if data cannot be resliced to vertStride*4 bytes.
func encodeQuadTo(data []byte, meta uint32, from, ctrl, to f32.Point) {
	bo := binary.LittleEndian
	// Reslice once up front: an undersized buffer panics here, before any
	// byte has been written, and every access below uses a fixed range.
	data = data[:vertStride*4]

	// Encode the words shared by all four vertices into the first slot.
	// Bytes 0:4 (the corner word) are deliberately left for the final step.
	bo.PutUint32(data[4:8], meta)
	bo.PutUint32(data[8:12], math.Float32bits(from.X))
	bo.PutUint32(data[12:16], math.Float32bits(from.Y))
	bo.PutUint32(data[16:20], math.Float32bits(ctrl.X))
	bo.PutUint32(data[20:24], math.Float32bits(ctrl.Y))
	bo.PutUint32(data[24:28], math.Float32bits(to.X))
	bo.PutUint32(data[28:32], math.Float32bits(to.Y))

	// Replicate the first slot into the other three. The corner bytes copied
	// along with it are stale at this point; they are overwritten below.
	copy(data[vertStride*1:vertStride*2], data[vertStride*0:vertStride*1])
	copy(data[vertStride*2:vertStride*3], data[vertStride*0:vertStride*1])
	copy(data[vertStride*3:vertStride*4], data[vertStride*0:vertStride*1])

	// Stamp each vertex with its own corner word.
	bo.PutUint32(data[vertStride*0:vertStride*0+4], math.Float32bits(nwCorner))
	bo.PutUint32(data[vertStride*1:vertStride*1+4], math.Float32bits(neCorner))
	bo.PutUint32(data[vertStride*2:vertStride*2+4], math.Float32bits(swCorner))
	bo.PutUint32(data[vertStride*3:vertStride*3+4], math.Float32bits(seCorner))
}
// Corner words stored in the first four bytes of each quad vertex by
// encodeQuadTo. A corner value is the sum of two terms: 0.25 for the
// north corners (nw, ne) and 0.5 for the east corners (ne, se).
const (
	swCorner = 0.0  // neither term
	seCorner = 0.5  // east
	nwCorner = 0.25 // north
	neCorner = 0.75 // north + east
)
func encodeVertex(data []byte, meta uint32, cornerx, cornery int16, from, ctrl, to f32.Point) {
var corner float32
if cornerx == 1 {
+21
View File
@@ -0,0 +1,21 @@
// SPDX-License-Identifier: Unlicense OR MIT
package gpu
import (
"testing"
"gioui.org/internal/f32"
)
// BenchmarkEncodeQuadTo measures encodeQuadTo writing one quad (four
// vertices) into a fixed buffer. The coordinates change on every iteration
// so that the encoded values are not loop-invariant.
func BenchmarkEncodeQuadTo(b *testing.B) {
	var buf [vertStride * 4]byte
	dst := buf[:]
	for n := 0; n < b.N; n++ {
		c := float32(n)
		pt := f32.Point{X: c, Y: c}
		encodeQuadTo(dst, 123, pt, pt, pt)
	}
}
+10 -8
View File
@@ -113,16 +113,18 @@ type vertex struct {
ToX, ToY float32
}
// encode writes v into the first 32 bytes of d as eight little-endian
// 32-bit words, in this order:
//
//	Corner, maxy, FromX, FromY, CtrlX, CtrlY, ToX, ToY
//
// The float32 fields are stored as their IEEE 754 bit patterns; maxy is
// stored as given. Bytes of d past the first 32 are not touched.
// It panics if d cannot be resliced to 32 bytes.
//
// encode needs to stay in sync with the code in clip.go encodeQuadTo,
// which inlines this layout.
func (v vertex) encode(d []byte, maxy uint32) {
	// Reslice once so that every store below addresses a fixed 4-byte window.
	d = d[0:32]
	bo := binary.LittleEndian
	bo.PutUint32(d[0:4], math.Float32bits(v.Corner))
	bo.PutUint32(d[4:8], maxy)
	bo.PutUint32(d[8:12], math.Float32bits(v.FromX))
	bo.PutUint32(d[12:16], math.Float32bits(v.FromY))
	bo.PutUint32(d[16:20], math.Float32bits(v.CtrlX))
	bo.PutUint32(d[20:24], math.Float32bits(v.CtrlY))
	bo.PutUint32(d[24:28], math.Float32bits(v.ToX))
	bo.PutUint32(d[28:32], math.Float32bits(v.ToY))
}
const (