From 7f6e376424973602752b23f1c8013b8a0b5721bf Mon Sep 17 00:00:00 2001 From: Elias Naur Date: Thu, 26 Aug 2021 19:18:29 +0200 Subject: [PATCH] gpu,gpu/internal: support variable strides in ReadPixels It saves a roundtrip to scratch memory when the CPU fallback renderer downloads rendered materials. Signed-off-by: Elias Naur --- gpu/compute.go | 12 +----------- gpu/internal/d3d11/d3d11_windows.go | 4 ++-- gpu/internal/driver/driver.go | 4 ++-- gpu/internal/metal/metal_darwin.go | 3 +-- gpu/internal/opengl/opengl.go | 24 ++++++++++++++++++------ internal/gl/gl.go | 1 + 6 files changed, 25 insertions(+), 23 deletions(-) diff --git a/gpu/compute.go b/gpu/compute.go index 2d3d5c86..7e8bcacd 100644 --- a/gpu/compute.go +++ b/gpu/compute.go @@ -95,7 +95,6 @@ type compute struct { frag struct { buf driver.Buffer } - scratch []byte } timers struct { profile string @@ -1006,20 +1005,11 @@ func (g *compute) renderMaterials() error { copyFBO := atlas.fbo data := atlas.cpuImage.Data() for _, r := range m.regions { - dims := r.Size() - if n := dims.X * dims.Y * 4; n > len(m.scratch) { - m.scratch = make([]byte, n) - } - copyFBO.ReadPixels(r, m.scratch) stride := atlas.size.X * 4 col := r.Min.X * 4 row := stride * r.Min.Y off := col + row - w := dims.X * 4 - for y := 0; y < dims.Y; y++ { - copy(data[off:off+w], m.scratch[y*dims.X*4:]) - off += stride - } + copyFBO.ReadPixels(r, data[off:], stride) } } return nil diff --git a/gpu/internal/d3d11/d3d11_windows.go b/gpu/internal/d3d11/d3d11_windows.go index dc21174e..a0c1d690 100644 --- a/gpu/internal/d3d11/d3d11_windows.go +++ b/gpu/internal/d3d11/d3d11_windows.go @@ -593,7 +593,7 @@ func (b *Buffer) Release() { b.buf = nil } -func (f *Framebuffer) ReadPixels(src image.Rectangle, pixels []byte) error { +func (f *Framebuffer) ReadPixels(src image.Rectangle, pixels []byte, stride int) error { if f.resource == nil { return errors.New("framebuffer does not support ReadPixels") } @@ -636,7 +636,7 @@ func (f *Framebuffer) ReadPixels(src image.Rectangle, pixels []byte) error { return fmt.Errorf("ReadPixels: %v", err) } defer f.ctx.Unmap(res, 0) - srcPitch := w * 4 + srcPitch := stride dstPitch := int(resMap.RowPitch) mapSize := dstPitch * h data := sliceOf(resMap.PData, mapSize) diff --git a/gpu/internal/driver/driver.go b/gpu/internal/driver/driver.go index 51ecf0f8..302746be 100644 --- a/gpu/internal/driver/driver.go +++ b/gpu/internal/driver/driver.go @@ -136,7 +136,7 @@ type Buffer interface { type Framebuffer interface { RenderTarget Release() - ReadPixels(src image.Rectangle, pixels []byte) error + ReadPixels(src image.Rectangle, pixels []byte, stride int) error } type Timer interface { @@ -212,7 +212,7 @@ func (f Features) Has(feats Features) bool { func DownloadImage(d Device, f Framebuffer, r image.Rectangle) (*image.RGBA, error) { img := image.NewRGBA(r) - if err := f.ReadPixels(r, img.Pix); err != nil { + if err := f.ReadPixels(r, img.Pix, img.Stride); err != nil { return nil, err } if d.Caps().BottomLeftOrigin { diff --git a/gpu/internal/metal/metal_darwin.go b/gpu/internal/metal/metal_darwin.go index 9479c2ab..d170a2f6 100644 --- a/gpu/internal/metal/metal_darwin.go +++ b/gpu/internal/metal/metal_darwin.go @@ -1121,7 +1121,7 @@ func (b *Buffer) Release() { *b = Buffer{} } -func (f *Framebuffer) ReadPixels(src image.Rectangle, pixels []byte) error { +func (f *Framebuffer) ReadPixels(src image.Rectangle, pixels []byte, stride int) error { if len(pixels) == 0 { return nil } @@ -1135,7 +1135,6 @@ func (f *Framebuffer) ReadPixels(src image.Rectangle, pixels []byte) error { height: C.NSUInteger(sz.Y), depth: 1, } - stride := 4 * sz.X buf, off := f.backend.stagingBuffer(len(pixels)) enc := f.backend.startBlit() C.blitEncCopyTextureToBuffer(enc, f.texture, buf, C.NSUInteger(off), C.NSUInteger(stride), C.NSUInteger(len(pixels)), msize, orig) diff --git a/gpu/internal/opengl/opengl.go b/gpu/internal/opengl/opengl.go index cab30ee1..d39e36df 100644 --- a/gpu/internal/opengl/opengl.go +++ b/gpu/internal/opengl/opengl.go @@ -80,6 +80,7 @@ type glState struct { clearColor [4]float32 viewport [4]int unpack_row_length int + pack_row_length int } type state struct { @@ -297,6 +298,7 @@ func (b *Backend) queryState() glState { clearColor: b.funcs.GetFloat4(gl.COLOR_CLEAR_VALUE), viewport: b.funcs.GetInteger4(gl.VIEWPORT), unpack_row_length: b.funcs.GetInteger(gl.UNPACK_ROW_LENGTH), + pack_row_length: b.funcs.GetInteger(gl.PACK_ROW_LENGTH), } s.blend.enable = b.funcs.IsEnabled(gl.BLEND) s.blend.srcRGB = gl.Enum(b.funcs.GetInteger(gl.BLEND_SRC_RGB)) @@ -372,6 +374,7 @@ func (b *Backend) restoreState(dst glState) { v := dst.viewport src.setViewport(f, v[0], v[1], v[2], v[3]) src.pixelStorei(f, gl.UNPACK_ROW_LENGTH, dst.unpack_row_length) + src.pixelStorei(f, gl.PACK_ROW_LENGTH, dst.pack_row_length) } func (s *glState) setVertexAttribArray(f *gl.Functions, idx int, enabled bool) { @@ -574,13 +577,21 @@ func (s *glState) bindBuffer(f *gl.Functions, target gl.Enum, buf gl.Buffer) { } func (s *glState) pixelStorei(f *gl.Functions, pname gl.Enum, val int) { - if pname != gl.UNPACK_ROW_LENGTH { + switch pname { + case gl.UNPACK_ROW_LENGTH: + if val == s.unpack_row_length { + return + } + s.unpack_row_length = val + case gl.PACK_ROW_LENGTH: + if val == s.pack_row_length { + return + } + s.pack_row_length = val + default: panic("unsupported PixelStorei pname") } - if val != s.unpack_row_length { - f.PixelStorei(pname, val) - s.unpack_row_length = val - } + f.PixelStorei(pname, val) } func (s *glState) setClearColor(f *gl.Functions, r, g, b, a float32) { @@ -1181,12 +1192,13 @@ func (b *Backend) CopyTexture(dst driver.Texture, dstOrigin image.Point, src dri b.funcs.CopyTexSubImage2D(gl.TEXTURE_2D, 0, dstOrigin.X, dstOrigin.Y, srcRect.Min.X, srcRect.Min.Y, sz.X, sz.Y) } -func (f *framebuffer) ReadPixels(src image.Rectangle, pixels []byte) error { +func (f *framebuffer) ReadPixels(src image.Rectangle, pixels []byte, stride int) error { glErr(f.backend.funcs) f.backend.BindFramebuffer(f, driver.LoadDesc{}) if len(pixels) < src.Dx()*src.Dy()*4 { return errors.New("unexpected RGBA size") } + f.backend.glstate.pixelStorei(f.backend.funcs, gl.PACK_ROW_LENGTH, stride/4) f.backend.funcs.ReadPixels(src.Min.X, src.Min.Y, src.Dx(), src.Dy(), gl.RGBA, gl.UNSIGNED_BYTE, pixels) return glErr(f.backend.funcs) } diff --git a/internal/gl/gl.go b/internal/gl/gl.go index 40da7203..a9e378a1 100644 --- a/internal/gl/gl.go +++ b/internal/gl/gl.go @@ -65,6 +65,7 @@ const ( NUM_EXTENSIONS = 0x821D ONE = 0x1 ONE_MINUS_SRC_ALPHA = 0x303 + PACK_ROW_LENGTH = 0x0D02 PROGRAM_BINARY_LENGTH = 0x8741 QUERY_RESULT = 0x8866 QUERY_RESULT_AVAILABLE = 0x8867