widget: [API] implement UAX#29 grapheme clustering in text widgets

This commit teaches the text widgets how to position their cursor according to
grapheme cluster boundaries rather than rune boundaries. While this is more work,
the results better match the expectations of users. A "grapheme cluster" is a
user-perceived character that may be composed of arbitrarily many runes.

I chose to implement this within widgets for two reasons:

- grapheme cluster boundaries would be extremely difficult to encode within the
glyph stream returned by the text shaper
- not all text needs to be segmented, only text that can be interacted with

All mutation operations exposed by widget.Editor now work in terms of grapheme
clusters instead of runes.

Signed-off-by: Chris Waldon <christopher.waldon.dev@gmail.com>
This commit is contained in:
Chris Waldon
2023-03-09 10:13:56 -05:00
committed by Elias Naur
parent 36e768e716
commit 5c54268d40
4 changed files with 411 additions and 23 deletions
+10 -5
View File
@@ -742,18 +742,21 @@ func (e *Editor) CaretCoords() f32.Point {
// direction to delete: positive is forward, negative is backward.
//
// If there is a selection, it is deleted and counts as a single grapheme cluster.
func (e *Editor) Delete(runes int) {
func (e *Editor) Delete(graphemeClusters int) {
e.initBuffer()
if runes == 0 {
if graphemeClusters == 0 {
return
}
start, end := e.text.Selection()
if start != end {
runes -= sign(runes)
graphemeClusters -= sign(graphemeClusters)
}
end += runes
// Move caret by the target quantity of clusters.
e.text.MoveCaret(0, graphemeClusters)
// Get the new rune offsets of the selection.
start, end = e.text.Selection()
e.replace(start, end, "", true)
// Reset xoff.
e.text.MoveCaret(0, 0)
@@ -889,7 +892,9 @@ func (e *Editor) replace(start, end int, s string, addHistory bool) int {
// MoveCaret moves the caret (aka selection start) and the selection end
// relative to their current positions. Positive distances moves forward,
// negative distances moves backward. Distances are in runes.
// negative distances moves backward. Distances are in grapheme clusters,
// which closely match what users perceive as "characters" even when the
// characters are multiple code points long.
func (e *Editor) MoveCaret(startDelta, endDelta int) {
e.initBuffer()
e.text.MoveCaret(startDelta, endDelta)
+74
View File
@@ -3,11 +3,14 @@
package widget
import (
"bufio"
"image"
"io"
"math"
"sort"
"gioui.org/text"
"github.com/go-text/typesetting/segmenter"
"golang.org/x/image/math/fixed"
)
@@ -415,3 +418,74 @@ func (g *glyphIndex) locate(viewport image.Rectangle, startRune, endRune int, re
}
return rects
}
// graphemeReader segments paragraphs of text into grapheme clusters.
// It pulls bytes from an io.ReaderAt (see SetSource), decodes them one
// newline-terminated paragraph at a time, and reports the grapheme cluster
// boundaries of each paragraph as rune offsets from the start of the source.
type graphemeReader struct {
	// Segmenter performs the actual grapheme segmentation of each paragraph.
	segmenter.Segmenter
	// graphemes holds the boundaries produced by the most recent call to
	// Graphemes. Its backing array is reused across calls.
	graphemes []int
	// paragraph holds the runes of the paragraph most recently decoded by
	// next. Its backing array is reused across calls.
	paragraph []rune
	// source is the text being segmented, as configured by SetSource.
	source io.ReaderAt
	// cursor is the byte offset within source at which the next Read starts.
	cursor int64
	// reader buffers reads of source; it wraps the graphemeReader itself and
	// so pulls its data through the Read method.
	reader *bufio.Reader
	// runeOffset is the total number of runes in all paragraphs already
	// segmented. It is added to each paragraph-relative boundary.
	runeOffset int
}
// SetSource configures the reader to pull from source, rewinding the byte
// cursor and the rune offset so that the next call to Graphemes starts at the
// beginning of source.
//
// The internal bufio.Reader is allocated on first use and then reused on
// subsequent calls, so repeatedly switching sources does not allocate a new
// read buffer each time.
func (p *graphemeReader) SetSource(source io.ReaderAt) {
	p.source = source
	p.cursor = 0
	p.runeOffset = 0
	if p.reader == nil {
		p.reader = bufio.NewReader(p)
		return
	}
	// Reset discards any data buffered from the previous source and points
	// the buffered reader back at p, keeping the existing buffer.
	p.reader.Reset(p)
}
// Read implements io.Reader by reading from the configured source at the
// current byte cursor and advancing the cursor by the number of bytes read.
// It exists so that the internal buffered reader can consume the source; it
// should not be directly invoked.
func (p *graphemeReader) Read(b []byte) (int, error) {
	count, readErr := p.source.ReadAt(b, p.cursor)
	// Advance past whatever was read, even when an error accompanies it.
	p.cursor += int64(count)
	return count, readErr
}
// next decodes one paragraph of rune data into the reusable paragraph buffer.
// A paragraph ends after the first newline rune (which is included) or when
// the underlying reader returns an error. The boolean result is true when the
// paragraph was terminated by a newline and false when reading stopped
// because of a read error (such as reaching the end of the source).
func (p *graphemeReader) next() ([]rune, bool) {
	p.paragraph = p.paragraph[:0]
	for {
		r, _, err := p.reader.ReadRune()
		if err != nil {
			// No more runes can be read; return what was gathered so far.
			return p.paragraph, false
		}
		p.paragraph = append(p.paragraph, r)
		if r == '\n' {
			return p.paragraph, true
		}
	}
}
// Graphemes returns the grapheme cluster boundaries of the next paragraph,
// expressed as rune offsets from the start of the source. The first element
// is the start of the paragraph's first cluster and every following element
// is the end of a cluster. A nil result means that there is no more data
// (all paragraphs have been segmented). The returned slice is reused by
// subsequent calls.
func (p *graphemeReader) Graphemes() []int {
	p.graphemes = p.graphemes[:0]
	var more bool
	p.paragraph, more = p.next()
	if !more && len(p.paragraph) == 0 {
		return nil
	}
	p.Segmenter.Init(p.paragraph)
	iter := p.Segmenter.GraphemeIterator()
	for first := true; iter.Next(); first = false {
		cluster := iter.Grapheme()
		begin := p.runeOffset + cluster.Offset
		if first {
			// Only the first cluster contributes its starting boundary;
			// later clusters start where the previous one ended.
			p.graphemes = append(p.graphemes, begin)
		}
		p.graphemes = append(p.graphemes, begin+len(cluster.Text))
	}
	// Make the next paragraph's boundaries relative to the whole source.
	p.runeOffset += len(p.paragraph)
	return p.graphemes
}
+231
View File
@@ -1,6 +1,8 @@
package widget
import (
"bytes"
"io"
"testing"
nsareg "eliasnaur.com/font/noto/sans/arabic/regular"
@@ -550,3 +552,232 @@ func printGlyphs(t *testing.T, glyphs []text.Glyph) {
t.Logf("glyphs[%2d] = {ID: 0x%013x, Flags: %4s, Advance: %4d(%6v), Runes: %d, Y: %3d, X: %4d(%6v)} ", i, g.ID, g.Flags, g.Advance, g.Advance, g.Runes, g.Y, g.X, g.X)
}
}
// TestGraphemeReaderNext checks that next splits each sample document into
// paragraphs that only contain a newline as their final rune, and that
// concatenating the paragraphs reproduces the document rune for rune.
func TestGraphemeReaderNext(t *testing.T) {
	latinDoc := bytes.NewReader([]byte(latinDocument))
	arabicDoc := bytes.NewReader([]byte(arabicDocument))
	emojiDoc := bytes.NewReader([]byte(emojiDocument))
	complexDoc := bytes.NewReader([]byte(complexDocument))
	type testcase struct {
		name  string
		input *bytes.Reader
		read  func() ([]rune, bool)
	}
	var pr graphemeReader
	for _, tc := range []testcase{
		{
			name:  "latin",
			input: latinDoc,
			read:  pr.next,
		},
		{
			name:  "arabic",
			input: arabicDoc,
			read:  pr.next,
		},
		{
			name:  "emoji",
			input: emojiDoc,
			read:  pr.next,
		},
		{
			name:  "complex",
			input: complexDoc,
			read:  pr.next,
		},
	} {
		t.Run(tc.name, func(t *testing.T) {
			pr.SetSource(tc.input)
			var runes []rune
			var paragraph []rune
			ok := true
			for ok {
				paragraph, ok = tc.read()
				for i, r := range paragraph {
					if i == len(paragraph)-1 {
						// Only the last paragraph may lack a trailing newline.
						if r != '\n' && ok {
							t.Error("non-final paragraph does not end with newline")
						}
					} else if r == '\n' {
						t.Errorf("paragraph[%d] contains newline", i)
					}
				}
				runes = append(runes, paragraph...)
			}
			// Re-read the document directly to obtain the expected runes.
			if _, err := tc.input.Seek(0, io.SeekStart); err != nil {
				t.Fatalf("rewinding input: %v", err)
			}
			b, err := io.ReadAll(tc.input)
			if err != nil {
				t.Fatalf("reading input: %v", err)
			}
			asRunes := []rune(string(b))
			if len(asRunes) != len(runes) {
				t.Errorf("expected %d runes, got %d", len(asRunes), len(runes))
			}
			for i := 0; i < len(asRunes) || i < len(runes); i++ {
				switch {
				case i >= len(runes):
					t.Errorf("expected runes[%d]=%d, got nothing", i, asRunes[i])
				case i >= len(asRunes):
					t.Errorf("expected runes[%d]=nothing, got %d", i, runes[i])
				case runes[i] != asRunes[i]:
					t.Errorf("expected runes[%d]=%d, got %d", i, asRunes[i], runes[i])
				}
			}
		})
	}
}
// TestGraphemeReaderGraphemes checks that the boundaries reported for
// consecutive paragraphs join up (each paragraph starts where the previous
// one ended), that there are never more boundaries than runes plus one, and
// that the merged boundary list is strictly increasing.
func TestGraphemeReaderGraphemes(t *testing.T) {
	latinDoc := bytes.NewReader([]byte(latinDocument))
	arabicDoc := bytes.NewReader([]byte(arabicDocument))
	emojiDoc := bytes.NewReader([]byte(emojiDocument))
	complexDoc := bytes.NewReader([]byte(complexDocument))
	type testcase struct {
		name  string
		input *bytes.Reader
		read  func() []int
	}
	var pr graphemeReader
	cases := []testcase{
		{name: "latin", input: latinDoc, read: pr.Graphemes},
		{name: "arabic", input: arabicDoc, read: pr.Graphemes},
		{name: "emoji", input: emojiDoc, read: pr.Graphemes},
		{name: "complex", input: complexDoc, read: pr.Graphemes},
	}
	for _, tc := range cases {
		t.Run(tc.name, func(t *testing.T) {
			pr.SetSource(tc.input)
			boundaries := []int{}
			for {
				g := tc.read()
				if len(g) == 0 {
					break
				}
				if n := len(boundaries); n > 0 {
					if prev := boundaries[n-1]; g[0] != prev {
						t.Errorf("expected first boundary in new paragraph %d to match final boundary in previous %d", g[0], prev)
					}
					// Drop duplicated boundary.
					g = g[1:]
				}
				boundaries = append(boundaries, g...)
			}
			tc.input.Seek(0, 0)
			raw, _ := io.ReadAll(tc.input)
			runeCount := len([]rune(string(raw)))
			if limit := runeCount + 1; limit < len(boundaries) {
				t.Errorf("expected <= %d graphemes, got %d", limit, len(boundaries))
			}
			for i := 1; i < len(boundaries); i++ {
				if boundaries[i-1] >= boundaries[i] {
					t.Errorf("graphemes[%d](%d) >= graphemes[%d](%d)", i-1, boundaries[i-1], i, boundaries[i])
				}
			}
		})
	}
}
// BenchmarkGraphemeReaderNext measures how long it takes to decode every
// paragraph of each sample document with next. The source is re-set on each
// iteration so that every iteration reads the whole document.
func BenchmarkGraphemeReaderNext(b *testing.B) {
	latinDoc := bytes.NewReader([]byte(latinDocument))
	arabicDoc := bytes.NewReader([]byte(arabicDocument))
	emojiDoc := bytes.NewReader([]byte(emojiDocument))
	complexDoc := bytes.NewReader([]byte(complexDocument))
	type testcase struct {
		name  string
		input *bytes.Reader
		read  func() ([]rune, bool)
	}
	pr := &graphemeReader{}
	for _, tc := range []testcase{
		{
			name:  "latin",
			input: latinDoc,
			read:  pr.next,
		},
		{
			name:  "arabic",
			input: arabicDoc,
			read:  pr.next,
		},
		{
			name:  "emoji",
			input: emojiDoc,
			read:  pr.next,
		},
		{
			name:  "complex",
			input: complexDoc,
			read:  pr.next,
		},
	} {
		b.Run(tc.name, func(b *testing.B) {
			// paragraph only receives the results of next; no buffer needs
			// to be pre-allocated for it.
			var paragraph []rune
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				pr.SetSource(tc.input)
				ok := true
				for ok {
					paragraph, ok = tc.read()
				}
			}
			_ = paragraph
		})
	}
}
// BenchmarkGraphemeReaderGraphemes measures how long it takes to segment
// every paragraph of each sample document into grapheme clusters. The source
// is re-set on each iteration so that every iteration segments the whole
// document.
func BenchmarkGraphemeReaderGraphemes(b *testing.B) {
	latinDoc := bytes.NewReader([]byte(latinDocument))
	arabicDoc := bytes.NewReader([]byte(arabicDocument))
	emojiDoc := bytes.NewReader([]byte(emojiDocument))
	complexDoc := bytes.NewReader([]byte(complexDocument))
	type testcase struct {
		name  string
		input *bytes.Reader
		read  func() []int
	}
	pr := &graphemeReader{}
	cases := []testcase{
		{name: "latin", input: latinDoc, read: pr.Graphemes},
		{name: "arabic", input: arabicDoc, read: pr.Graphemes},
		{name: "emoji", input: emojiDoc, read: pr.Graphemes},
		{name: "complex", input: complexDoc, read: pr.Graphemes},
	}
	for _, tc := range cases {
		b.Run(tc.name, func(b *testing.B) {
			b.ResetTimer()
			for i := 0; i < b.N; i++ {
				pr.SetSource(tc.input)
				// Drain every paragraph; an empty result marks the end.
				for {
					g := tc.read()
					if len(g) == 0 {
						break
					}
					_ = g
				}
			}
		})
	}
}
+96 -18
View File
@@ -17,6 +17,7 @@ import (
"gioui.org/op/paint"
"gioui.org/text"
"gioui.org/unit"
"golang.org/x/exp/slices"
"golang.org/x/image/math/fixed"
)
@@ -54,12 +55,16 @@ type textView struct {
// are accessed by Len, Text, and SetText.
Mask rune
font text.Font
shaper *text.Shaper
textSize fixed.Int26_6
seekCursor int64
rr textSource
maskReader maskReader
font text.Font
shaper *text.Shaper
textSize fixed.Int26_6
seekCursor int64
rr textSource
maskReader maskReader
// graphemes tracks the indices of grapheme cluster boundaries within rr.
graphemes []int
// paragraphReader is used to populate graphemes.
paragraphReader graphemeReader
lastMask rune
maxWidth, minWidth int
viewSize image.Point
@@ -163,12 +168,43 @@ func (e *textView) closestToXY(x fixed.Int26_6, y int) combinedPos {
return e.index.closestToXY(x, y)
}
// closestToXYGraphemes returns a position on a grapheme cluster boundary near
// the provided coordinates. It resolves the rune position closest to (x, y),
// then considers two candidate boundaries: the one moveByGraphemes resolves
// for that rune position, and its neighbor (the preceding boundary when the
// first candidate lies after the rune position, the following one otherwise).
// The candidate whose x coordinate is nearest to x wins; ties go to the first.
func (e *textView) closestToXYGraphemes(x fixed.Int26_6, y int) combinedPos {
	runePos := e.closestToXY(x, y)
	nearIdx := e.moveByGraphemes(runePos.runes, 0)
	// Look backward when the first candidate overshot the rune position,
	// forward otherwise.
	step := 1
	if nearIdx > runePos.runes {
		step = -1
	}
	farIdx := e.moveByGraphemes(nearIdx, step)
	near := e.closestToRune(nearIdx)
	nearDist := absFixed(near.x - x)
	far := e.closestToRune(farIdx)
	farDist := absFixed(far.x - x)
	if farDist < nearDist {
		return far
	}
	return near
}
// absFixed returns the absolute value of the fixed-point number i.
func absFixed(i fixed.Int26_6) fixed.Int26_6 {
	if i >= 0 {
		return i
	}
	return -i
}
// MoveLines moves the cursor the specified number of lines vertically, ensuring
// that the resulting position is aligned to a grapheme cluster boundary.
func (e *textView) MoveLines(distance int, selAct selectionAction) {
caretStart := e.closestToRune(e.caret.start)
x := caretStart.x + e.caret.xoff
// Seek to line.
pos := e.closestToLineCol(caretStart.lineCol.line+distance, 0)
pos = e.closestToXY(x, pos.y)
pos = e.closestToXYGraphemes(x, pos.y)
e.caret.start = pos.runes
e.caret.xoff = x - pos.x
e.updateSelection(selAct)
@@ -399,10 +435,12 @@ func (e *textView) scrollAbs(x, y int) {
}
}
// MoveCoord moves the caret to the position closest to the provided
// point that is aligned to a grapheme cluster boundary.
func (e *textView) MoveCoord(pos image.Point) {
x := fixed.I(pos.X + e.scrollOff.X)
y := pos.Y + e.scrollOff.Y
e.caret.start = e.closestToXY(x, y).runes
e.caret.start = e.closestToXYGraphemes(x, y).runes
e.caret.xoff = 0
}
@@ -431,9 +469,16 @@ func (e *textView) layoutText(lt *text.Shaper) {
for _, _, err := b.ReadRune(); err != io.EOF; _, _, err = b.ReadRune() {
g, _ := it.processGlyph(text.Glyph{Runes: 1, Flags: text.FlagClusterBreak}, true)
e.index.Glyph(g)
}
}
e.paragraphReader.SetSource(e.rr)
e.graphemes = e.graphemes[:0]
for g := e.paragraphReader.Graphemes(); len(g) > 0; g = e.paragraphReader.Graphemes() {
if len(e.graphemes) > 0 && g[0] == e.graphemes[len(e.graphemes)-1] {
g = g[1:]
}
e.graphemes = append(e.graphemes, g...)
}
dims := layout.Dimensions{Size: it.bounds.Size()}
dims.Baseline = dims.Size.Y - it.baseline
e.dims = dims
@@ -521,44 +566,74 @@ func (e *textView) Replace(start, end int, s string) int {
return sc
}
// MovePages moves the caret position by vertical pages of text, ensuring that
// the final position is aligned to a grapheme cluster boundary.
func (e *textView) MovePages(pages int, selAct selectionAction) {
caret := e.closestToRune(e.caret.start)
x := caret.x + e.caret.xoff
y := caret.y + pages*e.viewSize.Y
pos := e.closestToXY(x, y)
pos := e.closestToXYGraphemes(x, y)
e.caret.start = pos.runes
e.caret.xoff = x - pos.x
e.updateSelection(selAct)
}
// MoveCaret moves the caret (aka selection start) and the selection end
// relative to their current positions. Positive distances moves forward,
// negative distances moves backward. Distances are in runes.
func (e *textView) MoveCaret(startDelta, endDelta int) {
e.caret.xoff = 0
e.caret.start = e.closestToRune(e.caret.start + startDelta).runes
e.caret.end = e.closestToRune(e.caret.end + endDelta).runes
// moveByGraphemes returns the rune index resulting from moving the
// specified number of grapheme clusters from startRuneidx. Positive counts
// move forward and negative counts move backward; the result is clamped to
// the first and last known boundaries. When no boundaries are known,
// startRuneidx is returned unchanged.
func (e *textView) moveByGraphemes(startRuneidx, graphemes int) int {
	if len(e.graphemes) == 0 {
		return startRuneidx
	}
	// The search yields the index of startRuneidx within the boundary list,
	// or the position at which it would be inserted if it is not a boundary.
	idx, _ := slices.BinarySearch(e.graphemes, startRuneidx)
	idx += graphemes
	if idx < 0 {
		idx = 0
	}
	if last := len(e.graphemes) - 1; idx > last {
		idx = last
	}
	return e.closestToRune(e.graphemes[idx]).runes
}
// clampCursorToGraphemes snaps both the caret start and the caret end onto
// grapheme cluster boundaries by moving each of them zero clusters.
func (e *textView) clampCursorToGraphemes() {
	snap := func(runeIdx int) int {
		return e.moveByGraphemes(runeIdx, 0)
	}
	e.caret.start = snap(e.caret.start)
	e.caret.end = snap(e.caret.end)
}
// MoveCaret shifts the caret (aka selection start) by startDelta and the
// selection end by endDelta, relative to their current positions. Positive
// deltas move forward and negative deltas move backward. Deltas are counted
// in grapheme clusters, which better match the expectations of users than
// runes. The caret's horizontal offset is reset.
func (e *textView) MoveCaret(startDelta, endDelta int) {
	e.caret.xoff = 0
	newStart := e.moveByGraphemes(e.caret.start, startDelta)
	e.caret.start = newStart
	newEnd := e.moveByGraphemes(e.caret.end, endDelta)
	e.caret.end = newEnd
}
// MoveStart places the caret at the start of the line it is currently on,
// updates the selection according to selAct, and finally snaps the cursor
// onto grapheme cluster boundaries.
func (e *textView) MoveStart(selAct selectionAction) {
	current := e.closestToRune(e.caret.start)
	lineStart := e.closestToLineCol(current.lineCol.line, 0)
	e.caret.start = lineStart.runes
	e.caret.xoff = -lineStart.x
	e.updateSelection(selAct)
	e.clampCursorToGraphemes()
}
// MoveEnd places the caret at the end of the line it is currently on,
// updates the selection according to selAct, and finally snaps the cursor
// onto grapheme cluster boundaries.
func (e *textView) MoveEnd(selAct selectionAction) {
	current := e.closestToRune(e.caret.start)
	// Asking for the largest possible column resolves to the end of the line.
	lineEnd := e.closestToLineCol(current.lineCol.line, math.MaxInt)
	e.caret.start = lineEnd.runes
	e.caret.xoff = fixed.I(e.maxWidth) - lineEnd.x
	e.updateSelection(selAct)
	e.clampCursorToGraphemes()
}
// MoveWord moves the caret to the next word in the specified direction.
// Positive is forward, negative is backward.
// Absolute values greater than one will skip that many words.
// The final caret position will be aligned to a grapheme cluster boundary.
// BUG(whereswaldon): this method's definition of a "word" is currently
// whitespace-delimited. Languages that do not use whitespace to delimit
// words will experience counter-intuitive behavior when navigating by
@@ -598,6 +673,7 @@ func (e *textView) MoveWord(distance int, selAct selectionAction) {
}
}
e.updateSelection(selAct)
e.clampCursorToGraphemes()
}
func (e *textView) ScrollToCaret() {
@@ -635,11 +711,13 @@ func (e *textView) Selection() (start, end int) {
return e.caret.start, e.caret.end
}
// SetCaret moves the caret to start, and sets the selection end to end. start
// SetCaret moves the caret to start, and sets the selection end to end. Then
// the two ends are clamped to the nearest grapheme cluster boundary. start
// and end are in runes, and represent offsets into the editor text.
func (e *textView) SetCaret(start, end int) {
	// Resolve each requested rune offset to an existing rune position.
	startPos := e.closestToRune(start)
	e.caret.start = startPos.runes
	endPos := e.closestToRune(end)
	e.caret.end = endPos.runes
	// Snap both ends onto grapheme cluster boundaries.
	e.clampCursorToGraphemes()
}
// SelectedText returns the currently selected text (if any) from the editor,