diff --git a/widget/editor.go b/widget/editor.go index ce067541..89fd9af7 100644 --- a/widget/editor.go +++ b/widget/editor.go @@ -742,18 +742,21 @@ func (e *Editor) CaretCoords() f32.Point { // direction to delete: positive is forward, negative is backward. // -// If there is a selection, it is deleted and counts as a single rune. -func (e *Editor) Delete(runes int) { +// If there is a selection, it is deleted and counts as a single grapheme cluster. +func (e *Editor) Delete(graphemeClusters int) { e.initBuffer() - if runes == 0 { + if graphemeClusters == 0 { return } start, end := e.text.Selection() if start != end { - runes -= sign(runes) + graphemeClusters -= sign(graphemeClusters) } - end += runes + // Move caret by the target quantity of clusters. + e.text.MoveCaret(0, graphemeClusters) + // Get the new rune offsets of the selection. + start, end = e.text.Selection() e.replace(start, end, "", true) // Reset xoff. e.text.MoveCaret(0, 0) @@ -889,7 +892,9 @@ func (e *Editor) replace(start, end int, s string, addHistory bool) int { // MoveCaret moves the caret (aka selection start) and the selection end // relative to their current positions. Positive distances moves forward, -// negative distances moves backward. Distances are in runes. +// negative distances moves backward. Distances are in grapheme clusters, +// which closely match what users perceive as "characters" even when the +// characters are multiple code points long. func (e *Editor) MoveCaret(startDelta, endDelta int) { e.initBuffer() e.text.MoveCaret(startDelta, endDelta) diff --git a/widget/index.go b/widget/index.go index f8a00cb8..6623f9cb 100644 --- a/widget/index.go +++ b/widget/index.go @@ -3,11 +3,14 @@ package widget import ( + "bufio" "image" + "io" "math" "sort" "gioui.org/text" + "github.com/go-text/typesetting/segmenter" "golang.org/x/image/math/fixed" ) @@ -415,3 +418,74 @@ func (g *glyphIndex) locate(viewport image.Rectangle, startRune, endRune int, re } return rects } + +// graphemeReader segments paragraphs of text into grapheme clusters. 
+type graphemeReader struct { + segmenter.Segmenter + graphemes []int + paragraph []rune + source io.ReaderAt + cursor int64 + reader *bufio.Reader + runeOffset int +} + +// SetSource configures the reader to pull from source, starting over from its first byte. +func (p *graphemeReader) SetSource(source io.ReaderAt) { + p.source = source + p.cursor = 0 + p.reader = bufio.NewReader(p) + p.runeOffset = 0 +} + +// Read exists to satisfy io.Reader for the internal bufio.Reader. It should not be directly invoked. +func (p *graphemeReader) Read(b []byte) (int, error) { + n, err := p.source.ReadAt(b, p.cursor) + p.cursor += int64(n) + return n, err +} + +// next decodes one paragraph of rune data (through its '\n') into a reused slice; false means a read failed. +func (p *graphemeReader) next() ([]rune, bool) { + p.paragraph = p.paragraph[:0] + var err error + var r rune + for err == nil { + r, _, err = p.reader.ReadRune() + if err != nil { + break + } + p.paragraph = append(p.paragraph, r) + if r == '\n' { + break + } + } + return p.paragraph, err == nil +} + +// Graphemes will return the next paragraph's grapheme cluster boundaries, +// if any. If it returns an empty slice, there is no more data (all paragraphs +// have been segmented). 
+func (p *graphemeReader) Graphemes() []int { + var more bool + p.graphemes = p.graphemes[:0] + p.paragraph, more = p.next() + if len(p.paragraph) == 0 && !more { + return nil + } + p.Segmenter.Init(p.paragraph) + iter := p.Segmenter.GraphemeIterator() + if iter.Next() { + graph := iter.Grapheme() + p.graphemes = append(p.graphemes, + p.runeOffset+graph.Offset, + p.runeOffset+graph.Offset+len(graph.Text), + ) + } + for iter.Next() { + graph := iter.Grapheme() + p.graphemes = append(p.graphemes, p.runeOffset+graph.Offset+len(graph.Text)) + } + p.runeOffset += len(p.paragraph) + return p.graphemes +} diff --git a/widget/index_test.go b/widget/index_test.go index bfa9ab97..5b8d2f9e 100644 --- a/widget/index_test.go +++ b/widget/index_test.go @@ -1,6 +1,8 @@ package widget import ( + "bytes" + "io" "testing" nsareg "eliasnaur.com/font/noto/sans/arabic/regular" @@ -550,3 +552,232 @@ func printGlyphs(t *testing.T, glyphs []text.Glyph) { t.Logf("glyphs[%2d] = {ID: 0x%013x, Flags: %4s, Advance: %4d(%6v), Runes: %d, Y: %3d, X: %4d(%6v)} ", i, g.ID, g.Flags, g.Advance, g.Advance, g.Runes, g.Y, g.X, g.X) } } + +func TestGraphemeReaderNext(t *testing.T) { + latinDoc := bytes.NewReader([]byte(latinDocument)) + arabicDoc := bytes.NewReader([]byte(arabicDocument)) + emojiDoc := bytes.NewReader([]byte(emojiDocument)) + complexDoc := bytes.NewReader([]byte(complexDocument)) + type testcase struct { + name string + input *bytes.Reader + read func() ([]rune, bool) + } + var pr graphemeReader + for _, tc := range []testcase{ + { + name: "latin", + input: latinDoc, + read: pr.next, + }, + { + name: "arabic", + input: arabicDoc, + read: pr.next, + }, + { + name: "emoji", + input: emojiDoc, + read: pr.next, + }, + { + name: "complex", + input: complexDoc, + read: pr.next, + }, + } { + t.Run(tc.name, func(t *testing.T) { + pr.SetSource(tc.input) + + runes := []rune{} + var paragraph []rune + ok := true + for ok { + paragraph, ok = tc.read() + if ok && len(paragraph) > 0 && 
paragraph[len(paragraph)-1] != '\n' { + } + for i, r := range paragraph { + if i == len(paragraph)-1 { + if r != '\n' && ok { + t.Error("non-final paragraph does not end with newline") + } + } else if r == '\n' { + t.Errorf("paragraph[%d] contains newline", i) + } + } + runes = append(runes, paragraph...) + } + tc.input.Seek(0, 0) + b, _ := io.ReadAll(tc.input) + asRunes := []rune(string(b)) + if len(asRunes) != len(runes) { + t.Errorf("expected %d runes, got %d", len(asRunes), len(runes)) + } + for i := 0; i < max(len(asRunes), len(runes)); i++ { + if i < min(len(asRunes), len(runes)) { + if runes[i] != asRunes[i] { + t.Errorf("expected runes[%d]=%d, got %d", i, asRunes[i], runes[i]) + } + } else if i < len(asRunes) { + t.Errorf("expected runes[%d]=%d, got nothing", i, asRunes[i]) + } else if i < len(runes) { + t.Errorf("expected runes[%d]=nothing, got %d", i, runes[i]) + } + } + }) + } +} +func TestGraphemeReaderGraphemes(t *testing.T) { + latinDoc := bytes.NewReader([]byte(latinDocument)) + arabicDoc := bytes.NewReader([]byte(arabicDocument)) + emojiDoc := bytes.NewReader([]byte(emojiDocument)) + complexDoc := bytes.NewReader([]byte(complexDocument)) + type testcase struct { + name string + input *bytes.Reader + read func() []int + } + var pr graphemeReader + for _, tc := range []testcase{ + { + name: "latin", + input: latinDoc, + read: pr.Graphemes, + }, + { + name: "arabic", + input: arabicDoc, + read: pr.Graphemes, + }, + { + name: "emoji", + input: emojiDoc, + read: pr.Graphemes, + }, + { + name: "complex", + input: complexDoc, + read: pr.Graphemes, + }, + } { + t.Run(tc.name, func(t *testing.T) { + pr.SetSource(tc.input) + + graphemes := []int{} + for g := tc.read(); len(g) > 0; g = tc.read() { + if len(graphemes) > 0 && g[0] != graphemes[len(graphemes)-1] { + t.Errorf("expected first boundary in new paragraph %d to match final boundary in previous %d", g[0], graphemes[len(graphemes)-1]) + } + if len(graphemes) > 0 { + // Drop duplicated boundary. 
+ g = g[1:] + } + graphemes = append(graphemes, g...) + } + tc.input.Seek(0, 0) + b, _ := io.ReadAll(tc.input) + asRunes := []rune(string(b)) + if len(asRunes)+1 < len(graphemes) { + t.Errorf("expected <= %d graphemes, got %d", len(asRunes)+1, len(graphemes)) + } + for i := 0; i < len(graphemes)-1; i++ { + if graphemes[i] >= graphemes[i+1] { + t.Errorf("graphemes[%d](%d) >= graphemes[%d](%d)", i, graphemes[i], i+1, graphemes[i+1]) + } + } + }) + } +} +func BenchmarkGraphemeReaderNext(b *testing.B) { + latinDoc := bytes.NewReader([]byte(latinDocument)) + arabicDoc := bytes.NewReader([]byte(arabicDocument)) + emojiDoc := bytes.NewReader([]byte(emojiDocument)) + complexDoc := bytes.NewReader([]byte(complexDocument)) + type testcase struct { + name string + input *bytes.Reader + read func() ([]rune, bool) + } + pr := &graphemeReader{} + for _, tc := range []testcase{ + { + name: "latin", + input: latinDoc, + read: pr.next, + }, + { + name: "arabic", + input: arabicDoc, + read: pr.next, + }, + { + name: "emoji", + input: emojiDoc, + read: pr.next, + }, + { + name: "complex", + input: complexDoc, + read: pr.next, + }, + } { + var paragraph []rune = make([]rune, 4096) + b.Run(tc.name, func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + pr.SetSource(tc.input) + + ok := true + for ok { + paragraph, ok = tc.read() + _ = paragraph + } + _ = paragraph + } + }) + } +} +func BenchmarkGraphemeReaderGraphemes(b *testing.B) { + latinDoc := bytes.NewReader([]byte(latinDocument)) + arabicDoc := bytes.NewReader([]byte(arabicDocument)) + emojiDoc := bytes.NewReader([]byte(emojiDocument)) + complexDoc := bytes.NewReader([]byte(complexDocument)) + type testcase struct { + name string + input *bytes.Reader + read func() []int + } + pr := &graphemeReader{} + for _, tc := range []testcase{ + { + name: "latin", + input: latinDoc, + read: pr.Graphemes, + }, + { + name: "arabic", + input: arabicDoc, + read: pr.Graphemes, + }, + { + name: "emoji", + input: emojiDoc, + read: 
pr.Graphemes, + }, + { + name: "complex", + input: complexDoc, + read: pr.Graphemes, + }, + } { + b.Run(tc.name, func(b *testing.B) { + b.ResetTimer() + for i := 0; i < b.N; i++ { + pr.SetSource(tc.input) + for g := tc.read(); len(g) > 0; g = tc.read() { + _ = g + } + } + }) + } +} diff --git a/widget/text.go b/widget/text.go index 59526b37..c20d5254 100644 --- a/widget/text.go +++ b/widget/text.go @@ -17,6 +17,7 @@ import ( "gioui.org/op/paint" "gioui.org/text" "gioui.org/unit" + "golang.org/x/exp/slices" "golang.org/x/image/math/fixed" ) @@ -54,12 +55,16 @@ type textView struct { // are accessed by Len, Text, and SetText. Mask rune - font text.Font - shaper *text.Shaper - textSize fixed.Int26_6 - seekCursor int64 - rr textSource - maskReader maskReader + font text.Font + shaper *text.Shaper + textSize fixed.Int26_6 + seekCursor int64 + rr textSource + maskReader maskReader + // graphemes tracks the indices of grapheme cluster boundaries within rr. + graphemes []int + // paragraphReader is used to populate graphemes. + paragraphReader graphemeReader lastMask rune maxWidth, minWidth int viewSize image.Point @@ -163,12 +168,43 @@ func (e *textView) closestToXY(x fixed.Int26_6, y int) combinedPos { return e.index.closestToXY(x, y) } +func (e *textView) closestToXYGraphemes(x fixed.Int26_6, y int) combinedPos { + // Find the closest existing rune position to the provided coordinates. + pos := e.closestToXY(x, y) + // Resolve cluster boundaries on either side of the rune position. + firstOption := e.moveByGraphemes(pos.runes, 0) + distance := 1 + if firstOption > pos.runes { + distance = -1 + } + secondOption := e.moveByGraphemes(firstOption, distance) + // Choose the closest grapheme cluster boundary to the desired point. 
+ first := e.closestToRune(firstOption) + firstDist := absFixed(first.x - x) + second := e.closestToRune(secondOption) + secondDist := absFixed(second.x - x) + if firstDist > secondDist { + return second + } else { + return first + } +} + +func absFixed(i fixed.Int26_6) fixed.Int26_6 { + if i < 0 { + return -i + } + return i +} + +// MoveLines moves the cursor the specified number of lines vertically, ensuring +// that the resulting position is aligned to a grapheme cluster boundary. func (e *textView) MoveLines(distance int, selAct selectionAction) { caretStart := e.closestToRune(e.caret.start) x := caretStart.x + e.caret.xoff // Seek to line. pos := e.closestToLineCol(caretStart.lineCol.line+distance, 0) - pos = e.closestToXY(x, pos.y) + pos = e.closestToXYGraphemes(x, pos.y) e.caret.start = pos.runes e.caret.xoff = x - pos.x e.updateSelection(selAct) @@ -399,10 +435,12 @@ func (e *textView) scrollAbs(x, y int) { } } +// MoveCoord moves the caret to the position closest to the provided +// point that is aligned to a grapheme cluster boundary. func (e *textView) MoveCoord(pos image.Point) { x := fixed.I(pos.X + e.scrollOff.X) y := pos.Y + e.scrollOff.Y - e.caret.start = e.closestToXY(x, y).runes + e.caret.start = e.closestToXYGraphemes(x, y).runes e.caret.xoff = 0 } @@ -431,9 +469,16 @@ func (e *textView) layoutText(lt *text.Shaper) { for _, _, err := b.ReadRune(); err != io.EOF; _, _, err = b.ReadRune() { g, _ := it.processGlyph(text.Glyph{Runes: 1, Flags: text.FlagClusterBreak}, true) e.index.Glyph(g) - } } + e.paragraphReader.SetSource(e.rr) + e.graphemes = e.graphemes[:0] + for g := e.paragraphReader.Graphemes(); len(g) > 0; g = e.paragraphReader.Graphemes() { + if len(e.graphemes) > 0 && g[0] == e.graphemes[len(e.graphemes)-1] { + g = g[1:] + } + e.graphemes = append(e.graphemes, g...) 
+ } dims := layout.Dimensions{Size: it.bounds.Size()} dims.Baseline = dims.Size.Y - it.baseline e.dims = dims @@ -521,44 +566,74 @@ func (e *textView) Replace(start, end int, s string) int { return sc } +// MovePages moves the caret position by vertical pages of text, ensuring that +// the final position is aligned to a grapheme cluster boundary. func (e *textView) MovePages(pages int, selAct selectionAction) { caret := e.closestToRune(e.caret.start) x := caret.x + e.caret.xoff y := caret.y + pages*e.viewSize.Y - pos := e.closestToXY(x, y) + pos := e.closestToXYGraphemes(x, y) e.caret.start = pos.runes e.caret.xoff = x - pos.x e.updateSelection(selAct) } -// MoveCaret moves the caret (aka selection start) and the selection end -// relative to their current positions. Positive distances moves forward, -// negative distances moves backward. Distances are in runes. -func (e *textView) MoveCaret(startDelta, endDelta int) { - e.caret.xoff = 0 - e.caret.start = e.closestToRune(e.caret.start + startDelta).runes - e.caret.end = e.closestToRune(e.caret.end + endDelta).runes +// moveByGraphemes returns the rune index resulting from moving the +// specified number of grapheme clusters from startRuneidx, which is first snapped forward to a boundary if inside a cluster. +func (e *textView) moveByGraphemes(startRuneidx, graphemes int) int { + if len(e.graphemes) == 0 { + return startRuneidx + } + startGraphemeIdx, _ := slices.BinarySearch(e.graphemes, startRuneidx) + startGraphemeIdx = max(startGraphemeIdx+graphemes, 0) + startGraphemeIdx = min(startGraphemeIdx, len(e.graphemes)-1) + startRuneIdx := e.graphemes[startGraphemeIdx] + return e.closestToRune(startRuneIdx).runes } +// clampCursorToGraphemes ensures that the final start/end positions of +// the cursor are on grapheme cluster boundaries. 
+func (e *textView) clampCursorToGraphemes() { + e.caret.start = e.moveByGraphemes(e.caret.start, 0) + e.caret.end = e.moveByGraphemes(e.caret.end, 0) +} + +// MoveCaret moves the caret (aka selection start) and the selection end +// relative to their current positions. Positive distances move forward, +// negative distances move backward. Distances are in grapheme clusters, which +// better match the expectations of users than runes. +func (e *textView) MoveCaret(startDelta, endDelta int) { + e.caret.xoff = 0 + e.caret.start = e.moveByGraphemes(e.caret.start, startDelta) + e.caret.end = e.moveByGraphemes(e.caret.end, endDelta) +} + +// MoveStart moves the caret to the start of the current line, ensuring that the resulting +// cursor position is on a grapheme cluster boundary. func (e *textView) MoveStart(selAct selectionAction) { caret := e.closestToRune(e.caret.start) caret = e.closestToLineCol(caret.lineCol.line, 0) e.caret.start = caret.runes e.caret.xoff = -caret.x e.updateSelection(selAct) + e.clampCursorToGraphemes() } +// MoveEnd moves the caret to the end of the current line, ensuring that the resulting +// cursor position is on a grapheme cluster boundary. func (e *textView) MoveEnd(selAct selectionAction) { caret := e.closestToRune(e.caret.start) caret = e.closestToLineCol(caret.lineCol.line, math.MaxInt) e.caret.start = caret.runes e.caret.xoff = fixed.I(e.maxWidth) - caret.x e.updateSelection(selAct) + e.clampCursorToGraphemes() } // MoveWord moves the caret to the next word in the specified direction. // Positive is forward, negative is backward. // Absolute values greater than one will skip that many words. +// The final caret position will be aligned to a grapheme cluster boundary. // BUG(whereswaldon): this method's definition of a "word" is currently // whitespace-delimited. 
Languages that do not use whitespace to delimit // words will experience counter-intuitive behavior when navigating by @@ -598,6 +673,7 @@ func (e *textView) MoveWord(distance int, selAct selectionAction) { } } e.updateSelection(selAct) + e.clampCursorToGraphemes() } func (e *textView) ScrollToCaret() { @@ -635,11 +711,13 @@ func (e *textView) Selection() (start, end int) { return e.caret.start, e.caret.end } -// SetCaret moves the caret to start, and sets the selection end to end. start +// SetCaret moves the caret to start, and sets the selection end to end. Then +// the two ends are snapped forward onto grapheme cluster boundaries. start // and end are in runes, and represent offsets into the editor text. func (e *textView) SetCaret(start, end int) { e.caret.start = e.closestToRune(start).runes e.caret.end = e.closestToRune(end).runes + e.clampCursorToGraphemes() } // SelectedText returns the currently selected text (if any) from the editor,