From 9b7ec167bcab68d800ef6037bec2c8321b92dd23 Mon Sep 17 00:00:00 2001 From: Fabien Jansem Date: Sun, 2 Jan 2022 13:13:23 +0100 Subject: [PATCH] delete unicode chars with length > 1 correctly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When there were non-ASCII characters (for example éèàçîï) in a deleted selection or word, more characters than intended were deleted because there was a mismatch between runes and bytes in Delete and deleteWord. Fixes: https://todo.sr.ht/~eliasnaur/gio/330 Signed-off-by: Fabien Jansem --- widget/buffer.go | 19 ++++++++ widget/editor.go | 26 ++++++---- widget/editor_test.go | 110 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 145 insertions(+), 10 deletions(-) diff --git a/widget/buffer.go b/widget/buffer.go index 50e71756..4b953dd1 100644 --- a/widget/buffer.go +++ b/widget/buffer.go @@ -46,6 +46,25 @@ func (e *editBuffer) deleteRunes(caret, runes int) int { return caret } +func (e *editBuffer) deleteBytes(caret, bytes int) int { + e.moveGap(caret, 0) + if bytes < 0 { + e.gapstart += bytes + if e.gapstart < 0 { + e.gapstart = 0 + } + caret = e.gapstart + } + if bytes > 0 { + e.gapend += bytes + if e.gapend > len(e.text) { + e.gapend = len(e.text) + } + } + e.changed = e.changed || bytes != 0 + return caret +} + // moveGap moves the gap to the caret position. After returning, // the gap is guaranteed to be at least space bytes long. 
func (e *editBuffer) moveGap(caret, space int) { diff --git a/widget/editor.go b/widget/editor.go index a20bc46c..a9785575 100644 --- a/widget/editor.go +++ b/widget/editor.go @@ -838,7 +838,7 @@ func (e *Editor) Delete(runes int) { } if l := e.caret.end.ofs - e.caret.start.ofs; l != 0 { - e.caret.start.ofs = e.rr.deleteRunes(e.caret.start.ofs, l) + e.caret.start.ofs = e.rr.deleteBytes(e.caret.start.ofs, l) runes -= sign(runes) } @@ -871,7 +871,7 @@ func (e *Editor) prepend(s string) { if e.SingleLine { s = strings.ReplaceAll(s, "\n", " ") } - e.caret.start.ofs = e.rr.deleteRunes(e.caret.start.ofs, e.caret.end.ofs-e.caret.start.ofs) // Delete any selection first. + e.caret.start.ofs = e.rr.deleteBytes(e.caret.start.ofs, e.caret.end.ofs-e.caret.start.ofs) // Delete any selection first. e.rr.prepend(e.caret.start.ofs, s) e.caret.start.xoff = 0 e.invalidate() @@ -1122,8 +1122,8 @@ func (e *Editor) deleteWord(distance int) { idx := e.caret.start.ofs + offset*direction return idx <= 0 || idx >= e.rr.len() } - // next returns the appropriate rune given the direction and offset. - next := func(offset int) (r rune) { + // next returns the appropriate rune and length given the direction and offset (in bytes). 
+ next := func(offset int) (r rune, l int) { idx := e.caret.start.ofs + offset*direction if idx < 0 { idx = 0 @@ -1131,21 +1131,27 @@ func (e *Editor) deleteWord(distance int) { idx = e.rr.len() } if direction < 0 { - r, _ = e.rr.runeBefore(idx) + r, l = e.rr.runeBefore(idx) } else { - r, _ = e.rr.runeAt(idx) + r, l = e.rr.runeAt(idx) } - return r + return } var runes = 1 + _, bytes := e.rr.runeAt(e.caret.start.ofs) + if direction < 0 { + _, bytes = e.rr.runeBefore(e.caret.start.ofs) + } for ii := 0; ii < words; ii++ { - if r := next(runes); unicode.IsSpace(r) { - for r := next(runes); unicode.IsSpace(r) && !atEnd(runes); r = next(runes) { + if r, _ := next(bytes); unicode.IsSpace(r) { + for r, lg := next(bytes); unicode.IsSpace(r) && !atEnd(bytes); r, lg = next(bytes) { runes += 1 + bytes += lg } } else { - for r := next(runes); !unicode.IsSpace(r) && !atEnd(runes); r = next(runes) { + for r, lg := next(bytes); !unicode.IsSpace(r) && !atEnd(bytes); r, lg = next(bytes) { runes += 1 + bytes += lg } } } diff --git a/widget/editor_test.go b/widget/editor_test.go index d66192eb..f7bda577 100644 --- a/widget/editor_test.go +++ b/widget/editor_test.go @@ -245,6 +245,110 @@ func TestEditorMoveWord(t *testing.T) { } } +func TestEditorInsert(t *testing.T) { + type Test struct { + Text string + Start int + Selection int + Insertion string + + Result string + } + tests := []Test{ + // Nothing inserted + {"", 0, 0, "", ""}, + {"", 0, -1, "", ""}, + {"", 0, 1, "", ""}, + {"", 0, -2, "", ""}, + {"", 0, 2, "", ""}, + {"world", 0, 0, "", "world"}, + {"world", 0, -1, "", "world"}, + {"world", 0, 1, "", "orld"}, + {"world", 2, 0, "", "world"}, + {"world", 2, -1, "", "wrld"}, + {"world", 2, 1, "", "wold"}, + {"world", 5, 0, "", "world"}, + {"world", 5, -1, "", "worl"}, + {"world", 5, 1, "", "world"}, + // One rune inserted + {"", 0, 0, "_", "_"}, + {"", 0, -1, "_", "_"}, + {"", 0, 1, "_", "_"}, + {"", 0, -2, "_", "_"}, + {"", 0, 2, "_", "_"}, + {"world", 0, 0, "_", "_world"}, + 
{"world", 0, -1, "_", "_world"}, + {"world", 0, 1, "_", "_orld"}, + {"world", 2, 0, "_", "wo_rld"}, + {"world", 2, -1, "_", "w_rld"}, + {"world", 2, 1, "_", "wo_ld"}, + {"world", 5, 0, "_", "world_"}, + {"world", 5, -1, "_", "worl_"}, + {"world", 5, 1, "_", "world_"}, + // More runes inserted + {"", 0, 0, "-3-", "-3-"}, + {"", 0, -1, "-3-", "-3-"}, + {"", 0, 1, "-3-", "-3-"}, + {"", 0, -2, "-3-", "-3-"}, + {"", 0, 2, "-3-", "-3-"}, + {"world", 0, 0, "-3-", "-3-world"}, + {"world", 0, -1, "-3-", "-3-world"}, + {"world", 0, 1, "-3-", "-3-orld"}, + {"world", 2, 0, "-3-", "wo-3-rld"}, + {"world", 2, -1, "-3-", "w-3-rld"}, + {"world", 2, 1, "-3-", "wo-3-ld"}, + {"world", 5, 0, "-3-", "world-3-"}, + {"world", 5, -1, "-3-", "worl-3-"}, + {"world", 5, 1, "-3-", "world-3-"}, + // Runes with length > 1 inserted + {"", 0, 0, "éêè", "éêè"}, + {"", 0, -1, "éêè", "éêè"}, + {"", 0, 1, "éêè", "éêè"}, + {"", 0, -2, "éêè", "éêè"}, + {"", 0, 2, "éêè", "éêè"}, + {"world", 0, 0, "éêè", "éêèworld"}, + {"world", 0, -1, "éêè", "éêèworld"}, + {"world", 0, 1, "éêè", "éêèorld"}, + {"world", 2, 0, "éêè", "woéêèrld"}, + {"world", 2, -1, "éêè", "wéêèrld"}, + {"world", 2, 1, "éêè", "woéêèld"}, + {"world", 5, 0, "éêè", "worldéêè"}, + {"world", 5, -1, "éêè", "worléêè"}, + {"world", 5, 1, "éêè", "worldéêè"}, + // Runes with length > 1 deleted from selection + {"élançé", 0, 1, "", "lançé"}, + {"élançé", 0, 1, "-3-", "-3-lançé"}, + {"élançé", 3, 2, "-3-", "éla-3-é"}, + {"élançé", 3, 3, "-3-", "éla-3-"}, + {"élançé", 3, 10, "-3-", "éla-3-"}, + {"élançé", 5, -1, "-3-", "élan-3-é"}, + {"élançé", 6, -1, "-3-", "élanç-3-"}, + {"élançé", 6, -3, "-3-", "éla-3-"}, + } + setup := func(t string) *Editor { + e := new(Editor) + gtx := layout.Context{ + Ops: new(op.Ops), + Constraints: layout.Exact(image.Pt(100, 100)), + } + cache := text.NewCache(gofont.Collection()) + fontSize := unit.Px(10) + font := text.Font{} + e.SetText(t) + e.Layout(gtx, cache, font, fontSize, nil) + return e + } + for ii, tt := range 
tests { + e := setup(tt.Text) + e.MoveCaret(tt.Start, tt.Start) + e.MoveCaret(0, tt.Selection) + e.Insert(tt.Insertion) + if e.Text() != tt.Result { + t.Fatalf("[%d] Insert: invalid result: got %q, want %q", ii, e.Text(), tt.Result) + } + } +} + func TestEditorDeleteWord(t *testing.T) { type Test struct { Text string @@ -283,6 +387,7 @@ func TestEditorDeleteWord(t *testing.T) { {"hello world", 8, 0, 1, 8, "hello "}, {"hello world", 8, 0, -1, 5, "hello world"}, {"hello brave new world", 0, 0, 3, 0, " new world"}, + {"helléèçàô world", 3, 0, 1, 3, "hel world"}, // unicode char with length > 1 in deleted part // Add selected text. // // Several permutations must be tested: @@ -295,11 +400,16 @@ func TestEditorDeleteWord(t *testing.T) { {"hello there brave new world", 12, 6, 2, 12, "hello there world"}, // The two spaces after "there" are actually suboptimal, if you ask me. See also above cases. {"hello there brave new world", 12, 6, -1, 12, "hello there new world"}, {"hello there brave new world", 12, 6, -2, 6, "hello new world"}, + {"hello there b®âve new world", 12, 6, 1, 12, "hello there new world"}, // unicode chars with length > 1 in selection + {"hello there b®âve new world", 12, 6, 2, 12, "hello there world"}, // ditto + {"hello there b®âve new world", 12, 6, -1, 12, "hello there new world"}, // ditto + {"hello there b®âve new world", 12, 6, -2, 6, "hello new world"}, // ditto // "|brave " selected {"hello there brave new world", 18, -6, 1, 12, "hello there new world"}, // #20 {"hello there brave new world", 18, -6, 2, 12, "hello there world"}, // ditto {"hello there brave new world", 18, -6, -1, 12, "hello there new world"}, {"hello there brave new world", 18, -6, -2, 6, "hello new world"}, + {"hello there b®âve new world", 18, -6, 1, 12, "hello there new world"}, // unicode chars with length > 1 in selection // Random edge cases {"hello there brave new world", 12, 6, 99, 12, "hello there "}, {"hello there brave new world", 18, -6, -99, 0, "new world"},