From 9b7ec167bcab68d800ef6037bec2c8321b92dd23 Mon Sep 17 00:00:00 2001
From: Fabien Jansem <fabien@jansem.eu.org>
Date: Sun, 2 Jan 2022 13:13:23 +0100
Subject: [PATCH] delete unicode chars with length > 1 correctly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When there were non-ASCII characters (for example éèàçîï) in a deleted
selection or word, too many characters were deleted because of a
mismatch between rune counts and byte counts in Delete and deleteWord.

Fixes: https://todo.sr.ht/~eliasnaur/gio/330
Signed-off-by: Fabien Jansem <fabien@jansem.eu.org>
---
 widget/buffer.go      |  19 ++++++++
 widget/editor.go      |  26 ++++++----
 widget/editor_test.go | 110 ++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 145 insertions(+), 10 deletions(-)

diff --git a/widget/buffer.go b/widget/buffer.go
index 50e71756..4b953dd1 100644
--- a/widget/buffer.go
+++ b/widget/buffer.go
@@ -46,6 +46,25 @@ func (e *editBuffer) deleteRunes(caret, runes int) int {
 	return caret
 }
 
+func (e *editBuffer) deleteBytes(caret, bytes int) int {
+	e.moveGap(caret, 0) // put the gap at caret; deleting then only means widening the gap
+	if bytes < 0 {
+		e.gapstart += bytes // bytes < 0: grow the gap backwards over the bytes before caret
+		if e.gapstart < 0 {
+			e.gapstart = 0 // clamp: cannot delete past the start of the buffer
+		}
+		caret = e.gapstart // a backward delete moves the caret to the new gap start
+	}
+	if bytes > 0 {
+		e.gapend += bytes // bytes > 0: grow the gap forwards over the bytes after caret
+		if e.gapend > len(e.text) {
+			e.gapend = len(e.text) // clamp: cannot delete past the end of the buffer
+		}
+	}
+	e.changed = e.changed || bytes != 0 // flag a change for any non-zero request, even one clamped to nothing
+	return caret
+}
+
 // moveGap moves the gap to the caret position. After returning,
 // the gap is guaranteed to be at least space bytes long.
 func (e *editBuffer) moveGap(caret, space int) {
diff --git a/widget/editor.go b/widget/editor.go
index a20bc46c..a9785575 100644
--- a/widget/editor.go
+++ b/widget/editor.go
@@ -838,7 +838,7 @@ func (e *Editor) Delete(runes int) {
 	}
 
 	if l := e.caret.end.ofs - e.caret.start.ofs; l != 0 {
-		e.caret.start.ofs = e.rr.deleteRunes(e.caret.start.ofs, l)
+		e.caret.start.ofs = e.rr.deleteBytes(e.caret.start.ofs, l)
 		runes -= sign(runes)
 	}
 
@@ -871,7 +871,7 @@ func (e *Editor) prepend(s string) {
 	if e.SingleLine {
 		s = strings.ReplaceAll(s, "\n", " ")
 	}
-	e.caret.start.ofs = e.rr.deleteRunes(e.caret.start.ofs, e.caret.end.ofs-e.caret.start.ofs) // Delete any selection first.
+	e.caret.start.ofs = e.rr.deleteBytes(e.caret.start.ofs, e.caret.end.ofs-e.caret.start.ofs) // Delete any selection first.
 	e.rr.prepend(e.caret.start.ofs, s)
 	e.caret.start.xoff = 0
 	e.invalidate()
@@ -1122,8 +1122,8 @@ func (e *Editor) deleteWord(distance int) {
 		idx := e.caret.start.ofs + offset*direction
 		return idx <= 0 || idx >= e.rr.len()
 	}
-	// next returns the appropriate rune given the direction and offset.
-	next := func(offset int) (r rune) {
+	// next returns the appropriate rune and length given the direction and offset (in bytes).
+	next := func(offset int) (r rune, l int) {
 		idx := e.caret.start.ofs + offset*direction
 		if idx < 0 {
 			idx = 0
@@ -1131,21 +1131,27 @@ func (e *Editor) deleteWord(distance int) {
 			idx = e.rr.len()
 		}
 		if direction < 0 {
-			r, _ = e.rr.runeBefore(idx)
+			r, l = e.rr.runeBefore(idx)
 		} else {
-			r, _ = e.rr.runeAt(idx)
+			r, l = e.rr.runeAt(idx)
 		}
-		return r
+		return
 	}
 	var runes = 1
+	_, bytes := e.rr.runeAt(e.caret.start.ofs)
+	if direction < 0 {
+		_, bytes = e.rr.runeBefore(e.caret.start.ofs)
+	}
 	for ii := 0; ii < words; ii++ {
-		if r := next(runes); unicode.IsSpace(r) {
-			for r := next(runes); unicode.IsSpace(r) && !atEnd(runes); r = next(runes) {
+		if r, _ := next(bytes); unicode.IsSpace(r) {
+			for r, lg := next(bytes); unicode.IsSpace(r) && !atEnd(bytes); r, lg = next(bytes) {
 				runes += 1
+				bytes += lg
 			}
 		} else {
-			for r := next(runes); !unicode.IsSpace(r) && !atEnd(runes); r = next(runes) {
+			for r, lg := next(bytes); !unicode.IsSpace(r) && !atEnd(bytes); r, lg = next(bytes) {
 				runes += 1
+				bytes += lg
 			}
 		}
 	}
diff --git a/widget/editor_test.go b/widget/editor_test.go
index d66192eb..f7bda577 100644
--- a/widget/editor_test.go
+++ b/widget/editor_test.go
@@ -245,6 +245,110 @@ func TestEditorMoveWord(t *testing.T) {
 	}
 }
 
+func TestEditorInsert(t *testing.T) {
+	type Test struct {
+		Text      string // initial editor content
+		Start     int    // caret position, passed as MoveCaret(Start, Start); the table counts it in runes
+		Selection int    // signed selection extent, passed as MoveCaret(0, Selection); also counted in runes
+		Insertion string // text handed to Insert
+
+		Result string // expected e.Text() after the insert
+	}
+	tests := []Test{
+		// Nothing inserted
+		{"", 0, 0, "", ""},
+		{"", 0, -1, "", ""},
+		{"", 0, 1, "", ""},
+		{"", 0, -2, "", ""},
+		{"", 0, 2, "", ""},
+		{"world", 0, 0, "", "world"},
+		{"world", 0, -1, "", "world"},
+		{"world", 0, 1, "", "orld"},
+		{"world", 2, 0, "", "world"},
+		{"world", 2, -1, "", "wrld"},
+		{"world", 2, 1, "", "wold"},
+		{"world", 5, 0, "", "world"},
+		{"world", 5, -1, "", "worl"},
+		{"world", 5, 1, "", "world"},
+		// One rune inserted
+		{"", 0, 0, "_", "_"},
+		{"", 0, -1, "_", "_"},
+		{"", 0, 1, "_", "_"},
+		{"", 0, -2, "_", "_"},
+		{"", 0, 2, "_", "_"},
+		{"world", 0, 0, "_", "_world"},
+		{"world", 0, -1, "_", "_world"},
+		{"world", 0, 1, "_", "_orld"},
+		{"world", 2, 0, "_", "wo_rld"},
+		{"world", 2, -1, "_", "w_rld"},
+		{"world", 2, 1, "_", "wo_ld"},
+		{"world", 5, 0, "_", "world_"},
+		{"world", 5, -1, "_", "worl_"},
+		{"world", 5, 1, "_", "world_"},
+		// More runes inserted
+		{"", 0, 0, "-3-", "-3-"},
+		{"", 0, -1, "-3-", "-3-"},
+		{"", 0, 1, "-3-", "-3-"},
+		{"", 0, -2, "-3-", "-3-"},
+		{"", 0, 2, "-3-", "-3-"},
+		{"world", 0, 0, "-3-", "-3-world"},
+		{"world", 0, -1, "-3-", "-3-world"},
+		{"world", 0, 1, "-3-", "-3-orld"},
+		{"world", 2, 0, "-3-", "wo-3-rld"},
+		{"world", 2, -1, "-3-", "w-3-rld"},
+		{"world", 2, 1, "-3-", "wo-3-ld"},
+		{"world", 5, 0, "-3-", "world-3-"},
+		{"world", 5, -1, "-3-", "worl-3-"},
+		{"world", 5, 1, "-3-", "world-3-"},
+		// Runes with length > 1 inserted
+		{"", 0, 0, "éêè", "éêè"},
+		{"", 0, -1, "éêè", "éêè"},
+		{"", 0, 1, "éêè", "éêè"},
+		{"", 0, -2, "éêè", "éêè"},
+		{"", 0, 2, "éêè", "éêè"},
+		{"world", 0, 0, "éêè", "éêèworld"},
+		{"world", 0, -1, "éêè", "éêèworld"},
+		{"world", 0, 1, "éêè", "éêèorld"},
+		{"world", 2, 0, "éêè", "woéêèrld"},
+		{"world", 2, -1, "éêè", "wéêèrld"},
+		{"world", 2, 1, "éêè", "woéêèld"},
+		{"world", 5, 0, "éêè", "worldéêè"},
+		{"world", 5, -1, "éêè", "worléêè"},
+		{"world", 5, 1, "éêè", "worldéêè"},
+		// Runes with length > 1 deleted from selection
+		{"élançé", 0, 1, "", "lançé"},
+		{"élançé", 0, 1, "-3-", "-3-lançé"},
+		{"élançé", 3, 2, "-3-", "éla-3-é"},
+		{"élançé", 3, 3, "-3-", "éla-3-"},
+		{"élançé", 3, 10, "-3-", "éla-3-"},
+		{"élançé", 5, -1, "-3-", "élan-3-é"},
+		{"élançé", 6, -1, "-3-", "élanç-3-"},
+		{"élançé", 6, -3, "-3-", "éla-3-"},
+	}
+	setup := func(t string) *Editor { // builds an Editor holding text t and lays it out once; note t shadows the *testing.T inside this closure
+		e := new(Editor)
+		gtx := layout.Context{
+			Ops:         new(op.Ops),
+			Constraints: layout.Exact(image.Pt(100, 100)),
+		}
+		cache := text.NewCache(gofont.Collection())
+		fontSize := unit.Px(10)
+		font := text.Font{}
+		e.SetText(t)
+		e.Layout(gtx, cache, font, fontSize, nil)
+		return e
+	}
+	for ii, tt := range tests {
+		e := setup(tt.Text)
+		e.MoveCaret(tt.Start, tt.Start) // place the caret at Start with no selection; NOTE(review): MoveCaret is defined outside this patch, semantics inferred from the table
+		e.MoveCaret(0, tt.Selection)    // then extend the selection by Selection, so Insert must replace the selected text
+		e.Insert(tt.Insertion)
+		if e.Text() != tt.Result {
+			t.Fatalf("[%d] Insert: invalid result: got %q, want %q", ii, e.Text(), tt.Result)
+		}
+	}
+}
+
 func TestEditorDeleteWord(t *testing.T) {
 	type Test struct {
 		Text      string
@@ -283,6 +387,7 @@ func TestEditorDeleteWord(t *testing.T) {
 		{"hello    world", 8, 0, 1, 8, "hello   "},
 		{"hello    world", 8, 0, -1, 5, "hello world"},
 		{"hello brave new world", 0, 0, 3, 0, " new world"},
+		{"helléèçàô world", 3, 0, 1, 3, "hel world"}, // unicode char with length > 1 in deleted part
 		// Add selected text.
 		//
 		// Several permutations must be tested:
@@ -295,11 +400,16 @@ func TestEditorDeleteWord(t *testing.T) {
 		{"hello there brave new world", 12, 6, 2, 12, "hello there  world"},    // The two spaces after "there" are actually suboptimal, if you ask me. See also above cases.
 		{"hello there brave new world", 12, 6, -1, 12, "hello there new world"},
 		{"hello there brave new world", 12, 6, -2, 6, "hello new world"},
+		{"hello there b®âve new world", 12, 6, 1, 12, "hello there new world"},  // unicode chars with length > 1 in selection
+		{"hello there b®âve new world", 12, 6, 2, 12, "hello there  world"},     // ditto
+		{"hello there b®âve new world", 12, 6, -1, 12, "hello there new world"}, // ditto
+		{"hello there b®âve new world", 12, 6, -2, 6, "hello new world"},        // ditto
 		// "|brave " selected
 		{"hello there brave new world", 18, -6, 1, 12, "hello there new world"}, // #20
 		{"hello there brave new world", 18, -6, 2, 12, "hello there  world"},    // ditto
 		{"hello there brave new world", 18, -6, -1, 12, "hello there new world"},
 		{"hello there brave new world", 18, -6, -2, 6, "hello new world"},
+		{"hello there b®âve new world", 18, -6, 1, 12, "hello there new world"}, // unicode chars with length > 1 in selection
 		// Random edge cases
 		{"hello there brave new world", 12, 6, 99, 12, "hello there "},
 		{"hello there brave new world", 18, -6, -99, 0, "new world"},