diff --git a/text/gotext.go b/text/gotext.go index f37314de..16605da7 100644 --- a/text/gotext.go +++ b/text/gotext.go @@ -312,7 +312,7 @@ func splitByScript(inputs []shaping.Input, documentDir di.Direction, buf []shapi r := input.Text[i] runeScript := language.LookupScript(r) - if runeScript == language.Common || runeScript == currentInput.Script { + if runeScript == language.Common || runeScript == language.Inherited || runeScript == currentInput.Script { continue } @@ -661,7 +661,7 @@ func (s *shaperImpl) Shape(pathOps *op.Ops, gs []Glyph) clip.PathSpec { outline := glyphData // Move to glyph position. pos := f32.Point{ - X: fixedToFloat((g.X - x) - g.Offset.X), + X: fixedToFloat((g.X - x) + g.Offset.X), Y: -fixedToFloat(g.Offset.Y), } builder.Move(pos.Sub(lastPos)) @@ -761,7 +761,7 @@ func (s *shaperImpl) Bitmaps(ops *op.Ops, gs []Glyph) op.CallOp { imgSize = bitmapData.size } off := op.Affine(f32.AffineId().Offset(f32.Point{ - X: fixedToFloat((g.X - x) - g.Offset.X), + X: fixedToFloat((g.X - x) + g.Offset.X), Y: fixedToFloat(g.Offset.Y + g.Bounds.Min.Y), })).Push(ops) cl := clip.Rect{Max: imgSize}.Push(ops) diff --git a/text/gotext_test.go b/text/gotext_test.go index 497f4730..6f0c0435 100644 --- a/text/gotext_test.go +++ b/text/gotext_test.go @@ -10,6 +10,8 @@ import ( nsareg "eliasnaur.com/font/noto/sans/arabic/regular" "github.com/go-text/typesetting/font" "github.com/go-text/typesetting/shaping" + "github.com/go-text/typesetting/language" + "github.com/go-text/typesetting/di" "golang.org/x/image/font/gofont/goregular" "golang.org/x/image/math/fixed" @@ -593,3 +595,103 @@ func TestGlyphIDPacking(t *testing.T) { }) } } + +// TestArabicDiacriticClustering verifies that Arabic diacritics (which usually have +// script 'Inherited') are correctly clustered with their base Arabic letters, +// rather than being split into a separate shaping run. +func TestArabicDiacriticClustering(t *testing.T) { + tests := []struct { + name string + input []rune + wantRuns int + wantScript language.Script + wantDirection di.Direction + }{ + { + name: "Arabic Letter + Diacritic", + // \u0628 => BEH + // \u0650 => KASRA (Diacritic) + input: []rune{'\u0628', '\u0650'}, + wantRuns: 1, + wantScript: language.Arabic, + wantDirection: di.DirectionRTL, + }, + { + name: "Arabic Word with Multiple Diacritics", + input: []rune{ + '\u0628', // BEH + '\u0650', // KASRA + '\u0633', // SEEN + '\u0652', // SUKUN + '\u0645', // MEEM + '\u0650', // KASRA + }, + wantRuns: 1, + wantScript: language.Arabic, + wantDirection: di.DirectionRTL, + }, + { + name: "Mixed Script (CONTROL Case) #1", + // Arabic Letter + Latin Letter + // THESE MUST SPLIT TO 2. + input: []rune{'\u0628', 'A'}, + wantRuns: 2, + wantScript: language.Arabic, + wantDirection: di.DirectionRTL, + }, + { + name: "Mixed Script (CONTROL Case) #2", + // Arabic Letter + Diacritic + Diacritic + Latin Letter + Arabic Letter + Diacritic + // THESE MUST SPLIT TO 3. + input: []rune{'\u0628', '\u0651', '\u0650', 'A', '\u0628', '\u0650'}, + wantRuns: 3, + wantScript: language.Arabic, + wantDirection: di.DirectionRTL, + }, + { + name: "Mixed Script (A little 'stress' test)", + // Latin 's' + Arabic Kasra + Latin 'r' + Arabic Fatha + // this technically valid unicode! + // the diacritics should inherit "Latin" + input: []rune{'s', '\u0651', '\u0650', 'r', '\u064E'}, + wantRuns: 1, + wantScript: language.Latin, + wantDirection: di.DirectionLTR, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + inputs := []shaping.Input{{ + Text: tt.input, + RunStart: 0, + RunEnd: len(tt.input), + Direction: tt.wantDirection, + Script: language.Arabic, + Face: nil, // face doesn't really matter for splitting anyway + Size: fixed.I(10), + }} + + got := splitByScript(inputs, tt.wantDirection, nil) + + if len(got) != tt.wantRuns { + t.Fatalf("splitByScript produced %d runs, expected %d. \nRun details: %+v", len(got), tt.wantRuns, got) + } + + // this is for the single-run cases + // we need to verify the integrity of the single run + // to ensure + // - the truncation didn't happen early on (when first hitting a diacritic) + // - and the right dominant script label was used + if tt.wantRuns == 1 { + run := got[0] + if run.RunEnd != len(tt.input) { + t.Errorf("Run truncated early. End = %d, expected %d", run.RunEnd, len(tt.input)) + } + if run.Script != tt.wantScript { + t.Errorf("Run assigned wrong script. Got %s, expected %s", run.Script, tt.wantScript) + } + } + }) + } +}