forked from joejulian/gio
text: correct arabic diacritics handling
This commit fixes the association of diacritical marks with the proper script during text segmentation, as well as fixing the visual position of diacritical marks. The prior code inverted the Y axis positioning for diacritics, which made them frequently overlap the glyph they were meant to appear above or below. Signed-off-by: runitclean <runitclean@disroot.org>
This commit is contained in:
+3
-3
@@ -312,7 +312,7 @@ func splitByScript(inputs []shaping.Input, documentDir di.Direction, buf []shapi
|
||||
r := input.Text[i]
|
||||
runeScript := language.LookupScript(r)
|
||||
|
||||
if runeScript == language.Common || runeScript == currentInput.Script {
|
||||
if runeScript == language.Common || runeScript == language.Inherited || runeScript == currentInput.Script {
|
||||
continue
|
||||
}
|
||||
|
||||
@@ -661,7 +661,7 @@ func (s *shaperImpl) Shape(pathOps *op.Ops, gs []Glyph) clip.PathSpec {
|
||||
outline := glyphData
|
||||
// Move to glyph position.
|
||||
pos := f32.Point{
|
||||
X: fixedToFloat((g.X - x) - g.Offset.X),
|
||||
X: fixedToFloat((g.X - x) + g.Offset.X),
|
||||
Y: -fixedToFloat(g.Offset.Y),
|
||||
}
|
||||
builder.Move(pos.Sub(lastPos))
|
||||
@@ -761,7 +761,7 @@ func (s *shaperImpl) Bitmaps(ops *op.Ops, gs []Glyph) op.CallOp {
|
||||
imgSize = bitmapData.size
|
||||
}
|
||||
off := op.Affine(f32.AffineId().Offset(f32.Point{
|
||||
X: fixedToFloat((g.X - x) - g.Offset.X),
|
||||
X: fixedToFloat((g.X - x) + g.Offset.X),
|
||||
Y: fixedToFloat(g.Offset.Y + g.Bounds.Min.Y),
|
||||
})).Push(ops)
|
||||
cl := clip.Rect{Max: imgSize}.Push(ops)
|
||||
|
||||
@@ -10,6 +10,8 @@ import (
|
||||
nsareg "eliasnaur.com/font/noto/sans/arabic/regular"
|
||||
"github.com/go-text/typesetting/font"
|
||||
"github.com/go-text/typesetting/shaping"
|
||||
"github.com/go-text/typesetting/language"
|
||||
"github.com/go-text/typesetting/di"
|
||||
"golang.org/x/image/font/gofont/goregular"
|
||||
"golang.org/x/image/math/fixed"
|
||||
|
||||
@@ -593,3 +595,103 @@ func TestGlyphIDPacking(t *testing.T) {
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
// TestArabicDiacriticClustering verifies that Arabic diacritics (which usually have
|
||||
// script 'Inherited') are correctly clustered with their base Arabic letters,
|
||||
// rather than being split into a separate shaping run.
|
||||
func TestArabicDiacriticClustering(t *testing.T) {
|
||||
tests := []struct {
|
||||
name string
|
||||
input []rune
|
||||
wantRuns int
|
||||
wantScript language.Script
|
||||
wantDirection di.Direction
|
||||
}{
|
||||
{
|
||||
name: "Arabic Letter + Diacritic",
|
||||
// \u0628 => BEH
|
||||
// \u0650 => KASRA (Diacritic)
|
||||
input: []rune{'\u0628', '\u0650'},
|
||||
wantRuns: 1,
|
||||
wantScript: language.Arabic,
|
||||
wantDirection: di.DirectionRTL,
|
||||
},
|
||||
{
|
||||
name: "Arabic Word with Multiple Diacritics",
|
||||
input: []rune{
|
||||
'\u0628', // BEH
|
||||
'\u0650', // KASRA
|
||||
'\u0633', // SEEN
|
||||
'\u0652', // SUKUN
|
||||
'\u0645', // MEEM
|
||||
'\u0650', // KASRA
|
||||
},
|
||||
wantRuns: 1,
|
||||
wantScript: language.Arabic,
|
||||
wantDirection: di.DirectionRTL,
|
||||
},
|
||||
{
|
||||
name: "Mixed Script (CONTROL Case) #1",
|
||||
// Arabic Letter + Latin Letter
|
||||
// THESE MUST SPLIT TO 2.
|
||||
input: []rune{'\u0628', 'A'},
|
||||
wantRuns: 2,
|
||||
wantScript: language.Arabic,
|
||||
wantDirection: di.DirectionRTL,
|
||||
},
|
||||
{
|
||||
name: "Mixed Script (CONTROL Case) #2",
|
||||
// Arabic Letter + Diacritic + Diacritic + Latin Letter + Arabic Letter + Diacritic
|
||||
// THESE MUST SPLIT TO 3.
|
||||
input: []rune{'\u0628', '\u0651', '\u0650', 'A', '\u0628', '\u0650'},
|
||||
wantRuns: 3,
|
||||
wantScript: language.Arabic,
|
||||
wantDirection: di.DirectionRTL,
|
||||
},
|
||||
{
|
||||
name: "Mixed Script (A little 'stress' test)",
|
||||
// Latin 's' + Arabic Kasra + Latin 'r' + Arabic Fatha
|
||||
// this technically valid unicode!
|
||||
// the diacritics should inherit "Latin"
|
||||
input: []rune{'s', '\u0651', '\u0650', 'r', '\u064E'},
|
||||
wantRuns: 1,
|
||||
wantScript: language.Latin,
|
||||
wantDirection: di.DirectionLTR,
|
||||
},
|
||||
}
|
||||
|
||||
for _, tt := range tests {
|
||||
t.Run(tt.name, func(t *testing.T) {
|
||||
inputs := []shaping.Input{{
|
||||
Text: tt.input,
|
||||
RunStart: 0,
|
||||
RunEnd: len(tt.input),
|
||||
Direction: tt.wantDirection,
|
||||
Script: language.Arabic,
|
||||
Face: nil, // face doesn't really matter for splitting anyway
|
||||
Size: fixed.I(10),
|
||||
}}
|
||||
|
||||
got := splitByScript(inputs, tt.wantDirection, nil)
|
||||
|
||||
if len(got) != tt.wantRuns {
|
||||
t.Fatalf("splitByScript produced %d runs, expected %d. \nRun details: %+v", len(got), tt.wantRuns, got)
|
||||
}
|
||||
|
||||
// this is for the single-run cases
|
||||
// we need to verify the integrity of the single run
|
||||
// to ensure
|
||||
// - the truncation didn't happen early on (when first hitting a diacritic)
|
||||
// - and the right dominant script label was used
|
||||
if tt.wantRuns == 1 {
|
||||
run := got[0]
|
||||
if run.RunEnd != len(tt.input) {
|
||||
t.Errorf("Run truncated early. End = %d, expected %d", run.RunEnd, len(tt.input))
|
||||
}
|
||||
if run.Script != tt.wantScript {
|
||||
t.Errorf("Run assigned wrong script. Got %s, expected %s", run.Script, tt.wantScript)
|
||||
}
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user