Files
gio/text/family_parser.go
Egon Elbre 9ab8095d1a text: fix length check
Signed-off-by: Egon Elbre <egonelbre@gmail.com>
2026-02-18 08:36:57 +01:00

247 lines
4.7 KiB
Go

package text
import (
"fmt"
"strings"
"unicode"
"unicode/utf8"
)
// tokenKind identifies the category of a lexed token.
type tokenKind uint8

const (
	// tokenStr is a string token; its text is carried in token.value.
	tokenStr tokenKind = iota
	// tokenComma is a "," separator.
	tokenComma
	// tokenEOF marks the end of the input.
	tokenEOF
)

// token is a single lexical element: a kind plus the text associated
// with it.
type token struct {
	kind  tokenKind
	value string
}

// String renders the token for use in diagnostics. String tokens render as
// their value, commas as ",", the end-of-input marker as "EOF", and any
// other kind as "unknown".
func (t token) String() string {
	if t.kind == tokenStr {
		return t.value
	}
	if t.kind == tokenComma {
		return ","
	}
	if t.kind == tokenEOF {
		return "EOF"
	}
	return "unknown"
}
// lexState is one state of the lexer's state machine: a function that
// operates on the lexer and returns the state to run next. A nil result
// means there is no further state to run.
type lexState func(*lexer) lexState
// lexText is the lexer state used between family names. It emits comma
// tokens, hands control to the quoted or bare string states when a family
// name begins, and emits the EOF token and stops (returns nil) when the
// input is exhausted.
func lexText(l *lexer) lexState {
	for {
		r := l.next()
		if r == -1 {
			// End of input: discard anything pending and finish.
			l.ignore()
			l.emit(tokenEOF)
			return nil
		}
		if unicode.IsSpace(r) {
			// Whitespace is left pending rather than ignored here; it is
			// dropped by a later ignore or trimmed by lexBareStr.
			continue
		}
		switch r {
		case ',':
			l.ignore()
			l.emit(tokenComma)
		case '"':
			l.ignore()
			return lexDquote
		case '\'':
			l.ignore()
			return lexSquote
		default:
			// The rune just consumed is the first character of a bare
			// family name, so it is kept pending for lexBareStr.
			return lexBareStr
		}
	}
}
// lexBareStr lexes an unquoted family name. It consumes input up to (but not
// including) the next comma or the end of the input, emits the pending text
// as a string token with surrounding whitespace trimmed, and returns to
// lexText.
func lexBareStr(l *lexer) lexState {
	for !strings.HasPrefix(l.input[l.pos:], `,`) {
		if l.next() == -1 {
			break
		}
	}
	// The trimming closure never fails, so the returned error is always nil.
	_ = l.emitProcessed(tokenStr, func(raw string) (string, error) {
		return strings.TrimSpace(raw), nil
	})
	return lexText
}
// lexDquote lexes the body of a double-quoted family name by delegating to
// lexQuote with the double-quote mark.
func lexDquote(l *lexer) lexState {
	const mark = "\""
	return lexQuote(l, mark)
}
// lexSquote lexes the body of a single-quoted family name by delegating to
// lexQuote with the single-quote mark.
func lexSquote(l *lexer) lexState {
	const mark = "'"
	return lexQuote(l, mark)
}
// unescape normalizes the contents of a quoted string. Leading and trailing
// whitespace is dropped, whitespace between non-space characters is kept
// verbatim, and the escape sequences \\ and \<quote> are replaced by the
// character they denote.
//
// An error is returned for any other escape sequence, including a backslash
// that is the final character of s and therefore has nothing to escape.
func unescape(s string, quote rune) (string, error) {
	var b strings.Builder
	b.Grow(len(s))

	// gapStart is the byte offset at which the current run of whitespace
	// began, or -1 when no run is pending. A pending run is written out only
	// once a following non-space character proves it is not trailing.
	gapStart := -1
	seenText := false
	for i := 0; i < len(s); {
		start := i
		r, sz := utf8.DecodeRuneInString(s[i:])
		i += sz
		if unicode.IsSpace(r) {
			// Whitespace before the first non-space character is dropped.
			if seenText && gapStart < 0 {
				gapStart = start
			}
			continue
		}
		seenText = true
		if gapStart >= 0 {
			b.WriteString(s[gapStart:start])
			gapStart = -1
		}
		if r != '\\' {
			b.WriteRune(r)
			continue
		}
		if i >= len(s) {
			// Decoding an empty string would yield utf8.RuneError and put a
			// replacement character in the message; report the real problem.
			return "", fmt.Errorf("illegal escape sequence: unterminated \\ at end of input")
		}
		esc, escSz := utf8.DecodeRuneInString(s[i:])
		i += escSz
		switch esc {
		case '\\', quote:
			b.WriteRune(esc)
		default:
			return "", fmt.Errorf("illegal escape sequence \\%c", esc)
		}
	}
	return b.String(), nil
}
// lexQuote lexes the body of a quoted family name terminated by mark. On
// reaching an unescaped closing mark it emits the unescaped body as a string
// token, consumes the mark, and returns to lexText. If unescaping fails the
// error is recorded on the lexer and lexing stops. If the input ends before
// the closing mark, an error is recorded and control returns to lexText.
func lexQuote(l *lexer, mark string) lexState {
	// escaped reports whether the previously consumed rune was a backslash
	// that was not itself escaped, so the upcoming rune is taken literally.
	escaped := false
	for {
		if !escaped && strings.HasPrefix(l.input[l.pos:], mark) {
			if err := l.emitProcessed(tokenStr, func(body string) (string, error) {
				return unescape(body, []rune(mark)[0])
			}); err != nil {
				l.err = err
				return nil
			}
			// Step over the closing mark and discard it.
			l.next()
			l.ignore()
			return lexText
		}
		r := l.next()
		if r == -1 {
			l.err = fmt.Errorf("unexpected EOF while parsing %s-quoted family", mark)
			return lexText
		}
		// A backslash starts an escape only when it is not itself escaped;
		// any rune following an escaping backslash ends the escape.
		escaped = !escaped && r == '\\'
	}
}
// lexer holds the state of a state-function driven tokenizer.
type lexer struct {
// input is the text still to be processed; text that has been emitted or
// ignored is sliced off the front.
input string
// pos is the byte offset into input of the next rune to decode.
// input[:pos] is the pending text that has been consumed but not yet
// emitted or ignored.
pos int
// tokens accumulates the tokens emitted so far.
tokens []token
// err records an error encountered while lexing.
err error
}
// ignore discards the pending text input[:pos], so that the remaining input
// starts at the current position.
func (l *lexer) ignore() {
	l.input, l.pos = l.input[l.pos:], 0
}
// next decodes the rune at the current position, advances past it, and
// returns it. It returns -1 without advancing when no input remains.
func (l *lexer) next() int32 {
	if len(l.input) <= l.pos {
		return -1
	}
	decoded, width := utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += width
	return decoded
}
// emit appends a token of kind t whose value is the pending text, unchanged.
func (l *lexer) emit(t tokenKind) {
	identity := func(pending string) (string, error) {
		return pending, nil
	}
	l.emitProcessed(t, identity)
}
// emitProcessed appends a token of kind t whose value is the pending text
// input[:pos] after transformation by f, then discards the pending text.
// The token is appended even when f fails; f's error is returned to the
// caller.
func (l *lexer) emitProcessed(t tokenKind, f func(string) (string, error)) error {
	pending := l.input[:l.pos]
	value, err := f(pending)
	l.tokens = append(l.tokens, token{kind: t, value: value})
	l.ignore()
	return err
}
// run executes the lexer on the given input and returns the resulting tokens
// together with any error recorded while lexing. The returned slice reuses
// the lexer's token storage, so it is overwritten by the next call to run.
func (l *lexer) run(input string) ([]token, error) {
	l.input = input
	l.tokens = l.tokens[:0]
	l.pos = 0
	// Clear any error left by a previous run; otherwise a lexer that failed
	// once would report that stale error for every later input.
	l.err = nil
	for state := lexText; state != nil; {
		state = state(l)
	}
	return l.tokens, l.err
}
// parser implements a simple recursive descent parser for font family fallback
// expressions.
type parser struct {
// faces accumulates the family names extracted by the current parse; its
// storage is reused across calls.
faces []string
// lexer tokenizes the rule text handed to parse.
lexer lexer
// tokens holds the tokens that have not yet been consumed by the parser.
tokens []token
}
// parse lexes and parses the provided rule and returns the extracted font
// families. The returned slice reuses storage owned by the parser, so it is
// valid only until the next call to parse. If lexing or parsing fails, nil
// families and an error describing the failure are returned instead.
func (p *parser) parse(rule string) ([]string, error) {
	var err error
	p.tokens, err = p.lexer.run(rule)
	if err != nil {
		return nil, err
	}
	p.faces = p.faces[:0]
	// parseList appends to p.faces, so it must finish before p.faces is read.
	// Returning both in one expression list would leave the order of the
	// read and the call unspecified.
	if err = p.parseList(); err != nil {
		return nil, err
	}
	return p.faces, nil
}
// parseList implements the production:
//
//	LIST ::= <FACE> <COMMA> <LIST> | <FACE>
//
// It consumes tokens from p.tokens and appends each family name it finds to
// p.faces. A token stream that runs out is treated like an EOF token: it is
// an error where a family name is required and ends the list after a family
// name.
func (p *parser) parseList() error {
	for {
		if len(p.tokens) == 0 {
			return fmt.Errorf("expected family name, got EOF")
		}
		head := p.tokens[0]
		if head.kind != tokenStr {
			return fmt.Errorf("expected family name, got %s", head)
		}
		p.faces = append(p.faces, head.value)
		p.tokens = p.tokens[1:]

		// Guard the lookahead the same way as the first access, so a stream
		// without a trailing EOF token cannot cause an out-of-range panic.
		if len(p.tokens) == 0 {
			return nil
		}
		switch next := p.tokens[0]; next.kind {
		case tokenEOF:
			return nil
		case tokenComma:
			// Consume the separator and parse the rest of the list.
			p.tokens = p.tokens[1:]
		default:
			return fmt.Errorf("unexpected token %s", next)
		}
	}
}