forked from joejulian/gio
9ab8095d1a
Signed-off-by: Egon Elbre <egonelbre@gmail.com>
247 lines
4.7 KiB
Go
247 lines
4.7 KiB
Go
package text
|
|
|
|
import (
|
|
"fmt"
|
|
"strings"
|
|
"unicode"
|
|
"unicode/utf8"
|
|
)
|
|
|
|
type tokenKind uint8
|
|
|
|
const (
|
|
tokenStr tokenKind = iota
|
|
tokenComma
|
|
tokenEOF
|
|
)
|
|
|
|
type token struct {
|
|
kind tokenKind
|
|
value string
|
|
}
|
|
|
|
func (t token) String() string {
|
|
switch t.kind {
|
|
case tokenStr:
|
|
return t.value
|
|
case tokenComma:
|
|
return ","
|
|
case tokenEOF:
|
|
return "EOF"
|
|
default:
|
|
return "unknown"
|
|
}
|
|
}
|
|
|
|
type lexState func(*lexer) lexState
|
|
|
|
func lexText(l *lexer) lexState {
|
|
for {
|
|
switch r := l.next(); {
|
|
case r == -1:
|
|
l.ignore()
|
|
l.emit(tokenEOF)
|
|
return nil
|
|
case unicode.IsSpace(r):
|
|
continue
|
|
case r == ',':
|
|
l.ignore()
|
|
l.emit(tokenComma)
|
|
case r == '"':
|
|
l.ignore()
|
|
return lexDquote
|
|
case r == '\'':
|
|
l.ignore()
|
|
return lexSquote
|
|
default:
|
|
return lexBareStr
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexBareStr(l *lexer) lexState {
|
|
defer l.emitProcessed(tokenStr, func(s string) (string, error) {
|
|
return strings.TrimSpace(s), nil
|
|
})
|
|
for {
|
|
if strings.HasPrefix(l.input[l.pos:], `,`) {
|
|
return lexText
|
|
}
|
|
switch r := l.next(); {
|
|
case r == -1:
|
|
return lexText
|
|
}
|
|
}
|
|
}
|
|
|
|
func lexDquote(l *lexer) lexState {
|
|
return lexQuote(l, `"`)
|
|
}
|
|
|
|
func lexSquote(l *lexer) lexState {
|
|
return lexQuote(l, `'`)
|
|
}
|
|
|
|
func unescape(s string, quote rune) (string, error) {
|
|
var b strings.Builder
|
|
hitNonSpace := false
|
|
var wb strings.Builder
|
|
for i := 0; i < len(s); {
|
|
r, sz := utf8.DecodeRuneInString(s[i:])
|
|
i += sz
|
|
if unicode.IsSpace(r) {
|
|
if !hitNonSpace {
|
|
continue
|
|
}
|
|
wb.WriteRune(r)
|
|
continue
|
|
}
|
|
hitNonSpace = true
|
|
// If we get here, we're not looking at whitespace.
|
|
// Insert any buffered up whitespace characters from
|
|
// the gap between words.
|
|
b.WriteString(wb.String())
|
|
wb.Reset()
|
|
if r == '\\' {
|
|
r, sz := utf8.DecodeRuneInString(s[i:])
|
|
i += sz
|
|
switch r {
|
|
case '\\', quote:
|
|
b.WriteRune(r)
|
|
default:
|
|
return "", fmt.Errorf("illegal escape sequence \\%c", r)
|
|
}
|
|
} else {
|
|
b.WriteRune(r)
|
|
}
|
|
}
|
|
return b.String(), nil
|
|
}
|
|
|
|
func lexQuote(l *lexer, mark string) lexState {
|
|
escaping := false
|
|
for {
|
|
if isQuote := strings.HasPrefix(l.input[l.pos:], mark); isQuote && !escaping {
|
|
err := l.emitProcessed(tokenStr, func(s string) (string, error) {
|
|
return unescape(s, []rune(mark)[0])
|
|
})
|
|
if err != nil {
|
|
l.err = err
|
|
return nil
|
|
}
|
|
l.next()
|
|
l.ignore()
|
|
return lexText
|
|
}
|
|
escaped := escaping
|
|
switch r := l.next(); {
|
|
case r == -1:
|
|
l.err = fmt.Errorf("unexpected EOF while parsing %s-quoted family", mark)
|
|
return lexText
|
|
case r == '\\':
|
|
if !escaped {
|
|
escaping = true
|
|
}
|
|
}
|
|
if escaped {
|
|
escaping = false
|
|
}
|
|
}
|
|
}
|
|
|
|
type lexer struct {
|
|
input string
|
|
pos int
|
|
tokens []token
|
|
err error
|
|
}
|
|
|
|
func (l *lexer) ignore() {
|
|
l.input = l.input[l.pos:]
|
|
l.pos = 0
|
|
}
|
|
|
|
// next decodes the next rune in the input and returns it.
|
|
func (l *lexer) next() int32 {
|
|
if l.pos >= len(l.input) {
|
|
return -1
|
|
}
|
|
r, w := utf8.DecodeRuneInString(l.input[l.pos:])
|
|
l.pos += w
|
|
return r
|
|
}
|
|
|
|
// emit adds a token of the given kind.
|
|
func (l *lexer) emit(t tokenKind) {
|
|
l.emitProcessed(t, func(s string) (string, error) { return s, nil })
|
|
}
|
|
|
|
// emitProcessed adds a token of the given kind, but transforms its value
|
|
// with the provided closure first.
|
|
func (l *lexer) emitProcessed(t tokenKind, f func(string) (string, error)) error {
|
|
val, err := f(l.input[:l.pos])
|
|
l.tokens = append(l.tokens, token{
|
|
kind: t,
|
|
value: val,
|
|
})
|
|
l.ignore()
|
|
return err
|
|
}
|
|
|
|
// run executes the lexer on the given input.
|
|
func (l *lexer) run(input string) ([]token, error) {
|
|
l.input = input
|
|
l.tokens = l.tokens[:0]
|
|
l.pos = 0
|
|
for state := lexText; state != nil; {
|
|
state = state(l)
|
|
}
|
|
return l.tokens, l.err
|
|
}
|
|
|
|
// parser implements a simple recursive descent parser for font family fallback
|
|
// expressions.
|
|
type parser struct {
|
|
faces []string
|
|
lexer lexer
|
|
tokens []token
|
|
}
|
|
|
|
// parse the provided rule and return the extracted font families. The returned families
|
|
// are valid only until the next call to parse. If parsing fails, an error describing the
|
|
// failure is returned instead.
|
|
func (p *parser) parse(rule string) ([]string, error) {
|
|
var err error
|
|
p.tokens, err = p.lexer.run(rule)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
p.faces = p.faces[:0]
|
|
return p.faces, p.parseList()
|
|
}
|
|
|
|
// parse implements the production:
|
|
//
|
|
// LIST ::= <FACE> <COMMA> <LIST> | <FACE>
|
|
func (p *parser) parseList() error {
|
|
if len(p.tokens) == 0 {
|
|
return fmt.Errorf("expected family name, got EOF")
|
|
}
|
|
if head := p.tokens[0]; head.kind != tokenStr {
|
|
return fmt.Errorf("expected family name, got %s", head)
|
|
} else {
|
|
p.faces = append(p.faces, head.value)
|
|
p.tokens = p.tokens[1:]
|
|
}
|
|
|
|
switch head := p.tokens[0]; head.kind {
|
|
case tokenEOF:
|
|
return nil
|
|
case tokenComma:
|
|
p.tokens = p.tokens[1:]
|
|
return p.parseList()
|
|
default:
|
|
return fmt.Errorf("unexpected token %s", head)
|
|
}
|
|
}
|