diff --git a/font/font.go b/font/font.go index 03d2485c..c81c517d 100644 --- a/font/font.go +++ b/font/font.go @@ -22,7 +22,8 @@ type Weight int // Font specify a particular typeface variant, style and weight. type Font struct { - // Typeface specifies the name of the family of faces. + // Typeface specifies the name(s) of the font faces to try. See [Typeface] + // for details. Typeface Typeface // Style specifies the kind of text style. Style Style @@ -36,8 +37,39 @@ type Face interface { Face() font.Face } -// Typeface identifies a particular typeface design. The empty -// string denotes the default typeface. +// Typeface identifies a list of font families to attempt to use for displaying +// a string. The syntax is a comma-delimited list of family names. In order to +// allow for the remote possibility of needing to express a font family name +// containing a comma, name entries may be quoted using either single or double +// quotes. Within quotes, a literal quotation mark can be expressed by escaping +// it with `\`. A literal backslash may be expressed by escaping it with another +// `\`. +// +// Here's an example Typeface: +// +// Times New Roman, Georgia, serif +// +// This is equivalent to the above: +// +// "Times New Roman", 'Georgia', serif +// +// Here are some valid uses of escape sequences: +// +// "Contains a literal \" doublequote", 'Literal \' Singlequote', "\\ Literal backslash", '\\ another' +// +// This syntax has the happy side effect that most CSS "font-family" rules are +// valid Typefaces (without the trailing semicolon). +// +// Generic CSS font families are supported, and are automatically expanded to lists +// of known font families with a matching style. 
// tokenKind identifies the category of a lexed token.
type tokenKind uint8

const (
	// tokenStr is a font family name (bare or quoted, already unescaped).
	tokenStr tokenKind = iota
	// tokenComma is the separator between family names.
	tokenComma
	// tokenEOF marks the end of the input.
	tokenEOF
)

// token is a single lexical element of a typeface expression.
type token struct {
	kind  tokenKind
	value string
}

// String renders the token for use in error messages.
func (t token) String() string {
	switch t.kind {
	case tokenStr:
		return t.value
	case tokenComma:
		return ","
	case tokenEOF:
		return "EOF"
	default:
		return "unknown"
	}
}

// lexState is one state of the lexer's state machine. Each state consumes
// some input and returns the state to run next, or nil to stop lexing.
type lexState func(*lexer) lexState

// lexText is the top-level state. It skips whitespace, emits comma and EOF
// tokens, and dispatches to the quoted or bare string states.
func lexText(l *lexer) lexState {
	for {
		switch r := l.next(); {
		case r == -1:
			l.ignore()
			l.emit(tokenEOF)
			return nil
		case unicode.IsSpace(r):
			// Whitespace is left pending rather than ignored: it is dropped
			// by the next ignore call, or trimmed if a bare string follows.
			continue
		case r == ',':
			l.ignore()
			l.emit(tokenComma)
		case r == '"':
			l.ignore()
			return lexDquote
		case r == '\'':
			l.ignore()
			return lexSquote
		default:
			return lexBareStr
		}
	}
}

// lexBareStr lexes an unquoted family name. The name extends to the next
// comma or the end of the input and has surrounding whitespace trimmed.
func lexBareStr(l *lexer) lexState {
	// The position is always on a rune boundary and ',' is ASCII, so a byte
	// comparison is sufficient to detect the delimiter.
	for l.pos < len(l.input) && l.input[l.pos] != ',' {
		l.next()
	}
	// The trimming closure never fails, so the returned error is always nil.
	_ = l.emitProcessed(tokenStr, func(s string) (string, error) {
		return strings.TrimSpace(s), nil
	})
	return lexText
}

// lexDquote lexes the remainder of a double-quoted family name.
func lexDquote(l *lexer) lexState {
	return lexQuote(l, `"`)
}

// lexSquote lexes the remainder of a single-quoted family name.
func lexSquote(l *lexer) lexState {
	return lexQuote(l, `'`)
}

// unescape processes the contents of a quoted family name. Leading and
// trailing whitespace is dropped, interior whitespace is preserved, and the
// escape sequences `\\` and `\<quote>` are replaced by the character they
// denote. Any other escape sequence is an error.
func unescape(s string, quote rune) (string, error) {
	var out strings.Builder
	// pending buffers whitespace seen after the first non-space rune. It is
	// only flushed when another non-space rune follows, which is what trims
	// trailing whitespace.
	var pending strings.Builder
	seenNonSpace := false
	for i := 0; i < len(s); {
		r, sz := utf8.DecodeRuneInString(s[i:])
		i += sz
		if unicode.IsSpace(r) {
			if seenNonSpace {
				pending.WriteRune(r)
			}
			continue
		}
		seenNonSpace = true
		// Not whitespace: insert the buffered whitespace from the gap
		// between words.
		out.WriteString(pending.String())
		pending.Reset()
		if r != '\\' {
			out.WriteRune(r)
			continue
		}
		esc, escSz := utf8.DecodeRuneInString(s[i:])
		i += escSz
		switch esc {
		case '\\', quote:
			out.WriteRune(esc)
		default:
			return "", fmt.Errorf("illegal escape sequence \\%c", esc)
		}
	}
	return out.String(), nil
}

// lexQuote lexes a quoted family name whose opening quote has already been
// consumed. mark is the quotation mark that terminates the name. On an
// unterminated name or an illegal escape sequence the error is recorded on
// the lexer and lexing stops.
func lexQuote(l *lexer, mark string) lexState {
	quote, _ := utf8.DecodeRuneInString(mark)
	// escaping is true when the previous rune was an unescaped backslash, so
	// the next rune (including a quotation mark) is taken literally.
	escaping := false
	for {
		if !escaping && strings.HasPrefix(l.input[l.pos:], mark) {
			err := l.emitProcessed(tokenStr, func(s string) (string, error) {
				return unescape(s, quote)
			})
			if err != nil {
				l.err = err
				return nil
			}
			// Consume and discard the closing quotation mark.
			l.next()
			l.ignore()
			return lexText
		}
		r := l.next()
		switch {
		case r == -1:
			l.err = fmt.Errorf("unexpected EOF while parsing %s-quoted family", mark)
			return nil
		case escaping:
			escaping = false
		case r == '\\':
			escaping = true
		}
	}
}

// lexer converts a typeface expression into a sequence of tokens. It may be
// reused for multiple inputs by calling run again.
type lexer struct {
	// input is the text that has not yet been emitted or ignored.
	input string
	// pos is the byte offset into input of the next rune to read.
	pos int
	// tokens accumulates the tokens produced by the current run.
	tokens []token
	// err is the first error encountered during the current run.
	err error
}

// ignore discards all input consumed so far.
func (l *lexer) ignore() {
	l.input = l.input[l.pos:]
	l.pos = 0
}

// next decodes the next rune in the input and returns it, or -1 if the
// input is exhausted.
func (l *lexer) next() rune {
	if l.pos >= len(l.input) {
		return -1
	}
	r, w := utf8.DecodeRuneInString(l.input[l.pos:])
	l.pos += w
	return r
}

// emit adds a token of the given kind whose value is the consumed input.
func (l *lexer) emit(t tokenKind) {
	// The identity transformation cannot fail.
	_ = l.emitProcessed(t, func(s string) (string, error) { return s, nil })
}

// emitProcessed adds a token of the given kind, but transforms its value
// with the provided closure first. The consumed input is discarded and the
// closure's error, if any, is returned.
func (l *lexer) emitProcessed(t tokenKind, f func(string) (string, error)) error {
	val, err := f(l.input[:l.pos])
	l.tokens = append(l.tokens, token{
		kind:  t,
		value: val,
	})
	l.ignore()
	return err
}

// run executes the lexer on the given input and returns the resulting
// tokens. The returned slice is reused by the next call to run.
func (l *lexer) run(input string) ([]token, error) {
	l.input = input
	l.tokens = l.tokens[:0]
	l.pos = 0
	// Clear any error left over from a previous run; otherwise a single bad
	// input would make every later run on this lexer fail.
	l.err = nil
	for state := lexState(lexText); state != nil; {
		state = state(l)
	}
	return l.tokens, l.err
}

// parser implements a simple parser for font family fallback expressions.
// It reuses its internal buffers across calls to parse.
type parser struct {
	faces  []string
	lexer  lexer
	tokens []token
}

// parse the provided rule and return the extracted font families. The returned families
// are valid only until the next call to parse. If parsing fails, nil and an error
// describing the failure are returned instead.
func (p *parser) parse(rule string) ([]string, error) {
	tokens, err := p.lexer.run(rule)
	if err != nil {
		return nil, err
	}
	p.tokens = tokens
	p.faces = p.faces[:0]
	// parseList reassigns p.faces, so it must complete before p.faces is
	// read; Go does not specify the order between a variable read and a call
	// within a single return statement.
	if err := p.parseList(); err != nil {
		return nil, err
	}
	return p.faces, nil
}

// parseList implements the production:
//
//	LIST ::= FACE COMMA LIST | FACE
//
// where FACE is a string token. Each face is appended to p.faces.
func (p *parser) parseList() error {
	for {
		if len(p.tokens) == 0 {
			return fmt.Errorf("expected family name, got EOF")
		}
		head := p.tokens[0]
		if head.kind != tokenStr {
			return fmt.Errorf("expected family name, got %s", head)
		}
		p.faces = append(p.faces, head.value)
		p.tokens = p.tokens[1:]

		if len(p.tokens) == 0 {
			// The lexer always terminates the stream with an EOF token, but
			// treat a missing one as the end of the list rather than
			// indexing out of range.
			return nil
		}
		switch sep := p.tokens[0]; sep.kind {
		case tokenEOF:
			return nil
		case tokenComma:
			p.tokens = p.tokens[1:]
		default:
			return fmt.Errorf("unexpected token %s", sep)
		}
	}
}
[]scenario{ + { + variantName: "bare single", + input: ",", + }, + { + variantName: "bare multiple", + input: ",, ,,", + }, + }, + shouldErr: true, + }, + { + name: "comma success", + inputs: []scenario{ + { + variantName: "squote", + input: "','", + }, + { + variantName: "dquote", + input: `","`, + }, + }, + expected: []string{","}, + }, + { + name: "comma success multiple", + inputs: []scenario{ + { + variantName: "squote", + input: "',,', ',,'", + }, + { + variantName: "dquote", + input: `",,", ",,"`, + }, + }, + expected: []string{",,", ",,"}, + }, + { + name: "backslashes", + inputs: []scenario{ + { + variantName: "bare", + input: `\font\\`, + }, + { + variantName: "dquote", + input: `"\\font\\\\"`, + }, + { + variantName: "squote", + input: `'\\font\\\\'`, + }, + }, + expected: []string{`\font\\`}, + }, + { + name: "invalid backslashes", + inputs: []scenario{ + { + variantName: "dquote", + input: `"\\""`, + }, + { + variantName: "squote", + input: `'\\''`, + }, + }, + shouldErr: true, + }, + { + name: "too many quotes", + inputs: []scenario{ + { + variantName: "dquote", + input: `"""`, + }, + { + variantName: "squote", + input: `'''`, + }, + }, + shouldErr: true, + }, + { + name: "serif serif's serif\"s", + inputs: []scenario{ + { + variantName: "bare", + input: `serif, serif's, serif"s`, + }, + { + variantName: "squote", + input: `'serif', 'serif\'s', 'serif"s'`, + }, + { + variantName: "dquote", + input: `"serif", "serif's", "serif\"s"`, + }, + }, + expected: []string{"serif", `serif's`, `serif"s`}, + }, + { + name: "complex list", + inputs: []scenario{ + { + variantName: "bare", + input: `Times New Roman, Georgia Common, Helvetica Neue, serif`, + }, + { + variantName: "squote", + input: `'Times New Roman', 'Georgia Common', 'Helvetica Neue', 'serif'`, + }, + { + variantName: "dquote", + input: `"Times New Roman", "Georgia Common", "Helvetica Neue", "serif"`, + }, + { + variantName: "mixed", + input: `Times New Roman, "Georgia Common", 'Helvetica Neue', 
"serif"`, + }, + { + variantName: "mixed with weird spacing", + input: `Times New Roman ,"Georgia Common" , 'Helvetica Neue' ,"serif"`, + }, + }, + expected: []string{"Times New Roman", "Georgia Common", "Helvetica Neue", "serif"}, + }, + } { + t.Run(tc.name, func(t *testing.T) { + var p parser + for _, scen := range tc.inputs { + t.Run(scen.variantName, func(t *testing.T) { + actual, err := p.parse(scen.input) + if (err != nil) != tc.shouldErr { + t.Errorf("unexpected error state: %v", err) + } + if !slices.Equal(tc.expected, actual) { + t.Errorf("expected\n%q\ngot\n%q", tc.expected, actual) + } + }) + } + }) + } +} diff --git a/text/gotext.go b/text/gotext.go index 00b039e1..3fb0994f 100644 --- a/text/gotext.go +++ b/text/gotext.go @@ -168,6 +168,7 @@ type shaperImpl struct { logger interface { Printf(format string, args ...any) } + parser parser // Shaping and wrapping state. shaper shaping.HarfbuzzShaper @@ -442,8 +443,17 @@ func (s *shaperImpl) shapeAndWrapText(params Parameters, txt []rune) (_ []shapin TextContinues: params.forceTruncate, BreakPolicy: wrapPolicyToGoText(params.WrapPolicy), } + families := s.defaultFaces + if params.Font.Typeface != "" { + parsed, err := s.parser.parse(string(params.Font.Typeface)) + if err != nil { + s.logger.Printf("Unable to parse typeface %q: %v", params.Font.Typeface, err) + } else { + families = parsed + } + } s.fontMap.SetQuery(fontscan.Query{ - Families: []string{string(params.Font.Typeface)}, + Families: families, Aspect: opentype.FontToDescription(params.Font).Aspect, }) if wc.TruncateAfterLines > 0 {