You cannot select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
zk/internal/adapter/markdown/extensions/tag.go

279 lines
6.5 KiB
Go

package extensions
import (
"strings"
"unicode"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
gast "github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/parser"
"github.com/yuin/goldmark/text"
"github.com/yuin/goldmark/util"
)
// Tags is an inline AST node holding the tags found at one location of a
// Markdown document.
//
// It embeds gast.BaseInline. In this file it is produced by
// hashtagParser.Parse (always exactly one tag) and by colontagParser.Parse
// (one or more tags).
type Tags struct {
gast.BaseInline
// Tags lists the tag names carried by this node, without their # or :
// delimiters.
Tags []string
}
// Dump hands the node to gast.DumpHelper together with a single "Tags"
// attribute whose value is the node's tags joined with ", ".
//
// source and level are forwarded to gast.DumpHelper unchanged.
func (n *Tags) Dump(source []byte, level int) {
	attrs := map[string]string{
		"Tags": strings.Join(n.Tags, ", "),
	}
	gast.DumpHelper(n, source, level, attrs, nil)
}
// KindTags is the NodeKind of the Tags node, created with the name "Tags".
// It is the value returned by (*Tags).Kind.
var KindTags = gast.NewNodeKind("Tags")
// Kind returns KindTags, the node kind shared by every Tags node.
func (n *Tags) Kind() gast.NodeKind {
return KindTags
}
// TagExt is an extension parsing various flavors of tags:
//
//   - #hashtags, including Bear's #multi words# tags
//   - :colon:separated:tags:, e.g. vimwiki and Org mode
//
// The following are allowed in a tag:
//
//   - Unicode categories [L]etter and [N]umber
//   - / @ ' ~ - _ $ % & + = and, when possible, # and :
//   - any character escaped with \, including whitespace
//
// Each flavor is switched on through the fields below. With the zero value,
// Extend registers no parser at all.
type TagExt struct {
// HashtagEnabled indicates whether #hashtags are parsed.
HashtagEnabled bool
// MultiWordTagEnabled indicates whether Bear's multi-word tags are parsed.
// It is only passed on to the hashtag parser, so HashtagEnabled must be set
// as well for it to have any effect.
MultiWordTagEnabled bool
// ColontagEnabled indicates whether :colon:tags: are parsed.
ColontagEnabled bool
}
// Extend adds the enabled tag parsers to m's parser as inline parsers.
//
// A hashtagParser is added when HashtagEnabled is set (carrying the
// MultiWordTagEnabled flag) and a colontagParser when ColontagEnabled is set.
// Both use the same priority value, 2000. When neither flavor is enabled, m is
// left untouched.
func (t *TagExt) Extend(m goldmark.Markdown) {
	// Priority value shared by both tag parsers.
	const priority = 2000

	var inlineParsers []util.PrioritizedValue

	if t.HashtagEnabled {
		hashtags := &hashtagParser{
			multiWordTagEnabled: t.MultiWordTagEnabled,
		}
		inlineParsers = append(inlineParsers, util.Prioritized(hashtags, priority))
	}

	if t.ColontagEnabled {
		inlineParsers = append(inlineParsers, util.Prioritized(&colontagParser{}, priority))
	}

	// Nothing enabled: do not touch the parser options.
	if len(inlineParsers) == 0 {
		return
	}
	m.Parser().AddOptions(parser.WithInlineParsers(inlineParsers...))
}
// hashtagParser is the inline parser for #hashtags, optionally including
// Bear's #multi words# tags.
type hashtagParser struct {
	// multiWordTagEnabled turns on the attempt to read a Bear-style multi-word
	// tag once the single-word hashtag has been closed by whitespace.
	multiWordTagEnabled bool
}

// Trigger returns the bytes on which Parse is attempted: only '#'.
func (p *hashtagParser) Trigger() []byte {
	return []byte("#")
}
// Parse attempts to read a hashtag from the current line of block, whose first
// byte is skipped as the '#' that triggered the parser.
//
// It returns a *Tags node holding exactly one tag after advancing block, or nil
// (without advancing) when no valid tag is found. parent and pc are not used.
//
// Scanning rules, as implemented below:
//   - The character preceding '#' must not be a valid tag character.
//   - A backslash makes the next rune part of the tag, whatever it is.
//   - Before any whitespace is met, the tag ends at the first rune rejected by
//     isValidTagChar(char, '#'). When multi-word tags are disabled, an
//     unescaped '#' at that stage rejects the whole tag instead.
//   - When multi-word tags are enabled, the first unescaped whitespace closes
//     the single-word tag and starts a multi-word candidate. The candidate
//     replaces the single-word tag only if an unescaped closing '#' is found
//     later on the line and the previous unescaped rune is not whitespace.
//   - The result is trimmed of surrounding whitespace and must pass
//     isValidHashTag.
//
// NOTE(review): endPos is the byte offset of the last rune examined, not one
// past it. When the loop stops on a terminating rune this leaves the terminator
// (including a closing '#') in the reader, but when the line ends without a
// terminator the last rune of the tag presumably stays unconsumed by Advance
// as well. Confirm this is intended.
func (p *hashtagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
previousChar := block.PrecendingCharacter()
line, _ := block.PeekLine()
// A hashtag can't be directly preceded by a # or any other valid character.
// '\x00' is passed as the excluded rune so that # and : both count as valid
// here.
if isValidTagChar(previousChar, '\x00') {
return nil
}
var (
tag string // Accumulator for the hashtag
multiWordTagCandidate string // Accumulator for a potential Bear multi-word tag
)
var (
escaping = false // Found a backslash, next character will be literal
parsingMultiWordTag = false // Finished parsing a hashtag, now attempt parsing a Bear multi-word tag
endPos = 0 // Last position of the tag in the line
multiWordTagEndPos = 0 // Last position of the multi-word tag in the line
)
// appendChar adds c to whichever accumulator is active for the current
// parsing phase.
appendChar := func(c rune) {
if parsingMultiWordTag {
multiWordTagCandidate += string(c)
} else {
tag += string(c)
}
}
// i is the byte offset of char within line.
for i, char := range string(line) {
if i == 0 {
// Skip the first character, as it is #
continue
}
// Track the offset of the rune being examined separately for each phase,
// so that a failed multi-word attempt does not move the end of the
// single-word tag.
if parsingMultiWordTag {
multiWordTagEndPos = i
} else {
endPos = i
}
if escaping {
// Currently escaping? The character will be appended literally.
// previousChar is deliberately left as is: escaped runes are not taken
// into account by the "no space before the closing #" check.
appendChar(char)
escaping = false
} else if char == '\\' {
// Found a backslash, next character will be escaped.
escaping = true
} else if parsingMultiWordTag {
// Parsing a multi-word tag candidate.
// NOTE(review): a rune that is neither a valid tag character, whitespace
// nor '#' is silently dropped from the candidate and scanning goes on.
if isValidTagChar(char, '#') || unicode.IsSpace(char) {
appendChar(char)
} else if char == '#' {
// A valid multi-word tag must not have a space before the closing #.
if !unicode.IsSpace(previousChar) {
tag = multiWordTagCandidate
endPos = multiWordTagEndPos
}
// Either way the closing # ends the scan; if the candidate was
// rejected, the single-word tag and its endPos are kept.
break
}
previousChar = char
} else if !p.multiWordTagEnabled && char == '#' {
// A tag terminated with a # is invalid when not in a multi-word tag.
return nil
} else if p.multiWordTagEnabled && unicode.IsSpace(char) {
// Found a space, let's try to parse a multi-word tag.
// The candidate starts from the single-word tag read so far, followed by
// this whitespace rune.
previousChar = char
multiWordTagCandidate = tag
parsingMultiWordTag = true
appendChar(char)
} else if !isValidTagChar(char, '#') {
// Found an invalid character, the hashtag is complete.
break
} else {
appendChar(char)
}
}
// Escaped whitespace at either end of the tag is removed here too.
tag = strings.TrimSpace(tag)
// Reject empty tags and tags made only of numbers.
if len(tag) == 0 || !isValidHashTag(tag) {
return nil
}
// Move the reader forward by endPos bytes from the leading #.
block.Advance(endPos)
return &Tags{
BaseInline: gast.BaseInline{},
Tags: []string{tag},
}
}
// isValidHashTag reports whether tag contains at least one rune that is not a
// Unicode number, i.e. it rejects the empty string and purely numeric tags.
func isValidHashTag(tag string) bool {
	for _, r := range tag {
		if unicode.IsNumber(r) {
			continue
		}
		// One non-numeric rune is enough to make the tag acceptable.
		return true
	}
	return false
}
// colontagParser is the inline parser for :colon:separated:tags:.
// It carries no configuration.
type colontagParser struct{}

// Trigger returns the bytes on which Parse is attempted: only ':'.
func (p *colontagParser) Trigger() []byte {
	return []byte(":")
}
// Parse attempts to read a run of :colon:separated:tags: from the current line
// of block, whose first byte is skipped as the ':' that triggered the parser.
//
// It returns a *Tags node with every tag completed by a closing ':' after
// advancing block, or nil (without advancing) when no tag was completed.
// parent and pc are not used.
//
// Scanning rules:
//   - The character preceding the leading ':' must not be a valid tag
//     character.
//   - A backslash makes the next rune part of the current tag, whatever it is.
//   - An unescaped ':' completes the current tag, which is trimmed of
//     surrounding whitespace and must pass isValidTag; otherwise scanning stops.
//   - Any other rune rejected by isValidTagChar(r, ':') stops the scan.
//
// NOTE(review): the amount passed to Advance is the byte offset of the last
// rune examined, measured from the rune following the leading ':', while the
// reader itself is positioned on that leading ':'. Confirm that the resulting
// consumed span is the intended one.
func (p *colontagParser) Parse(parent ast.Node, block text.Reader, pc parser.Context) ast.Node {
	before := block.PrecendingCharacter()
	line, _ := block.PeekLine()

	// '\x00' is passed as the excluded rune so that # and : both count as valid
	// preceding characters, which prevents a tag from starting there.
	if isValidTagChar(before, '\x00') {
		return nil
	}

	var (
		current  strings.Builder // Text of the tag being read
		found    []string        // Tags completed so far
		escaped  bool            // Previous rune was an unescaped backslash
		lastSeen int             // Offset in line[1:] of the last rune examined
	)

scan:
	for offset, r := range string(line[1:]) {
		lastSeen = offset

		switch {
		case escaped:
			// Escaped runes are taken literally.
			current.WriteRune(r)
			escaped = false

		case r == '\\':
			escaped = true

		case r == ':':
			// A colon closes the current tag; an invalid one ends the run.
			name := strings.TrimSpace(current.String())
			if !isValidTag(name) {
				break scan
			}
			found = append(found, name)
			current.Reset()

		case isValidTagChar(r, ':'):
			current.WriteRune(r)

		default:
			// Not a tag character: the colontags are complete.
			break scan
		}
	}

	if len(found) == 0 {
		return nil
	}

	block.Advance(lastSeen)
	return &Tags{Tags: found}
}
// isValidTagChar reports whether r may appear unescaped inside a tag.
//
// Accepted runes are Unicode letters and numbers plus the punctuation
// / @ ' ~ - _ $ % & + = : #. The excluded rune is always rejected, which lets
// each parser rule out its own delimiter ('#' or ':'); pass '\x00' to exclude
// nothing meaningful.
func isValidTagChar(r rune, excluded rune) bool {
	if r == excluded {
		return false
	}
	switch r {
	case '/', '@', '\'', '~', '-', '_', '$', '%', '&', '+', '=', ':', '#':
		return true
	}
	return unicode.IsLetter(r) || unicode.IsNumber(r)
}
// isValidTag reports whether tag is acceptable as a colon tag: it must contain
// at least one character other than '-', so the empty string is rejected too.
//
// Rejecting dash-only tags prevents Markdown table syntax from being parsed as
// a colon tag, e.g. |:---:|
// https://github.com/zk-org/zk/issues/185
func isValidTag(tag string) bool {
	// '-' is a single byte in UTF-8 and never occurs inside a multi-byte
	// sequence, so scanning bytes is equivalent to scanning runes here.
	for i := 0; i < len(tag); i++ {
		if tag[i] != '-' {
			return true
		}
	}
	return false
}