mirror of https://github.com/mickael-menu/zk
Add a proper Markdown parser
parent
a691a8857c
commit
29688186f2
@ -0,0 +1,99 @@
|
||||
package markdown
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"strings"
|
||||
|
||||
"github.com/mickael-menu/zk/core/note"
|
||||
"github.com/mickael-menu/zk/util/opt"
|
||||
"github.com/yuin/goldmark"
|
||||
"github.com/yuin/goldmark/ast"
|
||||
"github.com/yuin/goldmark/text"
|
||||
)
|
||||
|
||||
// Parser parses the content of Markdown notes.
|
||||
type Parser struct {
|
||||
md goldmark.Markdown
|
||||
}
|
||||
|
||||
// NewParser creates a new Markdown Parser.
|
||||
func NewParser() *Parser {
|
||||
return &Parser{
|
||||
md: goldmark.New(),
|
||||
}
|
||||
}
|
||||
|
||||
// Parse implements note.Parse.
|
||||
func (p *Parser) Parse(source string) (note.Content, error) {
|
||||
out := note.Content{}
|
||||
|
||||
bytes := []byte(source)
|
||||
root := p.md.Parser().Parse(text.NewReader(bytes))
|
||||
|
||||
title, titleNode, err := parseTitle(root, bytes)
|
||||
if err != nil {
|
||||
return out, err
|
||||
}
|
||||
|
||||
out.Title = title
|
||||
out.Body = parseBody(titleNode, bytes)
|
||||
out.Lead = parseLead(out.Body)
|
||||
|
||||
return out, nil
|
||||
}
|
||||
|
||||
// parseTitle extracts the note title with its node.
|
||||
func parseTitle(root ast.Node, source []byte) (title opt.String, node ast.Node, err error) {
|
||||
var titleNode *ast.Heading
|
||||
err = ast.Walk(root, func(n ast.Node, entering bool) (ast.WalkStatus, error) {
|
||||
if heading, ok := n.(*ast.Heading); ok && entering &&
|
||||
(titleNode == nil || heading.Level < titleNode.Level) {
|
||||
|
||||
titleNode = heading
|
||||
if heading.Level == 1 {
|
||||
return ast.WalkStop, nil
|
||||
}
|
||||
}
|
||||
|
||||
return ast.WalkContinue, nil
|
||||
})
|
||||
if err != nil {
|
||||
return
|
||||
}
|
||||
|
||||
if titleNode != nil {
|
||||
node = titleNode
|
||||
title = opt.NewNotEmptyString(string(titleNode.Text(source)))
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
// parseBody extracts the whole content after the title.
|
||||
func parseBody(titleNode ast.Node, source []byte) opt.String {
|
||||
start := 0
|
||||
if titleNode != nil {
|
||||
if lines := titleNode.Lines(); lines.Len() > 0 {
|
||||
start = lines.At(lines.Len() - 1).Stop
|
||||
}
|
||||
}
|
||||
|
||||
return opt.NewNotEmptyString(
|
||||
strings.TrimSpace(
|
||||
string(source[start:]),
|
||||
),
|
||||
)
|
||||
}
|
||||
|
||||
// parseLead extracts the body content until the first blank line.
|
||||
func parseLead(body opt.String) opt.String {
|
||||
lead := ""
|
||||
scanner := bufio.NewScanner(strings.NewReader(body.String()))
|
||||
for scanner.Scan() {
|
||||
if strings.TrimSpace(scanner.Text()) == "" {
|
||||
break
|
||||
}
|
||||
lead += scanner.Text() + "\n"
|
||||
}
|
||||
|
||||
return opt.NewNotEmptyString(strings.TrimSpace(lead))
|
||||
}
|
@ -0,0 +1,138 @@
|
||||
package markdown
|
||||
|
||||
import (
|
||||
"testing"
|
||||
|
||||
"github.com/mickael-menu/zk/core/note"
|
||||
"github.com/mickael-menu/zk/util/opt"
|
||||
"github.com/mickael-menu/zk/util/test/assert"
|
||||
)
|
||||
|
||||
func TestParseTitle(t *testing.T) {
|
||||
test := func(source string, expectedTitle string) {
|
||||
content := parse(t, source)
|
||||
assert.Equal(t, content.Title, opt.NewNotEmptyString(expectedTitle))
|
||||
}
|
||||
|
||||
test("", "")
|
||||
test("# ", "")
|
||||
test("#A title", "")
|
||||
test(" # A title", "A title")
|
||||
test("# A title", "A title")
|
||||
test("# A title ", "A title")
|
||||
test("## A title", "A title")
|
||||
test("Paragraph \n\n## A title\nBody", "A title")
|
||||
test("# Heading 1\n## Heading 1.a\n# Heading 2", "Heading 1")
|
||||
test("## Small Heading\n# Bigger Heading", "Bigger Heading")
|
||||
test("# A **title** with [formatting](http://stripped)", "A title with formatting")
|
||||
}
|
||||
|
||||
func TestParseBody(t *testing.T) {
|
||||
test := func(source string, expectedBody string) {
|
||||
content := parse(t, source)
|
||||
assert.Equal(t, content.Body, opt.NewNotEmptyString(expectedBody))
|
||||
}
|
||||
|
||||
test("", "")
|
||||
test("# A title\n \n", "")
|
||||
test("Paragraph \n\n# A title", "")
|
||||
test("Paragraph \n\n# A title\nBody", "Body")
|
||||
|
||||
test(
|
||||
`## Small Heading
|
||||
# Bigger Heading
|
||||
|
||||
## Smaller Heading
|
||||
Body
|
||||
several lines
|
||||
# Body heading
|
||||
|
||||
Paragraph:
|
||||
|
||||
* item1
|
||||
* item2
|
||||
|
||||
`,
|
||||
`## Smaller Heading
|
||||
Body
|
||||
several lines
|
||||
# Body heading
|
||||
|
||||
Paragraph:
|
||||
|
||||
* item1
|
||||
* item2`,
|
||||
)
|
||||
}
|
||||
|
||||
func TestParseLead(t *testing.T) {
|
||||
test := func(source string, expectedLead string) {
|
||||
content := parse(t, source)
|
||||
assert.Equal(t, content.Lead, opt.NewNotEmptyString(expectedLead))
|
||||
}
|
||||
|
||||
test("", "")
|
||||
test("# A title\n \n", "")
|
||||
|
||||
test(
|
||||
`Paragraph
|
||||
# A title`,
|
||||
"",
|
||||
)
|
||||
|
||||
test(
|
||||
`Paragraph
|
||||
# A title
|
||||
Lead`,
|
||||
"Lead",
|
||||
)
|
||||
|
||||
test(
|
||||
`# A title
|
||||
Lead
|
||||
multiline
|
||||
|
||||
other`,
|
||||
"Lead\nmultiline",
|
||||
)
|
||||
|
||||
test(
|
||||
`# A title
|
||||
|
||||
Lead
|
||||
multiline
|
||||
|
||||
## Heading`,
|
||||
"Lead\nmultiline",
|
||||
)
|
||||
|
||||
test(
|
||||
`# A title
|
||||
|
||||
## Heading
|
||||
Lead
|
||||
multiline
|
||||
|
||||
other`,
|
||||
`## Heading
|
||||
Lead
|
||||
multiline`,
|
||||
)
|
||||
|
||||
test(
|
||||
`# A title
|
||||
|
||||
* item1
|
||||
* item2
|
||||
|
||||
Paragraph`,
|
||||
`* item1
|
||||
* item2`,
|
||||
)
|
||||
}
|
||||
|
||||
func parse(t *testing.T, source string) note.Content {
|
||||
content, err := NewParser().Parse(source)
|
||||
assert.Nil(t, err)
|
||||
return content
|
||||
}
|
@ -1,26 +1,18 @@
|
||||
package note
|
||||
|
||||
import (
|
||||
"regexp"
|
||||
"strings"
|
||||
"github.com/mickael-menu/zk/util/opt"
|
||||
)
|
||||
|
||||
type Content struct {
|
||||
Title string
|
||||
Body string
|
||||
// Title is the heading of the note.
|
||||
Title opt.String
|
||||
// Lead is the opening paragraph or section of the note.
|
||||
Lead opt.String
|
||||
// Body is the content of the note, including the Lead but without the Title.
|
||||
Body opt.String
|
||||
}
|
||||
|
||||
var contentRegex = regexp.MustCompile(`(?m)^#\s+(.+?)\s*$`)
|
||||
|
||||
func Parse(content string) Content {
|
||||
var res Content
|
||||
|
||||
if match := contentRegex.FindStringSubmatchIndex(content); len(match) >= 4 {
|
||||
res.Title = content[match[2]:match[3]]
|
||||
res.Body = strings.TrimSpace(content[match[3]:])
|
||||
} else {
|
||||
res.Body = strings.TrimSpace(content)
|
||||
}
|
||||
|
||||
return res
|
||||
type Parser interface {
|
||||
Parse(source string) (Content, error)
|
||||
}
|
||||
|
Loading…
Reference in New Issue