Add a proper Markdown parser

pull/6/head
Mickaël Menu 3 years ago
parent a691a8857c
commit 29688186f2
No known key found for this signature in database
GPG Key ID: 53D73664CD359895

@ -0,0 +1,99 @@
package markdown
import (
"bufio"
"strings"
"github.com/mickael-menu/zk/core/note"
"github.com/mickael-menu/zk/util/opt"
"github.com/yuin/goldmark"
"github.com/yuin/goldmark/ast"
"github.com/yuin/goldmark/text"
)
// Parser parses the content of Markdown notes.
//
// It wraps a goldmark.Markdown instance whose parser builds the document
// tree from which the note parts are extracted.
type Parser struct {
// md is the goldmark engine used to parse note sources.
md goldmark.Markdown
}
// NewParser returns a Markdown Parser backed by a goldmark instance created
// without any extra options.
func NewParser() *Parser {
	parser := new(Parser)
	parser.md = goldmark.New()
	return parser
}
// Parse implements note.Parser.
//
// It builds the Markdown tree of source, picks the title heading, then
// derives the body (everything after the title) and the lead (the start of
// the body) from it. On failure an empty note.Content is returned together
// with the error.
func (p *Parser) Parse(source string) (note.Content, error) {
	raw := []byte(source)
	doc := p.md.Parser().Parse(text.NewReader(raw))

	title, heading, err := parseTitle(doc, raw)
	if err != nil {
		return note.Content{}, err
	}

	// The lead is computed from the body, so the body must come first.
	body := parseBody(heading, raw)
	return note.Content{
		Title: title,
		Lead:  parseLead(body),
		Body:  body,
	}, nil
}
// parseTitle looks for the heading to use as the note title and returns its
// text along with the heading node.
//
// The selected heading is the first one having the smallest level in the
// tree; the walk is interrupted as soon as a level-1 heading is met. When
// the tree holds no heading, title and node keep their zero values.
func parseTitle(root ast.Node, source []byte) (title opt.String, node ast.Node, err error) {
	var best *ast.Heading

	visit := func(n ast.Node, entering bool) (ast.WalkStatus, error) {
		if !entering {
			return ast.WalkContinue, nil
		}
		heading, isHeading := n.(*ast.Heading)
		if !isHeading {
			return ast.WalkContinue, nil
		}
		// Only a strictly smaller level replaces the current candidate, so
		// the earliest heading wins among equals.
		if best != nil && heading.Level >= best.Level {
			return ast.WalkContinue, nil
		}
		best = heading
		if heading.Level == 1 {
			// Nothing can beat a level-1 heading.
			return ast.WalkStop, nil
		}
		return ast.WalkContinue, nil
	}

	if err = ast.Walk(root, visit); err != nil {
		return title, node, err
	}
	if best == nil {
		// Return the untouched named result so that node stays a nil
		// interface rather than an interface wrapping a nil *ast.Heading.
		return title, node, nil
	}
	return opt.NewNotEmptyString(string(best.Text(source))), best, nil
}
// parseBody returns the source found after the title, trimmed of
// surrounding whitespace.
//
// The body starts where the last line segment of titleNode stops. When
// titleNode is nil or has no line segments, the whole source is the body.
func parseBody(titleNode ast.Node, source []byte) opt.String {
	offset := 0
	if titleNode != nil {
		segments := titleNode.Lines()
		if count := segments.Len(); count > 0 {
			offset = segments.At(count - 1).Stop
		}
	}

	body := strings.TrimSpace(string(source[offset:]))
	return opt.NewNotEmptyString(body)
}
// parseLead extracts the body content until the first blank line.
//
// Lines are read in order and accumulated until one made only of whitespace
// is met (or the body ends). The result is trimmed of surrounding
// whitespace before being wrapped with opt.NewNotEmptyString.
func parseLead(body opt.String) opt.String {
	content := body.String()

	scanner := bufio.NewScanner(strings.NewReader(content))
	// A default Scanner stops with bufio.ErrTooLong on any line longer than
	// bufio.MaxScanTokenSize (64 KiB), which would silently truncate or
	// drop the lead. Allow the buffer to grow up to the size of the input
	// (plus one byte, so a final line without a newline still fits) to make
	// sure every line can be scanned. The buffer only grows on demand.
	scanner.Buffer(nil, len(content)+1)

	var lead strings.Builder
	for scanner.Scan() {
		line := scanner.Text()
		if strings.TrimSpace(line) == "" {
			break
		}
		lead.WriteString(line)
		lead.WriteByte('\n')
	}
	// No scanner.Err() check: reading from a strings.Reader does not fail,
	// and the buffer limit above rules out bufio.ErrTooLong.

	return opt.NewNotEmptyString(strings.TrimSpace(lead.String()))
}

@ -0,0 +1,138 @@
package markdown
import (
"testing"
"github.com/mickael-menu/zk/core/note"
"github.com/mickael-menu/zk/util/opt"
"github.com/mickael-menu/zk/util/test/assert"
)
// TestParseTitle checks the Title extracted by the parser for a series of
// sources. Each expected title is wrapped with opt.NewNotEmptyString before
// being compared.
func TestParseTitle(t *testing.T) {
	cases := []struct {
		source string
		title  string
	}{
		{"", ""},
		{"# ", ""},
		{"#A title", ""},
		{" # A title", "A title"},
		{"# A title", "A title"},
		{"# A title ", "A title"},
		{"## A title", "A title"},
		{"Paragraph \n\n## A title\nBody", "A title"},
		{"# Heading 1\n## Heading 1.a\n# Heading 2", "Heading 1"},
		{"## Small Heading\n# Bigger Heading", "Bigger Heading"},
		{"# A **title** with [formatting](http://stripped)", "A title with formatting"},
	}

	for _, c := range cases {
		content := parse(t, c.source)
		assert.Equal(t, content.Title, opt.NewNotEmptyString(c.title))
	}
}
// TestParseBody checks the Body extracted by the parser for a series of
// sources.
func TestParseBody(t *testing.T) {
// test parses source and asserts that the resulting Body equals
// expectedBody once wrapped with opt.NewNotEmptyString.
test := func(source string, expectedBody string) {
content := parse(t, source)
assert.Equal(t, content.Body, opt.NewNotEmptyString(expectedBody))
}
// Empty source.
test("", "")
// Title followed only by whitespace.
test("# A title\n \n", "")
// Text located before the title is not part of the body.
test("Paragraph \n\n# A title", "")
test("Paragraph \n\n# A title\nBody", "Body")
// The body starts after the heading picked as title ("# Bigger Heading"),
// and keeps the headings that follow it.
test(
`## Small Heading
# Bigger Heading
## Smaller Heading
Body
several lines
# Body heading
Paragraph:
* item1
* item2
`,
`## Smaller Heading
Body
several lines
# Body heading
Paragraph:
* item1
* item2`,
)
}
// TestParseLead checks the Lead extracted by the parser for a series of
// sources.
//
// NOTE(review): several expectations below stop the lead before later lines
// of the source, while parseLead only stops at a blank line; presumably the
// raw-string sources are meant to contain blank lines — verify the
// whitespace of these literals.
func TestParseLead(t *testing.T) {
// test parses source and asserts that the resulting Lead equals
// expectedLead once wrapped with opt.NewNotEmptyString.
test := func(source string, expectedLead string) {
content := parse(t, source)
assert.Equal(t, content.Lead, opt.NewNotEmptyString(expectedLead))
}
// Empty source.
test("", "")
// Title followed only by whitespace.
test("# A title\n \n", "")
// Text located before the title does not count as lead.
test(
`Paragraph
# A title`,
"",
)
// Single-line lead after the title.
test(
`Paragraph
# A title
Lead`,
"Lead",
)
// Multi-line lead followed by more text.
test(
`# A title
Lead
multiline
other`,
"Lead\nmultiline",
)
// Multi-line lead followed by a heading.
test(
`# A title
Lead
multiline
## Heading`,
"Lead\nmultiline",
)
// A heading right after the title is kept in the lead.
test(
`# A title
## Heading
Lead
multiline
other`,
`## Heading
Lead
multiline`,
)
// A list right after the title is kept in the lead.
test(
`# A title
* item1
* item2
Paragraph`,
`* item1
* item2`,
)
}
// parse runs a new Parser on source, asserts that parsing returned no
// error, and hands back the parsed content.
func parse(t *testing.T, source string) note.Content {
	parser := NewParser()
	parsed, parseErr := parser.Parse(source)
	assert.Nil(t, parseErr)
	return parsed
}

@ -4,6 +4,7 @@ import (
"io"
"github.com/mickael-menu/zk/adapter/handlebars"
"github.com/mickael-menu/zk/adapter/markdown"
"github.com/mickael-menu/zk/adapter/sqlite"
"github.com/mickael-menu/zk/adapter/tty"
"github.com/mickael-menu/zk/core/zk"
@ -37,6 +38,10 @@ func (c *Container) TemplateLoader(lang string) *handlebars.Loader {
return c.templateLoader
}
// Parser returns the Markdown parser used to read note contents.
// A new instance is created on every call.
func (c *Container) Parser() *markdown.Parser {
	parser := markdown.NewParser()
	return parser
}
// Database returns the DB instance for the given slip box, after executing any
// pending migration.
func (c *Container) Database(path string) (*sqlite.DB, error) {

@ -29,6 +29,6 @@ func (cmd *Index) Run(container *Container) error {
return db.WithTransaction(func(tx sqlite.Transaction) error {
notes := sqlite.NewNoteDAO(tx, container.Logger)
return note.Index(*dir, notes, container.Logger)
return note.Index(*dir, container.Parser(), notes, container.Logger)
})
}

@ -51,7 +51,7 @@ type Indexer interface {
}
// Index indexes the content of the notes in the given directory.
func Index(dir zk.Dir, indexer Indexer, logger util.Logger) error {
func Index(dir zk.Dir, parser Parser, indexer Indexer, logger util.Logger) error {
wrap := errors.Wrapper("indexation failed")
source := paths.Walk(dir.Path, dir.Config.Extension, logger)
@ -63,14 +63,14 @@ func Index(dir zk.Dir, indexer Indexer, logger util.Logger) error {
err = paths.Diff(source, target, func(change paths.DiffChange) error {
switch change.Kind {
case paths.DiffAdded:
metadata, err := metadata(change.Path, dir.Path)
metadata, err := metadata(change.Path, dir.Path, parser)
if err == nil {
err = indexer.Add(metadata)
}
logger.Err(err)
case paths.DiffModified:
metadata, err := metadata(change.Path, dir.Path)
metadata, err := metadata(change.Path, dir.Path, parser)
if err == nil {
err = indexer.Update(metadata)
}
@ -87,7 +87,7 @@ func Index(dir zk.Dir, indexer Indexer, logger util.Logger) error {
}
// metadata retrieves note metadata for the given file.
func metadata(path string, basePath string) (Metadata, error) {
func metadata(path string, basePath string, parser Parser) (Metadata, error) {
metadata := Metadata{
Path: path,
}
@ -98,9 +98,12 @@ func metadata(path string, basePath string) (Metadata, error) {
return metadata, err
}
contentStr := string(content)
contentParts := Parse(contentStr)
metadata.Title = contentParts.Title
metadata.Body = contentParts.Body
contentParts, err := parser.Parse(contentStr)
if err != nil {
return metadata, err
}
metadata.Title = contentParts.Title.String()
metadata.Body = contentParts.Body.String()
metadata.WordCount = len(strings.Fields(contentStr))
metadata.Checksum = fmt.Sprintf("%x", sha256.Sum256(content))

@ -1,26 +1,18 @@
package note
import (
"regexp"
"strings"
"github.com/mickael-menu/zk/util/opt"
)
type Content struct {
Title string
Body string
// Title is the heading of the note.
Title opt.String
// Lead is the opening paragraph or section of the note.
Lead opt.String
// Body is the content of the note, including the Lead but without the Title.
Body opt.String
}
var contentRegex = regexp.MustCompile(`(?m)^#\s+(.+?)\s*$`)
func Parse(content string) Content {
var res Content
if match := contentRegex.FindStringSubmatchIndex(content); len(match) >= 4 {
res.Title = content[match[2]:match[3]]
res.Body = strings.TrimSpace(content[match[3]:])
} else {
res.Body = strings.TrimSpace(content)
}
return res
// Parser extracts the Content of a note from its raw source.
type Parser interface {
// Parse returns the Content read from source, or an error.
Parse(source string) (Content, error)
}

@ -20,6 +20,7 @@ require (
github.com/rvflash/elapsed v0.2.0
github.com/tebeka/strftime v0.1.5 // indirect
github.com/tj/go-naturaldate v1.3.0
github.com/yuin/goldmark v1.3.1
golang.org/x/sys v0.0.0-20210104204734-6f8348627aad // indirect
gopkg.in/djherbis/times.v1 v1.2.0
gopkg.in/yaml.v2 v2.4.0 // indirect

@ -151,6 +151,8 @@ github.com/tj/assert v0.0.0-20190920132354-ee03d75cd160/go.mod h1:mZ9/Rh9oLWpLLD
github.com/tj/go-naturaldate v1.3.0 h1:OgJIPkR/Jk4bFMBLbxZ8w+QUxwjqSvzd9x+yXocY4RI=
github.com/tj/go-naturaldate v1.3.0/go.mod h1:rpUbjivDKiS1BlfMGc2qUKNZ/yxgthOfmytQs8d8hKk=
github.com/vmihailenco/msgpack v3.3.3+incompatible/go.mod h1:fy3FlTQTDXWkZ7Bh6AcGMlsjHatGryHQYUTf1ShIgkk=
github.com/yuin/goldmark v1.3.1 h1:eVwehsLsZlCJCwXyGLgg+Q4iFWE/eTIMG0e8waCmm/I=
github.com/yuin/goldmark v1.3.1/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/zclconf/go-cty v1.2.0 h1:sPHsy7ADcIZQP3vILvTjrh74ZA175TFP5vqiNK1UmlI=
github.com/zclconf/go-cty v1.2.0/go.mod h1:hOPWgoHbaTUnI5k4D2ld+GRpFJSCe6bCM7m1q/N4PQ8=
github.com/zenazn/goji v0.9.0/go.mod h1:7S9M489iMyHBNxwZnk9/EHS098H4/F6TATF2mIxtB1Q=

Loading…
Cancel
Save