From 05738525294fd5db6686e7dbdef71199da196b1d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mickae=CC=88l=20Menu?= Date: Sun, 3 Jan 2021 14:43:27 +0100 Subject: [PATCH] Index notes content --- adapter/sqlite/db.go | 22 +++---- adapter/sqlite/indexer.go | 123 ++++++++++++++++++++++++++++++++++++++ cmd/container.go | 5 +- cmd/index.go | 43 +++++++++++++ core/note/note.go | 8 --- core/zk/dir.go | 5 +- core/zk/dir_test.go | 20 +++---- core/zk/index.go | 93 ++++++++++++++++++++++++++++ core/zk/zk.go | 1 + go.mod | 1 + go.sum | 2 + main.go | 1 + util/logger.go | 23 +++++++ 13 files changed, 312 insertions(+), 35 deletions(-) create mode 100644 adapter/sqlite/indexer.go create mode 100644 cmd/index.go delete mode 100644 core/note/note.go create mode 100644 core/zk/index.go diff --git a/adapter/sqlite/db.go b/adapter/sqlite/db.go index a132c0b..1188b32 100644 --- a/adapter/sqlite/db.go +++ b/adapter/sqlite/db.go @@ -9,7 +9,7 @@ import ( // DB holds the connections to a SQLite database. type DB struct { - db *sql.DB + *sql.DB } // Open creates a new DB instance for the SQLite database at the given path. @@ -23,7 +23,7 @@ func Open(path string) (*DB, error) { // Close terminates the connections to the SQLite database. 
func (db *DB) Close() error { - err := db.db.Close() + err := db.Close() return errors.Wrap(err, "failed to close the database") } @@ -31,7 +31,7 @@ func (db *DB) Close() error { func (db *DB) Migrate() error { wrap := errors.Wrapper("database migration failed") - tx, err := db.db.Begin() + tx, err := db.Begin() if err != nil { return wrap(err) } @@ -51,18 +51,18 @@ func (db *DB) Migrate() error { filename TEXT NOT NULL, dir TEXT NOT NULL, title TEXT DEFAULT('') NOT NULL, - content TEXT DEFAULT('') NOT NULL, + body TEXT DEFAULT('') NOT NULL, word_count INTEGER DEFAULT(0) NOT NULL, checksum TEXT NOT NULL, - created TEXT DEFAULT(CURRENT_TIMESTAMP) NOT NULL, - modified TEXT DEFAULT(CURRENT_TIMESTAMP) NOT NULL, + created DATETIME DEFAULT(CURRENT_TIMESTAMP) NOT NULL, + modified DATETIME DEFAULT(CURRENT_TIMESTAMP) NOT NULL, UNIQUE(filename, dir) ) `, `CREATE INDEX IF NOT EXISTS notes_checksum_idx ON notes(checksum)`, ` CREATE VIRTUAL TABLE IF NOT EXISTS notes_fts USING fts5( - title, content, + title, body, content = notes, content_rowid = id, tokenize = 'porter unicode61 remove_diacritics 1' @@ -71,18 +71,18 @@ func (db *DB) Migrate() error { // Triggers to keep the FTS index up to date. 
` CREATE TRIGGER IF NOT EXISTS notes_ai AFTER INSERT ON notes BEGIN - INSERT INTO notes_fts(rowid, title, content) VALUES (new.id, new.title, new.content); + INSERT INTO notes_fts(rowid, title, body) VALUES (new.id, new.title, new.body); END `, ` CREATE TRIGGER IF NOT EXISTS notes_ad AFTER DELETE ON notes BEGIN - INSERT INTO notes_fts(notes_fts, rowid, title, content) VALUES('delete', old.id, old.title, old.content); + INSERT INTO notes_fts(notes_fts, rowid, title, body) VALUES('delete', old.id, old.title, old.body); END `, ` CREATE TRIGGER IF NOT EXISTS notes_au AFTER UPDATE ON notes BEGIN - INSERT INTO notes_fts(notes_fts, rowid, title, content) VALUES('delete', old.id, old.title, old.content); - INSERT INTO notes_fts(rowid, title, content) VALUES (new.id, new.title, new.content); + INSERT INTO notes_fts(notes_fts, rowid, title, body) VALUES('delete', old.id, old.title, old.body); + INSERT INTO notes_fts(rowid, title, body) VALUES (new.id, new.title, new.body); END `, `PRAGMA user_version = 1`, diff --git a/adapter/sqlite/indexer.go b/adapter/sqlite/indexer.go new file mode 100644 index 0000000..2f22d69 --- /dev/null +++ b/adapter/sqlite/indexer.go @@ -0,0 +1,123 @@ +package sqlite + +import ( + "database/sql" + "path/filepath" + "time" + + "github.com/mickael-menu/zk/core/zk" + "github.com/mickael-menu/zk/util" +) + +type Indexer struct { + tx *sql.Tx + root string + logger util.Logger + + // Prepared SQL statements + indexedStmt *sql.Stmt + addStmt *sql.Stmt + updateStmt *sql.Stmt + removeStmt *sql.Stmt +} + +func NewIndexer(tx *sql.Tx, root string, logger util.Logger) (*Indexer, error) { + indexedStmt, err := tx.Prepare("SELECT filename, dir, modified from notes") + if err != nil { + return nil, err + } + + addStmt, err := tx.Prepare(` + INSERT INTO notes (filename, dir, title, body, word_count, checksum, created, modified) + VALUES (?, ?, ?, ?, ?, ?, ?, ?) 
+	`)
+	if err != nil {
+		return nil, err
+	}
+
+	updateStmt, err := tx.Prepare(`
+		UPDATE notes
+		SET title = ?, body = ?, word_count = ?, checksum = ?, modified = ?
+		WHERE filename = ? AND dir = ?
+	`)
+	if err != nil {
+		return nil, err
+	}
+
+	removeStmt, err := tx.Prepare(`
+		DELETE FROM notes
+		WHERE filename = ? AND dir = ?
+	`)
+	if err != nil {
+		return nil, err
+	}
+
+	return &Indexer{
+		tx:          tx,
+		root:        root,
+		logger:      logger,
+		indexedStmt: indexedStmt,
+		addStmt:     addStmt,
+		updateStmt:  updateStmt,
+		removeStmt:  removeStmt,
+	}, nil
+}
+
+func (i *Indexer) Indexed() (<-chan zk.FileMetadata, error) {
+	rows, err := i.indexedStmt.Query()
+	if err != nil {
+		return nil, err
+	}
+
+	c := make(chan zk.FileMetadata)
+	go func() {
+		defer close(c)
+		defer rows.Close()
+		var (
+			filename, dir string
+			modified      time.Time
+		)
+
+		for rows.Next() {
+			if err := rows.Scan(&filename, &dir, &modified); err != nil {
+				i.logger.Err(err)
+				continue // Skip the row: the scan targets may still hold the previous row's values.
+			}
+
+			c <- zk.FileMetadata{
+				Path:     zk.Path{Dir: dir, Filename: filename, Abs: filepath.Join(i.root, dir, filename)},
+				Modified: modified,
+			}
+		}
+
+		err = rows.Err()
+		if err != nil {
+			i.logger.Err(err)
+		}
+	}()
+
+	return c, nil
+}
+
+func (i *Indexer) Add(metadata zk.NoteMetadata) error {
+	_, err := i.addStmt.Exec(
+		metadata.Path.Filename, metadata.Path.Dir, metadata.Title,
+		metadata.Body, metadata.WordCount, metadata.Checksum,
+		metadata.Created, metadata.Modified,
+	)
+	return err
+}
+
+func (i *Indexer) Update(metadata zk.NoteMetadata) error {
+	_, err := i.updateStmt.Exec(
+		metadata.Title, metadata.Body, metadata.WordCount,
+		metadata.Checksum, metadata.Modified,
+		metadata.Path.Filename, metadata.Path.Dir,
+	)
+	return err
+}
+
+func (i *Indexer) Remove(path zk.Path) error {
+	_, err := i.removeStmt.Exec(path.Filename, path.Dir) // The DELETE statement; updateStmt expects seven arguments and would always fail here.
+	return err
+}
diff --git a/cmd/container.go b/cmd/container.go
index 279b1e0..7d4d760 100644
--- a/cmd/container.go
+++ b/cmd/container.go
@@ -1,9 +1,6 @@
 package cmd
 
 import (
-	"log"
-	"os"
-
"github.com/mickael-menu/zk/adapter/handlebars" "github.com/mickael-menu/zk/adapter/sqlite" "github.com/mickael-menu/zk/core/zk" @@ -21,7 +18,7 @@ func NewContainer() *Container { date := date.NewFrozenNow() return &Container{ - Logger: log.New(os.Stderr, "zk: warning: ", 0), + Logger: util.NewStdLogger("zk: ", 0), // zk is short-lived, so we freeze the current date to use the same // date for any rendering during the execution. Date: &date, diff --git a/cmd/index.go b/cmd/index.go new file mode 100644 index 0000000..6276d7c --- /dev/null +++ b/cmd/index.go @@ -0,0 +1,43 @@ +package cmd + +import ( + "github.com/mickael-menu/zk/adapter/sqlite" + "github.com/mickael-menu/zk/core/zk" +) + +// Index indexes the content of all the notes in the slip box. +type Index struct { + Directory string `arg optional type:"path" default:"." help:"Directory containing the notes to index"` +} + +func (cmd *Index) Run(container *Container) error { + z, err := zk.Open(".") + if err != nil { + return err + } + + dir, err := z.RequireDirAt(cmd.Directory) + if err != nil { + return err + } + + db, err := container.Database(z) + if err != nil { + return err + } + tx, err := db.Begin() + defer tx.Rollback() + if err != nil { + return err + } + indexer, err := sqlite.NewIndexer(tx, z.Path, container.Logger) + if err != nil { + return err + } + err = zk.Index(*dir, indexer, container.Logger) + if err != nil { + return err + } + + return tx.Commit() +} diff --git a/core/note/note.go b/core/note/note.go deleted file mode 100644 index 0e5b0c9..0000000 --- a/core/note/note.go +++ /dev/null @@ -1,8 +0,0 @@ -package note - -// Metadata holds information about a particular note. 
-type Metadata struct { - Title string - Content string - WordCount int -} diff --git a/core/zk/dir.go b/core/zk/dir.go index 3379f95..0d639f6 100644 --- a/core/zk/dir.go +++ b/core/zk/dir.go @@ -31,7 +31,7 @@ func (d Dir) Walk(logger util.Logger) <-chan FileMetadata { go func() { defer close(c) - err := filepath.Walk(d.Path, func(path string, info os.FileInfo, err error) error { + err := filepath.Walk(d.Path, func(abs string, info os.FileInfo, err error) error { if err != nil { return err } @@ -50,7 +50,7 @@ func (d Dir) Walk(logger util.Logger) <-chan FileMetadata { return nil } - path, err := filepath.Rel(d.Path, path) + path, err := filepath.Rel(d.Path, abs) if err != nil { logger.Println(err) return nil @@ -65,6 +65,7 @@ func (d Dir) Walk(logger util.Logger) <-chan FileMetadata { Path: Path{ Dir: filepath.Join(d.Name, curDir), Filename: filename, + Abs: abs, }, Modified: info.ModTime(), } diff --git a/core/zk/dir_test.go b/core/zk/dir_test.go index e745ee5..0adaf27 100644 --- a/core/zk/dir_test.go +++ b/core/zk/dir_test.go @@ -17,27 +17,27 @@ func TestWalkRootDir(t *testing.T) { res := toSlice(dir.Walk(&util.NullLogger)) assert.Equal(t, res, []FileMetadata{ { - Path: Path{Dir: "", Filename: "a.md"}, + Path: Path{Dir: "", Filename: "a.md", Abs: filepath.Join(root, "a.md")}, Modified: date("2021-01-03T11:30:26.069257899+01:00"), }, { - Path: Path{Dir: "", Filename: "b.md"}, + Path: Path{Dir: "", Filename: "b.md", Abs: filepath.Join(root, "b.md")}, Modified: date("2021-01-03T11:30:27.545667767+01:00"), }, { - Path: Path{Dir: "dir1", Filename: "a.md"}, + Path: Path{Dir: "dir1", Filename: "a.md", Abs: filepath.Join(root, "dir1/a.md")}, Modified: date("2021-01-03T11:31:18.961628888+01:00"), }, { - Path: Path{Dir: "dir1", Filename: "b.md"}, + Path: Path{Dir: "dir1", Filename: "b.md", Abs: filepath.Join(root, "dir1/b.md")}, Modified: date("2021-01-03T11:31:24.692881103+01:00"), }, { - Path: Path{Dir: "dir1/dir1", Filename: "a.md"}, + Path: Path{Dir: "dir1/dir1", 
Filename: "a.md", Abs: filepath.Join(root, "dir1/dir1/a.md")}, Modified: date("2021-01-03T11:31:27.900472856+01:00"), }, { - Path: Path{Dir: "dir2", Filename: "a.md"}, + Path: Path{Dir: "dir2", Filename: "a.md", Abs: filepath.Join(root, "dir2/a.md")}, Modified: date("2021-01-03T11:31:51.001827456+01:00"), }, }) @@ -48,15 +48,15 @@ func TestWalkSubDir(t *testing.T) { res := toSlice(dir.Walk(&util.NullLogger)) assert.Equal(t, res, []FileMetadata{ { - Path: Path{Dir: "dir1", Filename: "a.md"}, + Path: Path{Dir: "dir1", Filename: "a.md", Abs: filepath.Join(root, "dir1/a.md")}, Modified: date("2021-01-03T11:31:18.961628888+01:00"), }, { - Path: Path{Dir: "dir1", Filename: "b.md"}, + Path: Path{Dir: "dir1", Filename: "b.md", Abs: filepath.Join(root, "dir1/b.md")}, Modified: date("2021-01-03T11:31:24.692881103+01:00"), }, { - Path: Path{Dir: "dir1/dir1", Filename: "a.md"}, + Path: Path{Dir: "dir1/dir1", Filename: "a.md", Abs: filepath.Join(root, "dir1/dir1/a.md")}, Modified: date("2021-01-03T11:31:27.900472856+01:00"), }, }) @@ -67,7 +67,7 @@ func TestWalkSubSubDir(t *testing.T) { res := toSlice(dir.Walk(&util.NullLogger)) assert.Equal(t, res, []FileMetadata{ { - Path: Path{Dir: "dir1/dir1", Filename: "a.md"}, + Path: Path{Dir: "dir1/dir1", Filename: "a.md", Abs: filepath.Join(root, "dir1/dir1/a.md")}, Modified: date("2021-01-03T11:31:27.900472856+01:00"), }, }) diff --git a/core/zk/index.go b/core/zk/index.go new file mode 100644 index 0000000..b10dc13 --- /dev/null +++ b/core/zk/index.go @@ -0,0 +1,93 @@ +package zk + +import ( + "crypto/sha256" + "fmt" + "io/ioutil" + "strings" + "time" + + "github.com/mickael-menu/zk/util" + "github.com/mickael-menu/zk/util/errors" + "gopkg.in/djherbis/times.v1" +) + +// NoteMetadata holds information about a particular note. 
+type NoteMetadata struct {
+	Path      Path
+	Title     string
+	Body      string
+	WordCount int
+	Created   time.Time
+	Modified  time.Time
+	Checksum  string
+}
+
+type Indexer interface {
+	Indexed() (<-chan FileMetadata, error)
+	Add(metadata NoteMetadata) error
+	Update(metadata NoteMetadata) error
+	Remove(path Path) error
+}
+
+// Index indexes the content of the notes in the given directory.
+func Index(dir Dir, indexer Indexer, logger util.Logger) error {
+	wrap := errors.Wrapper("indexation failed")
+
+	source := dir.Walk(logger)
+	target, err := indexer.Indexed()
+	if err != nil {
+		return wrap(err)
+	}
+
+	return Diff(source, target, func(change DiffChange) error {
+		switch change.Kind {
+		case DiffAdded:
+			metadata, err := noteMetadata(change.Path)
+			if err == nil {
+				err = indexer.Add(metadata)
+			}
+			logger.Err(err)
+
+		case DiffModified:
+			metadata, err := noteMetadata(change.Path)
+			if err == nil {
+				err = indexer.Update(metadata)
+			}
+			logger.Err(err)
+
+		case DiffRemoved:
+			logger.Err(indexer.Remove(change.Path)) // Report removal failures like the Add and Update branches do.
+		}
+		return nil
+	})
+}
+
+func noteMetadata(path Path) (NoteMetadata, error) {
+	metadata := NoteMetadata{
+		Path: path,
+	}
+
+	content, err := ioutil.ReadFile(path.Abs)
+	if err != nil {
+		return metadata, err
+	}
+	contentStr := string(content)
+	metadata.Body = contentStr
+	metadata.WordCount = len(strings.Fields(contentStr))
+	metadata.Checksum = fmt.Sprintf("%x", sha256.Sum256(content))
+
+	times, err := times.Stat(path.Abs)
+	if err != nil {
+		return metadata, err
+	}
+
+	metadata.Modified = times.ModTime()
+	if times.HasBirthTime() {
+		metadata.Created = times.BirthTime()
+	} else {
+		metadata.Created = time.Now()
+	}
+
+	return metadata, nil
+}
diff --git a/core/zk/zk.go b/core/zk/zk.go
index b3431c9..87f51f1 100644
--- a/core/zk/zk.go
+++ b/core/zk/zk.go
@@ -28,6 +28,7 @@ type Zk struct {
 type Path struct {
 	Dir      string
 	Filename string
+	Abs      string
 }
 
 // Open locates a slip box at the given path and parses its configuration.
diff --git a/go.mod b/go.mod index e7101d5..f418343 100644 --- a/go.mod +++ b/go.mod @@ -11,5 +11,6 @@ require ( github.com/kballard/go-shellquote v0.0.0-20180428030007-95032a82bc51 github.com/lestrrat-go/strftime v1.0.3 github.com/mattn/go-sqlite3 v1.14.6 + gopkg.in/djherbis/times.v1 v1.2.0 gopkg.in/yaml.v2 v2.4.0 // indirect ) diff --git a/go.sum b/go.sum index b826a2a..f33e663 100644 --- a/go.sum +++ b/go.sum @@ -70,5 +70,7 @@ google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9Ywl gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127 h1:qIbj1fsPNlZgppZ+VLlY7N33q108Sa+fhmuc+sWQYwY= gopkg.in/check.v1 v1.0.0-20180628173108-788fd7840127/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/djherbis/times.v1 v1.2.0 h1:UCvDKl1L/fmBygl2Y7hubXCnY7t4Yj46ZrBFNUipFbM= +gopkg.in/djherbis/times.v1 v1.2.0/go.mod h1:AQlg6unIsrsCEdQYhTzERy542dz6SFdQFZFv6mUY0P8= gopkg.in/yaml.v2 v2.4.0 h1:D8xgwECY7CYvx+Y2n4sBz93Jn9JRvxdiyyo8CTfuKaY= gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= diff --git a/main.go b/main.go index 7b90c13..114a88c 100644 --- a/main.go +++ b/main.go @@ -9,6 +9,7 @@ var Version = "dev" var Build = "dev" var cli struct { + Index cmd.Index `cmd help:"Index the notes in the given directory to be searchable"` Init cmd.Init `cmd help:"Create a slip box in the given directory"` New cmd.New `cmd help:"Create a new note in the given slip box directory"` Version kong.VersionFlag `help:"Print zk version"` diff --git a/util/logger.go b/util/logger.go index 3a6cdba..983310d 100644 --- a/util/logger.go +++ b/util/logger.go @@ -1,10 +1,16 @@ package util +import ( + "log" + "os" +) + // Logger can be used to report logging messages. // The native log.Logger type implements this interface. 
type Logger interface { Printf(format string, v ...interface{}) Println(v ...interface{}) + Err(error) } // NullLogger is a logger ignoring any input. @@ -15,3 +21,20 @@ type nullLogger struct{} func (n *nullLogger) Printf(format string, v ...interface{}) {} func (n *nullLogger) Println(v ...interface{}) {} + +func (n *nullLogger) Err(err error) {} + +// StdLogger is a logger using the standard logger. +type StdLogger struct { + *log.Logger +} + +func NewStdLogger(prefix string, flags int) StdLogger { + return StdLogger{log.New(os.Stderr, prefix, flags)} +} + +func (l StdLogger) Err(err error) { + if err != nil { + l.Printf("warning: %v", err) + } +}