Ported the importer crate over

pull/7/head
Benedikt Terhechte 2 years ago
parent 2a505f0d83
commit 7a0be16578

@ -1,7 +1,12 @@
use eyre::Result;
use std::path::Path;
use std::thread::JoinHandle;
use crossbeam_channel::Sender;
use eyre::Result;
use crate::Config;
use super::{query::Query, query_result::QueryResult};
use super::{db_message::DBMessage, query::Query, query_result::QueryResult};
pub trait DatabaseLike: Send + Sync {
fn new(path: impl AsRef<Path>) -> Result<Self>
@ -9,4 +14,6 @@ pub trait DatabaseLike: Send + Sync {
Self: Sized;
fn total_mails(&self) -> Result<usize>;
fn query(&self, query: &Query) -> Result<Vec<QueryResult>>;
fn import(self) -> (Sender<DBMessage>, JoinHandle<Result<usize>>);
fn save_config(&self, config: Config) -> Result<()>;
}

@ -1,6 +1,6 @@
use eyre::Report;
use ps_core::EmailEntry;
use crate::EmailEntry;
/// Parameter for sending work to the database during `import`.
pub enum DBMessage {

@ -1,3 +1,4 @@
pub mod database_like;
pub mod db_message;
pub mod query;
pub mod query_result;

@ -2,8 +2,13 @@ use crossbeam_channel;
use eyre::{Report, Result};
use std::thread::JoinHandle;
use crate::DatabaseLike;
pub trait Importerlike {
fn import(self) -> Result<(MessageReceiver, JoinHandle<Result<()>>)>;
fn import<Database: DatabaseLike + 'static>(
self,
database: Database,
) -> Result<(MessageReceiver, JoinHandle<Result<()>>)>;
}
/// The message that informs of the importers progress

@ -3,8 +3,11 @@ mod importer;
mod model;
mod types;
pub use database::database_like::DatabaseLike;
pub use database::db_message::DBMessage;
pub use database::query::{Field, OtherQuery, Query, ValueField, AMOUNT_FIELD_NAME};
pub use database::query_result::QueryResult;
pub use types::{Config, EmailEntry, EmailMeta, FormatType};
pub use crossbeam_channel;
pub use importer::{Importerlike, Message, MessageReceiver, MessageSender};

@ -0,0 +1,984 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "adler"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
[[package]]
name = "ahash"
version = "0.7.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb51a0695d8f838b1ee009b3fbf66bda078cd64590202a864a8f3e8c4315c47"
dependencies = [
"getrandom",
"once_cell",
"version_check",
]
[[package]]
name = "aho-corasick"
version = "0.7.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e37cfd5e7657ada45f742d6e99ca5788580b5c529dc78faf11ece6dc702656f"
dependencies = [
"memchr",
]
[[package]]
name = "ansi_term"
version = "0.12.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d52a9bb7ec0cf484c551830a7ce27bd20d67eac647e1befb56b0be4ee39a55d2"
dependencies = [
"winapi",
]
[[package]]
name = "arrayvec"
version = "0.4.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cd9fd44efafa8690358b7408d253adf110036b88f55672a933f01d616ad9b1b9"
dependencies = [
"nodrop",
]
[[package]]
name = "autocfg"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cdb031dd78e28731d87d56cc8ffef4a8f36ca26c38fe2de700543e627f8a464a"
[[package]]
name = "base64"
version = "0.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "904dfeac50f3cdaba28fc6f57fdcddb75f49ed61346676a78c4ffe55877802fd"
[[package]]
name = "bitflags"
version = "1.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bef38d45163c2f1dde094a7dfd33ccf595c92905c8f8f4fdc18d06fb1037718a"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "chrono"
version = "0.4.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "670ad68c9088c2a963aaa298cb369688cf3f9465ce5e2d4ca10e6e0098a1ce73"
dependencies = [
"libc",
"num-integer",
"num-traits",
"time 0.1.44",
"winapi",
]
[[package]]
name = "crc32fast"
version = "1.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "738c290dfaea84fc1ca15ad9c168d083b05a714e1efddd8edaab678dc28d2836"
dependencies = [
"cfg-if",
]
[[package]]
name = "crossbeam-channel"
version = "0.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06ed27e177f16d65f0f0c22a213e17c696ace5dd64b14258b52f9417ccb52db4"
dependencies = [
"cfg-if",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-deque"
version = "0.8.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6455c0ca19f0d2fbf751b908d5c55c1f5cbc65e03c4225427254b46890bdde1e"
dependencies = [
"cfg-if",
"crossbeam-epoch",
"crossbeam-utils",
]
[[package]]
name = "crossbeam-epoch"
version = "0.9.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ec02e091aa634e2c3ada4a392989e7c3116673ef0ac5b72232439094d73b7fd"
dependencies = [
"cfg-if",
"crossbeam-utils",
"lazy_static",
"memoffset",
"scopeguard",
]
[[package]]
name = "crossbeam-utils"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d82cfc11ce7f2c3faef78d8a684447b40d503d9681acebed6cb728d45940c4db"
dependencies = [
"cfg-if",
"lazy_static",
]
[[package]]
name = "dirs-next"
version = "2.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b98cf8ebf19c3d1b223e151f99a4f9f0690dca41414773390fc824184ac833e1"
dependencies = [
"cfg-if",
"dirs-sys-next",
]
[[package]]
name = "dirs-sys-next"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4ebda144c4fe02d1f7ea1a7d9641b6fc6b580adcfa024ae48797ecdeb6825b4d"
dependencies = [
"libc",
"redox_users",
"winapi",
]
[[package]]
name = "either"
version = "1.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e78d4f1cc4ae33bbfc157ed5d5a5ef3bc29227303d595861deb238fcec4e9457"
[[package]]
name = "email-parser"
version = "0.5.0"
source = "git+https://github.com/terhechte/email-parser#dba59d86771f7df67bb9e7f3a2c4b1e36b02d19b"
dependencies = [
"textcode",
"timezone-abbreviations",
]
[[package]]
name = "emlx"
version = "0.1.5"
source = "git+https://github.com/terhechte/emlx#44c2f278551d9e7a9ae0c3c3207c4471da3049fe"
dependencies = [
"email-parser",
"plist",
"thiserror",
]
[[package]]
name = "eyre"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "221239d1d5ea86bf5d6f91c9d6bc3646ffe471b08ff9b0f91c44f115ac969d2b"
dependencies = [
"indenter",
"once_cell",
]
[[package]]
name = "flate2"
version = "1.0.22"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1e6988e897c1c9c485f43b47a529cef42fde0547f9d8d41a7062518f1d8fc53f"
dependencies = [
"cfg-if",
"crc32fast",
"libc",
"miniz_oxide",
]
[[package]]
name = "getrandom"
version = "0.2.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7fcd999463524c52659517fe2cea98493cfe485d10565e7b0fb07dbba7ad2753"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "hashbrown"
version = "0.11.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ab5ef0d4909ef3724cc8cce6ccc8572c5c817592e9285f5464f8e86f8bd3726e"
dependencies = [
"ahash",
]
[[package]]
name = "heck"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6d621efb26863f0e9924c6ac577e8275e5e6b77455db64ffa6c65c904e9e132c"
dependencies = [
"unicode-segmentation",
]
[[package]]
name = "hermit-abi"
version = "0.1.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "62b467343b94ba476dcb2500d242dadbb39557df889310ac77c5d99100aaac33"
dependencies = [
"libc",
]
[[package]]
name = "indenter"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ce23b50ad8242c51a442f3ff322d56b02f08852c77e4c0b4d3fd684abc89c683"
[[package]]
name = "indexmap"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bc633605454125dec4b66843673f01c7df2b89479b32e0ed634e43a91cff62a5"
dependencies = [
"autocfg",
"hashbrown",
]
[[package]]
name = "itoa"
version = "0.4.8"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b71991ff56294aa922b450139ee08b3bfc70982c6b2c7562771375cf73542dd4"
[[package]]
name = "itoa"
version = "1.0.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1aab8fc367588b89dcee83ab0fd66b72b50b72fa1904d7095045ace2b0c81c35"
[[package]]
name = "lazy_static"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646"
[[package]]
name = "libc"
version = "0.2.112"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b03d17f364a3a042d5e5d46b053bbbf82c92c9430c592dd4c064dc6ee997125"
[[package]]
name = "line-wrap"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f30344350a2a51da54c1d53be93fade8a237e545dbcc4bdbe635413f2117cab9"
dependencies = [
"safemem",
]
[[package]]
name = "log"
version = "0.4.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "51b9bbe6c47d51fc3e1a9b945965946b4c44142ab8792c50835a980d362c2710"
dependencies = [
"cfg-if",
]
[[package]]
name = "lru"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6c748cfe47cb8da225c37595b3108bea1c198c84aaae8ea0ba76d01dda9fc803"
dependencies = [
"hashbrown",
]
[[package]]
name = "mbox-reader"
version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6231e973c0a8caceed71fac7355555012ba73fe230365989b298b36022e9e2ab"
dependencies = [
"memmap",
]
[[package]]
name = "memchr"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "308cc39be01b73d0d18f82a0e7b2a3df85245f84af96fdddc5d202d27e47b86a"
[[package]]
name = "memmap"
version = "0.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6585fd95e7bb50d6cc31e20d4cf9afb4e2ba16c5846fc76793f11218da9c475b"
dependencies = [
"libc",
"winapi",
]
[[package]]
name = "memoffset"
version = "0.6.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5aa361d4faea93603064a027415f07bd8e1d5c88c9fbf68bf56a285428fd79ce"
dependencies = [
"autocfg",
]
[[package]]
name = "miniz_oxide"
version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a92518e98c078586bc6c934028adcca4c92a53d6a958196de835170a01d84e4b"
dependencies = [
"adler",
"autocfg",
]
[[package]]
name = "nodrop"
version = "0.1.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72ef4a56884ca558e5ddb05a1d1e7e1bfd9a68d9ed024c21704cc98872dae1bb"
[[package]]
name = "num-format"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bafe4179722c2894288ee77a9f044f02811c86af699344c498b0840c698a2465"
dependencies = [
"arrayvec",
"itoa 0.4.8",
]
[[package]]
name = "num-integer"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2cc698a63b549a70bc047073d2949cce27cd1c7b0a4a862d08a8031bc2801db"
dependencies = [
"autocfg",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a64b1ec5cda2586e284722486d802acf1f7dbdc623e2bfc57e65ca1cd099290"
dependencies = [
"autocfg",
]
[[package]]
name = "num_cpus"
version = "1.13.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "05499f3756671c15885fee9034446956fff3f243d6077b91e5767df161f766b3"
dependencies = [
"hermit-abi",
"libc",
]
[[package]]
name = "once_cell"
version = "1.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "da32515d9f6e6e489d7bc9d84c71b060db7247dc035bbe44eac88cf87486d8d5"
[[package]]
name = "phf"
version = "0.10.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fabbf1ead8a5bcbc20f5f8b939ee3f5b0f6f281b6ad3468b84656b658b455259"
dependencies = [
"phf_macros",
"phf_shared",
"proc-macro-hack",
]
[[package]]
name = "phf_generator"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5d5285893bb5eb82e6aaf5d59ee909a06a16737a8970984dd7746ba9283498d6"
dependencies = [
"phf_shared",
"rand",
]
[[package]]
name = "phf_macros"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "58fdf3184dd560f160dd73922bea2d5cd6e8f064bf4b13110abd81b03697b4e0"
dependencies = [
"phf_generator",
"phf_shared",
"proc-macro-hack",
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "phf_shared"
version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b6796ad771acdc0123d2a88dc428b5e38ef24456743ddb1744ed628f9815c096"
dependencies = [
"siphasher",
]
[[package]]
name = "pin-project-lite"
version = "0.2.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d31d11c69a6b52a174b42bdc0c30e5e11670f90788b2c471c31c1d17d449443"
[[package]]
name = "plist"
version = "1.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bd39bc6cdc9355ad1dc5eeedefee696bb35c34caf21768741e81826c0bbd7225"
dependencies = [
"base64",
"indexmap",
"line-wrap",
"serde",
"time 0.3.5",
"xml-rs",
]
[[package]]
name = "ppv-lite86"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ed0cfbc8191465bed66e1718596ee0b0b35d5ee1f41c5df2189d0fe8bde535ba"
[[package]]
name = "proc-macro-hack"
version = "0.5.19"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dbf0c48bc1d91375ae5c3cd81e3722dff1abcf81a30960240640d223f59fe0e5"
[[package]]
name = "proc-macro2"
version = "1.0.34"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2f84e92c0f7c9d58328b85a78557813e4bd845130db68d7184635344399423b1"
dependencies = [
"unicode-xid",
]
[[package]]
name = "ps-core"
version = "0.2.0"
dependencies = [
"chrono",
"crossbeam-channel",
"eyre",
"flate2",
"lru",
"num-format",
"once_cell",
"rand",
"rayon",
"regex",
"rsql_builder",
"serde",
"serde_json",
"shellexpand",
"strum",
"strum_macros",
"thiserror",
"tracing",
"tracing-subscriber",
"treemap",
]
[[package]]
name = "ps-importer"
version = "0.2.0"
dependencies = [
"chrono",
"email-parser",
"emlx",
"eyre",
"flate2",
"mbox-reader",
"once_cell",
"ps-core",
"rand",
"rayon",
"regex",
"serde",
"serde_json",
"shellexpand",
"thiserror",
"tracing",
"tracing-subscriber",
"walkdir",
]
[[package]]
name = "quote"
version = "1.0.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38bc8cc6a5f2e3655e0899c1b848643b2562f853f114bfec7be120678e3ace05"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2e7573632e6454cf6b99d7aac4ccca54be06da05aca2ef7423d22d27d4d4bcd8"
dependencies = [
"libc",
"rand_chacha",
"rand_core",
"rand_hc",
]
[[package]]
name = "rand_chacha"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6c10a63a0fa32252be49d21e7709d4d4baf8d231c2dbce1eaa8141b9b127d88"
dependencies = [
"ppv-lite86",
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
dependencies = [
"getrandom",
]
[[package]]
name = "rand_hc"
version = "0.3.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d51e9f596de227fda2ea6c84607f5558e196eeaf43c986b724ba4fb8fdf497e7"
dependencies = [
"rand_core",
]
[[package]]
name = "rayon"
version = "1.5.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c06aca804d41dbc8ba42dfd964f0d01334eceb64314b9ecf7c5fad5188a06d90"
dependencies = [
"autocfg",
"crossbeam-deque",
"either",
"rayon-core",
]
[[package]]
name = "rayon-core"
version = "1.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d78120e2c850279833f1dd3582f730c4ab53ed95aeaaaa862a2a5c71b1656d8e"
dependencies = [
"crossbeam-channel",
"crossbeam-deque",
"crossbeam-utils",
"lazy_static",
"num_cpus",
]
[[package]]
name = "redox_syscall"
version = "0.2.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8383f39639269cde97d255a32bdb68c047337295414940c68bdd30c2e13203ff"
dependencies = [
"bitflags",
]
[[package]]
name = "redox_users"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "528532f3d801c87aec9def2add9ca802fe569e44a544afe633765267840abe64"
dependencies = [
"getrandom",
"redox_syscall",
]
[[package]]
name = "regex"
version = "1.5.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d07a8629359eb56f1e2fb1652bb04212c072a87ba68546a04065d525673ac461"
dependencies = [
"aho-corasick",
"memchr",
"regex-syntax",
]
[[package]]
name = "regex-syntax"
version = "0.6.25"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f497285884f3fcff424ffc933e56d7cbca511def0c9831a7f9b5f6153e3cc89b"
[[package]]
name = "rsql_builder"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9dbd5712883cef396d13516bb52b300fd97a29d52ca20361f0a4905bd38a2355"
dependencies = [
"serde",
"serde_json",
]
[[package]]
name = "rustversion"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2cc38e8fa666e2de3c4aba7edeb5ffc5246c1c2ed0e3d17e560aeeba736b23f"
[[package]]
name = "ryu"
version = "1.0.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73b4b750c782965c211b42f022f59af1fbceabdd026623714f104152f1ec149f"
[[package]]
name = "safemem"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ef703b7cb59335eae2eb93ceb664c0eb7ea6bf567079d843e09420219668e072"
[[package]]
name = "same-file"
version = "1.0.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "93fc1dc3aaa9bfed95e02e6eadabb4baf7e3078b0bd1b4d7b6b0b68378900502"
dependencies = [
"winapi-util",
]
[[package]]
name = "scopeguard"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d29ab0c6d3fc0ee92fe66e2d99f700eab17a8d57d1c1d3b748380fb20baa78cd"
[[package]]
name = "serde"
version = "1.0.131"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b4ad69dfbd3e45369132cc64e6748c2d65cdfb001a2b1c232d128b4ad60561c1"
dependencies = [
"serde_derive",
]
[[package]]
name = "serde_derive"
version = "1.0.131"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b710a83c4e0dff6a3d511946b95274ad9ca9e5d3ae497b63fda866ac955358d2"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "serde_json"
version = "1.0.73"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bcbd0344bc6533bc7ec56df11d42fb70f1b912351c0825ccb7211b59d8af7cf5"
dependencies = [
"itoa 1.0.1",
"ryu",
"serde",
]
[[package]]
name = "sharded-slab"
version = "0.1.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "900fba806f70c630b0a382d0d825e17a0f19fcd059a2ade1ff237bcddf446b31"
dependencies = [
"lazy_static",
]
[[package]]
name = "shellexpand"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "83bdb7831b2d85ddf4a7b148aa19d0587eddbe8671a436b7bd1182eaad0f2829"
dependencies = [
"dirs-next",
]
[[package]]
name = "siphasher"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "533494a8f9b724d33625ab53c6c4800f7cc445895924a8ef649222dcb76e938b"
[[package]]
name = "smallvec"
version = "1.7.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ecab6c735a6bb4139c0caafd0cc3635748bbb3acf4550e8138122099251f309"
[[package]]
name = "strum"
version = "0.23.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cae14b91c7d11c9a851d3fbc80a963198998c2a64eec840477fa92d8ce9b70bb"
[[package]]
name = "strum_macros"
version = "0.23.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5bb0dc7ee9c15cea6199cde9a127fa16a4c5819af85395457ad72d68edc85a38"
dependencies = [
"heck",
"proc-macro2",
"quote",
"rustversion",
"syn",
]
[[package]]
name = "syn"
version = "1.0.82"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8daf5dd0bb60cbd4137b1b587d2fc0ae729bc07cf01cd70b36a1ed5ade3b9d59"
dependencies = [
"proc-macro2",
"quote",
"unicode-xid",
]
[[package]]
name = "textcode"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "13de2d432b3eea016f6a010139c8b5a5bf050b5a05b8993d04033ca5232e44a9"
[[package]]
name = "thiserror"
version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "854babe52e4df1653706b98fcfc05843010039b406875930a70e4d9644e5c417"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.30"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "aa32fd3f627f367fe16f893e2597ae3c05020f8bba2666a4e6ea73d377e5714b"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "thread_local"
version = "1.1.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8018d24e04c95ac8790716a5987d0fec4f8b27249ffa0f7d33f1369bdfb88cbd"
dependencies = [
"once_cell",
]
[[package]]
name = "time"
version = "0.1.44"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6db9e6914ab8b1ae1c260a4ae7a49b6c5611b40328a735b21862567685e73255"
dependencies = [
"libc",
"wasi",
"winapi",
]
[[package]]
name = "time"
version = "0.3.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41effe7cfa8af36f439fac33861b66b049edc6f9a32331e2312660529c1c24ad"
dependencies = [
"itoa 0.4.8",
"libc",
]
[[package]]
name = "timezone-abbreviations"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ead19eae5d0834473ce509eb859282c262e5b869a0171641d1668cd54c594d8f"
dependencies = [
"phf",
]
[[package]]
name = "tracing"
version = "0.1.29"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "375a639232caf30edfc78e8d89b2d4c375515393e7af7e16f01cd96917fb2105"
dependencies = [
"cfg-if",
"pin-project-lite",
"tracing-attributes",
"tracing-core",
]
[[package]]
name = "tracing-attributes"
version = "0.1.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f4f480b8f81512e825f337ad51e94c1eb5d3bbdf2b363dcd01e2b19a9ffe3f8e"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "tracing-core"
version = "0.1.21"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1f4ed65637b8390770814083d20756f87bfa2c21bf2f110babdc5438351746e4"
dependencies = [
"lazy_static",
]
[[package]]
name = "tracing-log"
version = "0.1.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6923477a48e41c1951f1999ef8bb5a3023eb723ceadafe78ffb65dc366761e3"
dependencies = [
"lazy_static",
"log",
"tracing-core",
]
[[package]]
name = "tracing-subscriber"
version = "0.3.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "245da694cc7fc4729f3f418b304cb57789f1bed2a78c575407ab8a23f53cb4d3"
dependencies = [
"ansi_term",
"sharded-slab",
"smallvec",
"thread_local",
"tracing-core",
"tracing-log",
]
[[package]]
name = "treemap"
version = "0.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1571f89da27a5e1aa83304ee1ab9519ea8c6432b4c8903aaaa6c9a9eecb6f36"
[[package]]
name = "unicode-segmentation"
version = "1.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8895849a949e7845e06bd6dc1aa51731a103c42707010a5b591c0038fb73385b"
[[package]]
name = "unicode-xid"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8ccb82d61f80a663efe1f787a51b16b5a51e3314d6ac365b08639f52387b33f3"
[[package]]
name = "version_check"
version = "0.9.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5fecdca9a5291cc2b8dcf7dc02453fee791a280f3743cb0905f8822ae463b3fe"
[[package]]
name = "walkdir"
version = "2.3.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "808cf2735cd4b6866113f648b791c6adc5714537bc222d9347bb203386ffda56"
dependencies = [
"same-file",
"winapi",
"winapi-util",
]
[[package]]
name = "wasi"
version = "0.10.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1a143597ca7c7793eff794def352d41792a93c481eb1042423ff7ff72ba2c31f"
[[package]]
name = "winapi"
version = "0.3.9"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5c839a674fcd7a98952e593242ea400abe93992746761e38641405d28b00f419"
dependencies = [
"winapi-i686-pc-windows-gnu",
"winapi-x86_64-pc-windows-gnu",
]
[[package]]
name = "winapi-i686-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ac3b87c63620426dd9b991e5ce0329eff545bccbbb34f3be09ff6fb6ab51b7b6"
[[package]]
name = "winapi-util"
version = "0.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "70ec6ce85bb158151cae5e5c87f95a8e97d2c0c4b001223f33a334e3ce5de178"
dependencies = [
"winapi",
]
[[package]]
name = "winapi-x86_64-pc-windows-gnu"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "712e227841d057c1ee1cd2fb22fa7e5a5461ae8e48fa2ca79ec42cfc1931183f"
[[package]]
name = "xml-rs"
version = "0.8.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d2d7d3948613f75c98fd9328cfdcc45acc4d360655289d0a7d4ec931392200a3"

@ -0,0 +1,26 @@
[package]
name = "ps-importer"
version = "0.2.0"
edition = "2021"
# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
[dependencies]
eyre = "0.6.5"
thiserror = "1.0.29"
tracing = "0.1.29"
tracing-subscriber = "0.3.0"
regex = "1.5.3"
flate2 = "1.0.22"
once_cell = "1.8.0"
email-parser = { git = "https://github.com/terhechte/email-parser", features = ["sender", "to", "in-reply-to", "date", "subject", "mime", "allow-duplicate-headers", "compatibility-fixes"]}
rayon = "1.5.1"
chrono = "0.4.19"
emlx = { git = "https://github.com/terhechte/emlx", features = []}
walkdir = "2.3.2"
mbox-reader = "0.2.0"
rand = "0.8.4"
shellexpand = "2.1.0"
serde_json = "1.0.70"
serde = { version = "1.0.130", features = ["derive"]}
ps-core = { path = "../ps-core" }

@ -0,0 +1,87 @@
//! We use a stubbornly stupid algorithm where we just
//! recursively drill down into the appropriate folder
//! until we find `emlx` files and return those.
use eyre::{eyre, Result};
use rayon::prelude::*;
use walkdir::WalkDir;
use super::super::shared::filesystem::emails_in;
use ps_core::{Config, Message, MessageSender};
use super::mail::Mail;
use std::path::PathBuf;
pub fn read_emails(config: &Config, sender: MessageSender) -> Result<Vec<Mail>> {
// on macOS, we might need permission for the `Library` folder...
match std::fs::read_dir(&config.emails_folder_path) {
Ok(_) => (),
Err(e) => match e.kind() {
#[cfg(target_os = "macos")]
std::io::ErrorKind::PermissionDenied => {
tracing::info!("Could not read folder: {}", e);
if let Err(e) = sender.send(Message::MissingPermissions) {
tracing::error!("Error sending: {}", e);
}
// We should return early now, otherwise the code below will send a different
// error
return Ok(Vec::new());
}
_ => {
if let Err(e) = sender.send(Message::Error(eyre!("Error: {:?}", &e))) {
tracing::error!("Error sending: {}", e);
}
}
},
}
// As `walkdir` does not support `par_iter` (see https://www.reddit.com/r/rust/comments/6eif7r/walkdir_users_we_need_you/)
// - -we first collect all folders,
// then all sub-folders in those ending in mboxending in .mbox and then iterate over them in paralell
let folders: Vec<PathBuf> = WalkDir::new(&config.emails_folder_path)
.into_iter()
.filter_map(|e| match e {
Ok(n)
if n.path().is_dir()
&& n.path()
.to_str()
.map(|e| e.contains(".mbox"))
.unwrap_or(false) =>
{
tracing::trace!("Found folder {}", n.path().display());
Some(n.path().to_path_buf())
}
Err(e) => {
tracing::info!("Could not read folder: {}", e);
if let Err(e) = sender.send(Message::Error(eyre!("Could not read folder: {:?}", e)))
{
tracing::error!("Error sending error {}", e);
}
None
}
_ => None,
})
.collect();
sender.send(Message::ReadTotal(folders.len()))?;
let mails: Vec<Mail> = folders
.into_par_iter()
.filter_map(
|path| match emails_in(path.clone(), sender.clone(), Mail::new) {
Ok(n) => Some(n),
Err(e) => {
tracing::error!("{} {:?}", path.display(), &e);
if let Err(e) = sender.send(Message::Error(eyre!(
"Could read mails in {}: {:?}",
path.display(),
e
))) {
tracing::error!("Error sending error {}", e);
}
None
}
},
)
.flatten()
.collect();
Ok(mails)
}

@ -0,0 +1,70 @@
use emlx::parse_emlx;
use eyre::Result;
use std::borrow::Cow;
use std::path::{Path, PathBuf};
use ps_core::EmailMeta;
use super::super::shared::parse::ParseableEmail;
pub struct Mail {
path: PathBuf,
// This is parsed out of the `emlx` as it is parsed
is_seen: bool,
// This is parsed out of the `path`
label: Option<String>,
// Maildata
data: Vec<u8>,
}
impl Mail {
pub fn new<P: AsRef<Path>>(path: P) -> Option<Self> {
let path = path.as_ref();
let name = path.file_name()?.to_str()?;
if !name.ends_with(".emlx") {
return None;
}
// find the folder ending with `.mbox` in the path
let ext = ".mbox";
let label = path
.iter()
.map(|e| e.to_str())
.flatten()
.find(|s| s.ends_with(ext))
.map(|s| s.replace(ext, ""));
Some(Self {
path: path.to_path_buf(),
is_seen: false,
label,
data: Vec::new(),
})
}
}
impl ParseableEmail for Mail {
fn prepare(&mut self) -> Result<()> {
let data = std::fs::read(self.path.as_path())?;
let parsed = parse_emlx(&data)?;
self.is_seen = !parsed.flags.is_read;
self.data = parsed.message.to_vec();
Ok(())
}
fn message(&self) -> Result<Cow<'_, [u8]>> {
Ok(Cow::Borrowed(self.data.as_slice()))
}
fn path(&self) -> &Path {
self.path.as_path()
}
fn meta(&self) -> Result<Option<EmailMeta>> {
let tags = match self.label {
Some(ref n) => vec![n.clone()],
None => vec![],
};
let meta = EmailMeta {
tags,
is_seen: self.is_seen,
};
Ok(Some(meta))
}
}

@ -0,0 +1,24 @@
mod filesystem;
mod mail;
use shellexpand;
use std::{path::PathBuf, str::FromStr};
use super::{ImporterFormat, Result};
use ps_core::{Config, MessageSender};
#[derive(Default)]
pub struct AppleMail {}
impl ImporterFormat for AppleMail {
type Item = mail::Mail;
fn default_path() -> Option<PathBuf> {
let path = shellexpand::tilde("~/Library/Mail");
Some(PathBuf::from_str(&path.to_string()).unwrap())
}
fn emails(&self, config: &Config, sender: MessageSender) -> Result<Vec<Self::Item>> {
filesystem::read_emails(config, sender)
}
}

@ -0,0 +1,46 @@
use chrono::prelude::*;
use eyre::{bail, Result};
use serde::Deserialize;
use serde_json;
use super::raw_email::RawEmailEntry;
use ps_core::EmailMeta;
#[derive(Deserialize, Debug, Clone)]
pub struct Meta {
pub msg_id: String,
pub subject: String,
pub labels: Vec<String>,
pub flags: Vec<String>,
internal_date: i64,
#[serde(skip, default = "Utc::now")]
pub created: DateTime<Utc>,
}
impl Meta {
pub fn is_seen(&self) -> bool {
self.labels.contains(&"\\seen".to_owned())
}
}
impl From<Meta> for EmailMeta {
fn from(meta: Meta) -> Self {
let is_seen = meta.is_seen();
EmailMeta {
tags: meta.labels,
is_seen,
}
}
}
pub fn parse_meta(raw_entry: &RawEmailEntry) -> Result<Meta> {
let content = match raw_entry.read_gmail_meta() {
None => bail!("No Gmail Meta Information Available"),
Some(content) => content?,
};
let mut meta: Meta = serde_json::from_slice(&content)?;
meta.created = Utc.timestamp(meta.internal_date, 0);
Ok(meta)
}

@ -0,0 +1,23 @@
mod meta;
mod raw_email;
use super::shared::filesystem::{emails_in, folders_in};
use super::{Config, ImporterFormat, MessageSender, Result};
use raw_email::RawEmailEntry;
#[derive(Default)]
pub struct Gmail {}
impl ImporterFormat for Gmail {
type Item = raw_email::RawEmailEntry;
fn default_path() -> Option<std::path::PathBuf> {
None
}
fn emails(&self, config: &Config, sender: MessageSender) -> Result<Vec<Self::Item>> {
folders_in(&config.emails_folder_path, sender, |path, sender| {
emails_in(path, sender, RawEmailEntry::new)
})
}
}

@ -0,0 +1,118 @@
use eyre::{eyre, Result};
use flate2::read::GzDecoder;
use std::borrow::Cow;
use std::io::Read;
use std::path::{Path, PathBuf};
use super::super::shared::parse::ParseableEmail;
use ps_core::EmailMeta;
/// Raw representation of an email.
/// Contains the paths to the relevant files as well
/// as the name of the folder the email was in.
#[derive(Debug)]
pub struct RawEmailEntry {
#[allow(unused)]
folder_name: String,
eml_path: PathBuf,
gmail_meta_path: Option<PathBuf>,
is_compressed: bool,
#[allow(unused)]
size: u64,
}
impl RawEmailEntry {
pub fn path(&self) -> &Path {
self.eml_path.as_path()
}
pub fn read(&self) -> Result<Vec<u8>> {
if self.is_compressed {
let reader = std::fs::File::open(&self.eml_path)?;
let mut decoder = GzDecoder::new(reader);
let mut buffer = Vec::new();
decoder.read_to_end(&mut buffer)?;
Ok(buffer)
} else {
std::fs::read(&self.eml_path).map_err(|e| eyre!("IO Error: {}", &e))
}
}
pub fn has_gmail_meta(&self) -> bool {
self.gmail_meta_path.is_some()
}
pub fn read_gmail_meta(&self) -> Option<Result<Vec<u8>>> {
// Just using map here returns a `&Option` whereas we want `Option`
#[allow(clippy::manual_map)]
match &self.gmail_meta_path {
Some(p) => Some(std::fs::read(p).map_err(|e| eyre!("IO Error: {}", &e))),
None => None,
}
}
}
impl RawEmailEntry {
pub(super) fn new<P: AsRef<std::path::Path>>(path: P) -> Option<RawEmailEntry> {
let path = path.as_ref();
let stem = path.file_stem()?.to_str()?;
let name = path.file_name()?.to_str()?;
let is_eml_gz = name.ends_with(".eml.gz");
let is_eml = name.ends_with(".eml");
if !is_eml_gz && !is_eml {
return None;
}
let is_compressed = is_eml_gz;
let folder_name = path.parent()?.file_name()?.to_str()?.to_owned();
let eml_path = path.to_path_buf();
let file_metadata = path.metadata().ok()?;
// Build a meta path
let meta_path = path
.parent()?
.join(format!("{}.meta", stem.replace(".eml", "")));
// Only embed it, if it exists
let gmail_meta_path = if meta_path.exists() {
Some(meta_path)
} else {
None
};
tracing::trace!(
"Email [c?: {}] {} {:?}",
is_compressed,
eml_path.display(),
gmail_meta_path
);
Some(RawEmailEntry {
folder_name,
eml_path,
gmail_meta_path,
is_compressed,
size: file_metadata.len(),
})
}
}
impl ParseableEmail for RawEmailEntry {
fn prepare(&mut self) -> Result<()> {
Ok(())
}
fn message(&self) -> Result<Cow<'_, [u8]>> {
Ok(Cow::Owned(self.read()?))
}
fn path(&self) -> &Path {
self.eml_path.as_path()
}
fn meta(&self) -> Result<Option<EmailMeta>> {
if self.has_gmail_meta() {
Ok(Some(super::meta::parse_meta(self)?.into()))
} else {
Ok(None)
}
}
}

@ -0,0 +1,133 @@
//! FIXME: Implement our own Mailbox reader that better implements the spec.
//! use jetsci for efficient searching:
//! https://github.com/shepmaster/jetscii
//! (or aho corasick)
//! MBox parsing is also not particularly fast as it currently doesn't use parallelism
use eyre::eyre;
use mbox_reader;
use rayon::prelude::*;
use tracing;
use walkdir::WalkDir;
use super::{Config, ImporterFormat, Message, MessageSender, Result};
use super::shared::parse::ParseableEmail;
use ps_core::EmailMeta;
use std::borrow::Cow;
use std::path::{Path, PathBuf};
pub struct Mail {
path: PathBuf,
/// For now, we go with a very simple implementation:
/// Each mal will have a heap-allocated vec of the corresponding
/// bytes in the mbox.
/// This wastes a lot of allocations and shows the limits of our current abstraction.
/// It would be better to just save the headers and ignore the rest.
content: Vec<u8>,
}
#[derive(Default)]
pub struct Mbox;
/// The inner parsing code
fn inner_emails(config: &Config, sender: MessageSender) -> Result<Vec<Mail>> {
// find all files ending in .mbox
let mboxes: Vec<PathBuf> = WalkDir::new(&config.emails_folder_path)
.into_iter()
.filter_map(|e| match e {
Ok(n)
if n.path().is_file()
&& n.path()
.to_str()
.map(|e| e.contains(".mbox"))
.unwrap_or(false) =>
{
tracing::trace!("Found mbox file {}", n.path().display());
Some(n.path().to_path_buf())
}
Err(e) => {
tracing::info!("Could not read folder: {}", e);
if let Err(e) = sender.send(Message::Error(eyre!("Could not read folder: {:?}", e)))
{
tracing::error!("Error sending error {}", e);
}
None
}
_ => None,
})
.collect();
let mails: Vec<Mail> = mboxes
.into_par_iter()
.filter_map(|mbox_file| {
let mbox = match mbox_reader::MboxFile::from_file(&mbox_file) {
Ok(n) => n,
Err(e) => {
tracing::error!(
"Could not open mbox file at {}: {}",
&mbox_file.display(),
e
);
return None;
}
};
let inner_mails: Vec<Mail> = mbox
.iter()
.filter_map(|e| {
let content = match e.message() {
Some(n) => n,
None => {
tracing::error!("Could not parse mail at offset {}", e.offset());
return None;
}
};
Some(Mail {
path: mbox_file.clone(),
content: content.to_owned(),
})
})
.collect();
Some(inner_mails)
})
.flatten()
.collect();
Ok(mails)
}
impl ImporterFormat for Mbox {
type Item = Mail;
fn default_path() -> Option<std::path::PathBuf> {
None
}
fn emails(&self, config: &Config, sender: MessageSender) -> Result<Vec<Self::Item>> {
inner_emails(config, sender)
}
}
impl ParseableEmail for Mail {
fn prepare(&mut self) -> Result<()> {
Ok(())
}
fn message(&self) -> Result<Cow<'_, [u8]>> {
Ok(self.content.as_slice().into())
}
fn path(&self) -> &Path {
self.path.as_path()
}
fn meta(&self) -> Result<Option<EmailMeta>> {
// The filename is a tag, e.g. `INBOX.mbox`, `WORK.mbox`
if let Some(prefix) = self.path.file_stem() {
if let Some(s) = prefix.to_str().map(|s| s.to_owned()) {
return Ok(Some(EmailMeta {
tags: vec![s],
is_seen: false,
}));
}
}
Ok(None)
}
}

@ -0,0 +1,30 @@
use std::path::PathBuf;
pub use eyre::Result;
mod apple_mail;
mod gmailbackup;
mod mbox;
pub mod shared;
pub use apple_mail::AppleMail;
pub use gmailbackup::Gmail;
pub use mbox::Mbox;
use shared::parse::ParseableEmail;
pub use ps_core::{Config, Message, MessageReceiver, MessageSender};
/// This is implemented by the various formats
/// to define how they return email data.
pub trait ImporterFormat: Send + Sync {
type Item: ParseableEmail;
/// The default location path where the data for this format resides
/// on system. If there is none (such as for mbox) return `None`
fn default_path() -> Option<PathBuf>;
/// Return all the emails in this format.
/// Use the sneder to give progress updates via the `ReadProgress` case.
fn emails(&self, config: &Config, sender: MessageSender) -> Result<Vec<Self::Item>>;
}

@ -0,0 +1,78 @@
use super::parse::{parse_email, ParseableEmail};
use ps_core::{Config, DBMessage, DatabaseLike, Message, MessageSender};
use eyre::{bail, Result};
use rayon::prelude::*;
pub fn into_database<Mail: ParseableEmail + 'static, Database: DatabaseLike + 'static>(
config: &Config,
mut emails: Vec<Mail>,
tx: MessageSender,
database: Database,
) -> Result<usize> {
let total = emails.len();
tracing::info!("Loaded {} emails", &total);
// First, communicate the total amount of mails received
if let Err(e) = tx.send(Message::WriteTotal(total)) {
bail!("Channel Failure {:?}", &e);
}
// Save the config into the database
if let Err(e) = database.save_config(config.clone()) {
bail!("Could not save config to database {:?}", &e);
}
// Consume the connection to begin the import. It will return the `handle` to use for
// waiting for the database to finish importing, and the `sender` to submit work.
let (sender, handle) = database.import();
// Iterate over the mails..
emails
// in paralell..
.par_iter_mut()
// parsing them
.map(|raw_mail| parse_email(raw_mail, &config.sender_emails))
// and inserting them into SQLite
.for_each(|entry| {
// Try to write the message into the database
if let Err(e) = match entry {
Ok(mail) => sender.send(DBMessage::Mail(Box::new(mail))),
Err(e) => sender.send(DBMessage::Error(e)),
} {
tracing::error!("Error Inserting into Database: {:?}", &e);
}
// Signal the write
if let Err(e) = tx.send(Message::WriteOne) {
tracing::error!("Channel Failure: {:?}", &e);
}
});
// Tell SQLite there's no more work coming. This will exit the listening loop
if let Err(e) = sender.send(DBMessage::Done) {
bail!("Channel Failure {:?}", &e);
}
// Wait for SQLite to finish parsing
tracing::info!("Waiting for SQLite to finish");
if let Err(e) = tx.send(Message::FinishingUp) {
bail!("Channel Failure {:?}", &e);
}
tracing::trace!("Waiting for database handle...");
let output = match handle.join() {
Ok(Ok(count)) => Ok(count),
Ok(Err(e)) => Err(e),
Err(e) => Err(eyre::eyre!("Join Error: {:?}", &e)),
};
// Tell the caller that we're done processing. This will allow leaving the
// display loop
tracing::trace!("Messaging Done");
if let Err(e) = tx.send(Message::Done) {
bail!("Channel Failure {:?}", &e);
}
output
}

@ -0,0 +1,76 @@
use eyre::{bail, Result};
use rayon::prelude::*;
use tracing::trace;
use std::path::{Path, PathBuf};
use ps_core::{Message, MessageSender};
/// Call `FolderAction` on all files in all sub folders in
/// folder `folder`.
pub fn folders_in<FolderAction, ActionResult, P>(
folder: P,
sender: MessageSender,
action: FolderAction,
) -> Result<Vec<ActionResult>>
where
P: AsRef<Path>,
FolderAction: Fn(PathBuf, MessageSender) -> Result<Vec<ActionResult>> + Send + Sync,
ActionResult: Send,
{
let folder = folder.as_ref();
if !folder.exists() {
bail!("Folder {} does not exist", &folder.display());
}
// For progress reporting, we collect the iterator in order to
// know how many items there are.
let items: Vec<_> = std::fs::read_dir(&folder)?.collect();
let total = items.len();
sender.send(Message::ReadTotal(total))?;
Ok(items
.into_iter()
.par_bridge()
.filter_map(|entry| {
let path = entry
.map_err(|e| tracing::error!("{} {:?}", &folder.display(), &e))
.ok()?
.path();
if !path.is_dir() {
return None;
}
let sender = sender.clone();
trace!("Reading folder {}", path.display());
action(path.clone(), sender)
.map_err(|e| tracing::error!("{} {:?}", path.display(), &e))
.ok()
})
.flatten()
.collect())
}
pub fn emails_in<O, F, P: AsRef<Path>>(path: P, sender: MessageSender, make: F) -> Result<Vec<O>>
where
F: Fn(PathBuf) -> Option<O>,
F: Send + Sync + 'static,
O: Send + Sync,
{
let path = path.as_ref();
let result = Ok(std::fs::read_dir(path)?
.into_iter()
.par_bridge()
.filter_map(|entry| {
let path = entry
.map_err(|e| tracing::error!("{} {:?}", &path.display(), &e))
.ok()?
.path();
if path.is_dir() {
return None;
}
trace!("Reading {}", &path.display());
make(path)
})
.collect());
// We're done reading the folder
sender.send(Message::ReadOne).unwrap();
result
}

@ -0,0 +1,3 @@
pub mod database;
pub mod filesystem;
pub mod parse;

@ -0,0 +1,155 @@
use chrono::prelude::*;
use email_parser::address::{Address, EmailAddress, Mailbox};
use eyre::{eyre, Result};
use std::borrow::Cow;
use std::collections::HashSet;
use std::path::Path;
use ps_core::{EmailEntry, EmailMeta};
/// Different `importer`s can implement this trait to provide the necessary
/// data to parse their data into a `EmailEntry`.
pub trait ParseableEmail: Send + Sized + Sync {
/// This will be called once before `message`, `path` and `meta`
/// are called. It can be used to perform parsing operations
fn prepare(&mut self) -> Result<()>;
/// The message content as bytes
fn message(&self) -> Result<Cow<'_, [u8]>>;
/// The original path of the email in the filesystem
fn path(&self) -> &Path;
/// Optional meta information if they're available.
/// (Depending on the `importer` capabilities and system)
fn meta(&self) -> Result<Option<EmailMeta>>;
}
pub fn parse_email<Entry: ParseableEmail>(
entry: &mut Entry,
sender_emails: &HashSet<String>,
) -> Result<EmailEntry> {
if let Err(e) = entry.prepare() {
tracing::error!("Prepare Error: {:?}", e);
return Err(e);
}
let content = entry.message()?;
match email_parser::email::Email::parse(&content) {
Ok(email) => {
let path = entry.path();
tracing::trace!("Parsing {}", path.display());
let (sender_name, _, sender_local_part, sender_domain) =
mailbox_to_string(&email.sender);
let datetime = emaildatetime_to_chrono(&email.date);
let subject = email.subject.map(|e| e.to_string()).unwrap_or_default();
let to_count = match email.to.as_ref() {
Some(n) => n.len(),
None => 0,
};
let to = match email.to.as_ref().map(|v| v.first()).flatten() {
Some(n) => address_to_name_string(n),
None => None,
};
let to_group = to.as_ref().map(|e| e.0.clone()).flatten();
let to_first = to.as_ref().map(|e| (e.1.clone(), e.2.clone()));
let is_reply = email.in_reply_to.map(|v| !v.is_empty()).unwrap_or(false);
let meta = entry.meta()?;
// In order to determine the sender, we have to
// build up the address again :-(
let is_send = {
let email = format!("{}@{}", sender_local_part, sender_domain);
sender_emails.contains(&email)
};
Ok(EmailEntry {
path: path.to_path_buf(),
sender_domain,
sender_local_part,
sender_name,
datetime,
subject,
meta,
is_reply,
to_count,
to_group,
to_first,
is_send,
})
}
Err(error) => {
let error = eyre!(
"Could not parse email (trace to see contents): {:?} [{}]",
&error,
entry.path().display()
);
tracing::error!("{:?}", &error);
if let Ok(content_string) = String::from_utf8(content.into_owned()) {
tracing::trace!("Contents:\n{}\n---\n", content_string);
} else {
tracing::trace!("Contents:\nInvalid UTF8\n---\n");
}
Err(error)
}
}
}
/// Returns a conversion from address to the fields we care about:
/// ([group name], display name, email address)
fn address_to_name_string(address: &Address) -> Option<(Option<String>, String, String)> {
match address {
Address::Group((names, boxes)) => match (names.first(), boxes.first()) {
(group_name, Some(mailbox)) => {
let group = group_name.map(|e| e.to_string());
let (display_name, address, _, _) = mailbox_to_string(mailbox);
Some((group, display_name, address))
}
_ => None,
},
Address::Mailbox(mailbox) => {
let (display_name, address, _, _) = mailbox_to_string(mailbox);
Some((None, display_name, address))
}
}
}
/// Returns (display name, email address, local part, domain)
fn mailbox_to_string(mailbox: &Mailbox) -> (String, String, String, String) {
let names = match mailbox.name.as_ref() {
Some(n) => n
.iter()
.map(|e| e.as_ref())
.collect::<Vec<&str>>()
.join(" "),
None => "".to_owned(),
};
(
names,
emailaddress_to_string(&mailbox.address),
mailbox.address.local_part.to_string(),
mailbox.address.domain.to_string(),
)
}
fn emailaddress_to_string(address: &EmailAddress) -> String {
format!(
"{}@{}",
address.local_part.to_string(),
address.domain.to_string()
)
}
fn emaildatetime_to_chrono(dt: &email_parser::time::DateTime) -> chrono::DateTime<Utc> {
Utc.ymd(
dt.date.year as i32,
dt.date.month_number() as u32,
dt.date.day as u32,
)
.and_hms(
dt.time.time.hour as u32,
dt.time.time.minute as u32,
dt.time.time.second as u32,
)
}

@ -0,0 +1,61 @@
use std::thread::JoinHandle;
use super::formats::shared;
use ps_core::{
crossbeam_channel::{self, unbounded},
Config, DatabaseLike, Importerlike, Message, MessageReceiver,
};
use super::formats::ImporterFormat;
use eyre::Result;
pub struct Importer<Format: ImporterFormat> {
config: Config,
format: Format,
}
impl<Format: ImporterFormat + 'static> Importer<Format> {
pub fn new(config: Config, format: Format) -> Self {
Self { config, format }
}
}
impl<Format: ImporterFormat + 'static> Importerlike for Importer<Format> {
fn import<Database: DatabaseLike + 'static>(
self,
database: Database,
) -> Result<(MessageReceiver, JoinHandle<Result<()>>)> {
let Importer { format, .. } = self;
let (sender, receiver) = unbounded();
let config = self.config;
let handle: JoinHandle<Result<()>> = std::thread::spawn(move || {
let outer_sender = sender.clone();
let processed = move || {
let emails = format.emails(&config, sender.clone())?;
let processed =
shared::database::into_database(&config, emails, sender.clone(), database)?;
Ok(processed)
};
let result = processed();
// Send the error away and map it to a crossbeam channel error
match result {
Ok(_) => Ok(()),
Err(e) => match outer_sender.send(Message::Error(e)) {
Ok(_) => Ok(()),
Err(e) => Err(eyre::Report::new(e)),
},
}
});
Ok((receiver, handle))
}
}
// impl<T: Importerlike + Sized> Importerlike for Box<T> {
// fn import(self) -> Result<(MessageReceiver, JoinHandle<Result<()>>)> {
// (*self).import()
// }
// }

@ -0,0 +1,29 @@
pub(crate) mod formats;
#[allow(clippy::module_inception)]
mod importer;
mod message_adapter;
pub use message_adapter::*;
use ps_core::{Config, EmailEntry, EmailMeta, FormatType, Importerlike};
use formats::ImporterFormat;
// pub fn importer(config: &Config) -> Box<dyn Importerlike> {
// match config.format {
// AppleMail => Box::new(applemail_importer(config.clone())),
// GmailVault => Box::new(gmail_importer(config.clone())),
// Mbox => Box::new(gmail_importer(config.clone())),
// }
// }
pub fn gmail_importer(config: Config) -> importer::Importer<formats::Gmail> {
importer::Importer::new(config, formats::Gmail::default())
}
pub fn applemail_importer(config: Config) -> importer::Importer<formats::AppleMail> {
importer::Importer::new(config, formats::AppleMail::default())
}
pub fn mbox_importer(config: Config) -> importer::Importer<formats::Mbox> {
importer::Importer::new(config, formats::Mbox::default())
}

@ -0,0 +1,151 @@
use eyre::{bail, eyre, Report, Result};
use std::sync::{Arc, RwLock};
use std::thread::JoinHandle;
use super::formats::ImporterFormat;
use ps_core::{DatabaseLike, Importerlike, Message};
#[derive(Debug, Default)]
struct Data {
total_read: usize,
read: usize,
total_write: usize,
write: usize,
finishing: bool,
done: bool,
error: Option<Report>,
#[cfg(target_os = "macos")]
missing_permissions: bool,
}
#[derive(Clone, Debug, Copy)]
pub struct Progress {
pub total: usize,
pub count: usize,
}
#[derive(Clone, Debug, Copy)]
pub struct State {
pub finishing: bool,
pub done: bool,
pub written: usize,
#[cfg(target_os = "macos")]
pub missing_permissions: bool,
}
/// This can be initialized with a [`MessageSender`] and it will
/// automatically tally up the information into a thread-safe
/// datastructure
pub struct Adapter {
producer_lock: Arc<RwLock<Data>>,
consumer_lock: Arc<RwLock<Data>>,
}
impl Adapter {
#[allow(clippy::new_without_default)]
pub fn new() -> Self {
let rw_lock = Arc::new(RwLock::default());
// FIXME: Look up this warning. It looks like the clones are necessary?
#[allow(clippy::redundant_clone)]
let producer_lock = rw_lock.clone();
#[allow(clippy::redundant_clone)]
let consumer_lock = rw_lock.clone();
Self {
producer_lock,
consumer_lock,
}
}
/// Starts up a thread that handles the `MessageReceiver` messages
/// into state that can be accessed via [`read_count`], [`write_count`] and [`finished`]
pub fn process<Format: ImporterFormat + 'static, Database: DatabaseLike + 'static>(
&self,
importer: super::importer::Importer<Format>,
database: Database,
) -> Result<JoinHandle<Result<()>>> {
let (receiver, handle) = importer.import(database)?;
let lock = self.producer_lock.clone();
let handle = std::thread::spawn(move || {
'outer: loop {
let mut write_guard = match lock.write() {
Ok(n) => n,
Err(e) => bail!("RwLock Error: {:?}", e),
};
for entry in receiver.try_iter() {
match entry {
Message::ReadTotal(n) => write_guard.total_read = n,
Message::ReadOne => {
write_guard.read += 1;
// Depending on the implementation, we may receive read calls before
// the total size is known. We prevent division by zero by
// always setting the total to read + 1 in these cases
if write_guard.total_read <= write_guard.read {
write_guard.total_read = write_guard.read + 1;
}
}
Message::WriteTotal(n) => write_guard.total_write = n,
Message::WriteOne => write_guard.write += 1,
Message::FinishingUp => write_guard.finishing = true,
Message::Done => {
write_guard.done = true;
break 'outer;
}
Message::Error(e) => {
write_guard.error = Some(e);
}
#[cfg(target_os = "macos")]
Message::MissingPermissions => {
write_guard.missing_permissions = true;
}
};
}
}
let _ = handle.join().map_err(|op| eyre::eyre!("{:?}", &op))??;
Ok(())
});
Ok(handle)
}
pub fn read_count(&self) -> Result<Progress> {
let item = self.consumer_lock.read().map_err(|e| eyre!("{:?}", &e))?;
Ok(Progress {
total: item.total_read,
count: item.read,
})
}
pub fn write_count(&self) -> Result<Progress> {
let item = self.consumer_lock.read().map_err(|e| eyre!("{:?}", &e))?;
Ok(Progress {
total: item.total_write,
count: item.write,
})
}
pub fn finished(&self) -> Result<State> {
let item = self.consumer_lock.read().map_err(|e| eyre!("{:?}", &e))?;
Ok(State {
finishing: item.finishing,
done: item.done,
written: item.write,
#[cfg(target_os = "macos")]
missing_permissions: item.missing_permissions,
})
}
pub fn error(&self) -> Result<Option<Report>> {
// We take the error of out of the write lock only if there is an error.
let item = self.consumer_lock.read().map_err(|e| eyre!("{:?}", &e))?;
let is_error = item.error.is_some();
drop(item);
if is_error {
let mut item = self.producer_lock.write().map_err(|e| eyre!("{:?}", &e))?;
Ok(item.error.take())
} else {
Ok(None)
}
}
}
Loading…
Cancel
Save