diff --git a/distant-core/src/api/local/state/search.rs b/distant-core/src/api/local/state/search.rs index 9d3aa8a..bb0c1c4 100644 --- a/distant-core/src/api/local/state/search.rs +++ b/distant-core/src/api/local/state/search.rs @@ -256,7 +256,7 @@ impl SearchQueryReporter { struct SearchQueryExecutor { id: SearchId, query: SearchQuery, - walk_dir: WalkDir, + walk_dirs: Vec<WalkDir>, matcher: RegexMatcher, cancel_tx: Option<oneshot::Sender<()>>, @@ -272,29 +272,34 @@ impl SearchQueryExecutor { let (cancel_tx, cancel_rx) = oneshot::channel(); let (match_tx, match_rx) = mpsc::unbounded_channel(); - let path = query.path.as_path(); - let follow_links = query.options.follow_symbolic_links; let regex = query.condition.to_regex_string(); - let matcher = RegexMatcher::new(&regex) .map_err(|x| io::Error::new(io::ErrorKind::InvalidInput, x))?; - let walk_dir = WalkDir::new(path).follow_links(follow_links); - let walk_dir = match query.options.min_depth.as_ref().copied() { - Some(depth) => walk_dir.min_depth(depth as usize), - None => walk_dir, - }; + let mut walk_dirs = Vec::new(); + for path in query.paths.iter() { + let path = path.as_path(); + let follow_links = query.options.follow_symbolic_links; + let walk_dir = WalkDir::new(path).follow_links(follow_links); - let walk_dir = match query.options.max_depth.as_ref().copied() { - Some(depth) => walk_dir.max_depth(depth as usize), - None => walk_dir, - }; + let walk_dir = match query.options.min_depth.as_ref().copied() { + Some(depth) => walk_dir.min_depth(depth as usize), + None => walk_dir, + }; + + let walk_dir = match query.options.max_depth.as_ref().copied() { + Some(depth) => walk_dir.max_depth(depth as usize), + None => walk_dir, + }; + + walk_dirs.push(walk_dir); + } Ok(Self { id: rand::random(), query, matcher, - walk_dir, + walk_dirs, cancel_tx: Some(cancel_tx), cancel_rx, @@ -329,7 +334,7 @@ impl SearchQueryExecutor { fn run(self) { let id = self.id; - let walk_dir = self.walk_dir; + let walk_dirs = self.walk_dirs; let tx = self.match_tx; let
mut cancel = self.cancel_rx; @@ -374,54 +379,56 @@ impl SearchQueryExecutor { options: self.query.options.clone(), }; - // Search all entries for matches and report them - for entry in walk_dir - .into_iter() - .filter_map(|e| e.ok()) - .filter(|e| include_path_filter.filter(e.path())) - .filter(|e| !exclude_path_filter.filter(e.path())) - .filter(|e| options_filter.filter(e)) - { - // Check if we are being interrupted, and if so exit our loop early - match cancel.try_recv() { - Err(oneshot::error::TryRecvError::Empty) => (), - _ => { - debug!("[Query {id}] Cancelled"); - break; + for walk_dir in walk_dirs { + // Search all entries for matches and report them + for entry in walk_dir + .into_iter() + .filter_map(|e| e.ok()) + .filter(|e| include_path_filter.filter(e.path())) + .filter(|e| !exclude_path_filter.filter(e.path())) + .filter(|e| options_filter.filter(e)) + { + // Check if we are being interrupted, and if so exit our loop early + match cancel.try_recv() { + Err(oneshot::error::TryRecvError::Empty) => (), + _ => { + debug!("[Query {id}] Cancelled"); + break; + } } - } - let res = match self.query.target { - // Perform the search against the path itself - SearchQueryTarget::Path => { - let path_str = entry.path().to_string_lossy(); - Searcher::new().search_slice( + let res = match self.query.target { + // Perform the search against the path itself + SearchQueryTarget::Path => { + let path_str = entry.path().to_string_lossy(); + Searcher::new().search_slice( + &self.matcher, + path_str.as_bytes(), + SearchQueryPathSink { + search_id: id, + path: entry.path(), + matcher: &self.matcher, + callback: |m| Ok(tx.send(m).is_ok()), + }, + ) + } + + // Perform the search against the file's contents + SearchQueryTarget::Contents => Searcher::new().search_path( &self.matcher, - path_str.as_bytes(), - SearchQueryPathSink { + entry.path(), + SearchQueryContentsSink { search_id: id, path: entry.path(), matcher: &self.matcher, callback: |m| Ok(tx.send(m).is_ok()), }, - ) - 
} - - // Perform the search against the file's contents - SearchQueryTarget::Contents => Searcher::new().search_path( - &self.matcher, - entry.path(), - SearchQueryContentsSink { - search_id: id, - path: entry.path(), - matcher: &self.matcher, - callback: |m| Ok(tx.send(m).is_ok()), - }, - ), - }; + ), + }; - if let Err(x) = res { - error!("[Query {id}] Search failed for {:?}: {x}", entry.path()); + if let Err(x) = res { + error!("[Query {id}] Search failed for {:?}: {x}", entry.path()); + } } } } @@ -663,7 +670,7 @@ mod tests { let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Path, condition: SearchQueryCondition::equals(""), options: Default::default(), @@ -693,7 +700,7 @@ mod tests { let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Path, condition: SearchQueryCondition::regex("other"), options: Default::default(), @@ -770,7 +777,7 @@ mod tests { let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Path, condition: SearchQueryCondition::regex("path"), options: Default::default(), @@ -848,7 +855,7 @@ mod tests { let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Contents, condition: SearchQueryCondition::regex("text"), options: Default::default(), @@ -919,7 +926,7 @@ mod tests { let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Contents, condition: SearchQueryCondition::regex(r"[abc][ab]"), options: Default::default(), @@ -1016,7 +1023,7 @@ mod tests { let (reply, mut 
rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Contents, condition: SearchQueryCondition::regex("text"), options: SearchQueryOptions { @@ -1112,7 +1119,7 @@ mod tests { let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Contents, condition: SearchQueryCondition::regex("text"), options: SearchQueryOptions { @@ -1149,7 +1156,7 @@ mod tests { let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Contents, condition: SearchQueryCondition::regex("text"), options: SearchQueryOptions { @@ -1194,7 +1201,7 @@ mod tests { let state = SearchState::new(); let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Path, condition: SearchQueryCondition::regex(".*"), options: SearchQueryOptions { @@ -1304,7 +1311,7 @@ mod tests { let state = SearchState::new(); let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Path, condition: SearchQueryCondition::regex(".*"), options: SearchQueryOptions { @@ -1396,7 +1403,7 @@ mod tests { let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Contents, condition: SearchQueryCondition::regex("text"), options: SearchQueryOptions { @@ -1451,7 +1458,7 @@ mod tests { let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Contents, condition: 
SearchQueryCondition::regex("text"), options: SearchQueryOptions { @@ -1522,7 +1529,7 @@ mod tests { // NOTE: We provide regex that matches an invalid UTF-8 character by disabling the u flag // and checking for 0x9F (159) let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Contents, condition: SearchQueryCondition::regex(r"(?-u:\x9F)"), options: Default::default(), @@ -1578,7 +1585,7 @@ mod tests { let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Path, condition: SearchQueryCondition::regex(".*"), options: SearchQueryOptions { @@ -1665,7 +1672,7 @@ mod tests { let (reply, mut rx) = mpsc::channel(100); let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Path, condition: SearchQueryCondition::regex(".*"), options: SearchQueryOptions { @@ -1726,7 +1733,7 @@ mod tests { // type filter, it will evaluate the underlying type of symbolic links and filter // based on that instead of the the symbolic link let query = SearchQuery { - path: root.path().to_path_buf(), + paths: vec![root.path().to_path_buf()], target: SearchQueryTarget::Path, condition: SearchQueryCondition::regex(".*"), options: SearchQueryOptions { @@ -1762,4 +1769,74 @@ mod tests { assert_eq!(rx.recv().await, None); } + + #[tokio::test] + async fn should_support_being_supplied_more_than_one_path() { + let root = setup_dir(vec![ + ("path/to/file1.txt", "some\nlines of text in\na\nfile"), + ("path/to/file2.txt", "more text"), + ]); + + let state = SearchState::new(); + let (reply, mut rx) = mpsc::channel(100); + + let query = SearchQuery { + paths: vec![ + root.child(make_path("path/to/file1.txt")) + .path() + .to_path_buf(), + root.child(make_path("path/to/file2.txt")) + .path() + .to_path_buf(), + ], + target: 
SearchQueryTarget::Contents, + condition: SearchQueryCondition::regex("text"), + options: Default::default(), + }; + + let search_id = state.start(query, Box::new(reply)).await.unwrap(); + + let mut matches = get_matches(rx.recv().await.unwrap()) + .into_iter() + .filter_map(|m| m.into_contents_match()) + .collect::<Vec<_>>(); + + matches.sort_unstable_by_key(|m| m.path.to_path_buf()); + + assert_eq!( + matches, + vec![ + SearchQueryContentsMatch { + path: root.child(make_path("path/to/file1.txt")).to_path_buf(), + lines: SearchQueryMatchData::text("lines of text in\n"), + line_number: 2, + absolute_offset: 5, + submatches: vec![SearchQuerySubmatch { + r#match: SearchQueryMatchData::Text("text".to_string()), + start: 9, + end: 13, + }] + }, + SearchQueryContentsMatch { + path: root.child(make_path("path/to/file2.txt")).to_path_buf(), + lines: SearchQueryMatchData::text("more text"), + line_number: 1, + absolute_offset: 0, + submatches: vec![SearchQuerySubmatch { + r#match: SearchQueryMatchData::Text("text".to_string()), + start: 5, + end: 9, + }] + } + ] + ); + + let data = rx.recv().await; + assert_eq!( + data, + Some(DistantResponseData::SearchDone { id: search_id }) + ); + + assert_eq!(rx.recv().await, None); + } } diff --git a/distant-core/src/client/searcher.rs b/distant-core/src/client/searcher.rs index 83a659e..4635331 100644 --- a/distant-core/src/client/searcher.rs +++ b/distant-core/src/client/searcher.rs @@ -216,7 +216,7 @@ mod tests { async fn searcher_should_have_query_reflect_ongoing_query() { let (mut transport, session) = make_session(); let test_query = SearchQuery { - path: PathBuf::from("/some/test/path"), + paths: vec![PathBuf::from("/some/test/path")], target: SearchQueryTarget::Path, condition: SearchQueryCondition::Regex { value: String::from("."), @@ -252,7 +252,7 @@ mod tests { async fn searcher_should_support_getting_next_match() { let (mut transport, session) = make_session(); let test_query = SearchQuery { - path:
PathBuf::from("/some/test/path"), + paths: vec![PathBuf::from("/some/test/path")], target: SearchQueryTarget::Path, condition: SearchQueryCondition::Regex { value: String::from("."), @@ -371,7 +371,7 @@ mod tests { let (mut transport, session) = make_session(); let test_query = SearchQuery { - path: PathBuf::from("/some/test/path"), + paths: vec![PathBuf::from("/some/test/path")], target: SearchQueryTarget::Path, condition: SearchQueryCondition::Regex { value: String::from("."), @@ -492,7 +492,7 @@ mod tests { let (mut transport, session) = make_session(); let test_query = SearchQuery { - path: PathBuf::from("/some/test/path"), + paths: vec![PathBuf::from("/some/test/path")], target: SearchQueryTarget::Path, condition: SearchQueryCondition::Regex { value: String::from("."), diff --git a/distant-core/src/data.rs b/distant-core/src/data.rs index abeca95..fbab853 100644 --- a/distant-core/src/data.rs +++ b/distant-core/src/data.rs @@ -389,6 +389,7 @@ pub enum DistantRequestData { #[strum_discriminants(strum(message = "Supports searching filesystem using queries"))] Search { /// Query to perform against the filesystem + #[cfg_attr(feature = "clap", clap(flatten))] query: SearchQuery, }, diff --git a/distant-core/src/data/search.rs b/distant-core/src/data/search.rs index 62a0ea8..545834e 100644 --- a/distant-core/src/data/search.rs +++ b/distant-core/src/data/search.rs @@ -7,19 +7,24 @@ pub type SearchId = u32; /// Represents a query to perform against the filesystem #[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[cfg_attr(feature = "clap", derive(clap::Args))] #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] pub struct SearchQuery { - /// Path in which to perform the query - pub path: PathBuf, - - /// Kind of data to example using conditions + /// Kind of data to examine using condition + #[cfg_attr(feature = "clap", clap(long, value_enum, default_value_t = SearchQueryTarget::Contents))] pub target: SearchQueryTarget, /// Condition to 
meet to be considered a match + #[cfg_attr(feature = "clap", clap(name = "pattern"))] pub condition: SearchQueryCondition, + /// Paths in which to perform the query + #[cfg_attr(feature = "clap", clap(default_value = "."))] + pub paths: Vec<PathBuf>, + /// Options to apply to the query #[serde(default)] + #[cfg_attr(feature = "clap", clap(flatten))] pub options: SearchQueryOptions, } @@ -41,7 +46,9 @@ impl FromStr for SearchQuery { /// Kind of data to examine using conditions #[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)] +#[cfg_attr(feature = "clap", derive(clap::ArgEnum))] #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] +#[cfg_attr(feature = "clap", clap(rename_all = "snake_case"))] #[serde(rename_all = "snake_case")] pub enum SearchQueryTarget { /// Checks path of file, directory, or symlink @@ -123,28 +130,43 @@ impl SearchQueryCondition { } } +impl FromStr for SearchQueryCondition { + type Err = std::convert::Infallible; + + /// Parses search query from a JSON string + fn from_str(s: &str) -> Result<Self, Self::Err> { + Ok(Self::regex(s)) + } +} + /// Options associated with a search query #[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] +#[cfg_attr(feature = "clap", derive(clap::Args))] #[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))] pub struct SearchQueryOptions { /// Restrict search to only these file types (otherwise all are allowed) + #[cfg_attr(feature = "clap", clap(skip))] #[serde(default)] pub allowed_file_types: HashSet<FileType>, /// Regex to use to filter paths being searched to only those that match the include condition + #[cfg_attr(feature = "clap", clap(long))] #[serde(default)] pub include: Option<SearchQueryCondition>, /// Regex to use to filter paths being searched to only those that do not match the exclude /// condition + #[cfg_attr(feature = "clap", clap(long))] #[serde(default)] pub exclude: Option<SearchQueryCondition>, /// Search should follow symbolic links + #[cfg_attr(feature = "clap", clap(long))] #[serde(default)] pub
follow_symbolic_links: bool, /// Maximum results to return before stopping the query + #[cfg_attr(feature = "clap", clap(long))] #[serde(default)] pub limit: Option<u64>, @@ -153,6 +175,7 @@ pub struct SearchQueryOptions { /// The smallest depth is 0 and always corresponds to the path given to the new function on /// this type. Its direct descendents have depth 1, and their descendents have depth 2, and so /// on. + #[cfg_attr(feature = "clap", clap(long))] #[serde(default)] pub min_depth: Option<u64>, @@ -164,11 +187,13 @@ pub struct SearchQueryOptions { /// /// Note that this will not simply filter the entries of the iterator, but it will actually /// avoid descending into directories when the depth is exceeded. + #[cfg_attr(feature = "clap", clap(long))] #[serde(default)] pub max_depth: Option<u64>, /// Amount of results to batch before sending back excluding final submission that will always /// include the remaining results even if less than pagination request + #[cfg_attr(feature = "clap", clap(long))] #[serde(default)] pub pagination: Option<u64>, } diff --git a/tests/cli/action/search.rs b/tests/cli/action/search.rs index a650e78..7af972d 100644 --- a/tests/cli/action/search.rs +++ b/tests/cli/action/search.rs @@ -4,7 +4,6 @@ use assert_fs::prelude::*; use indoc::indoc; use predicates::Predicate; use rstest::*; -use serde_json::json; const SEARCH_RESULTS_REGEX: &str = indoc!
{r" .*?[\\/]file1.txt @@ -26,12 +25,6 @@ fn should_search_filesystem_using_query(mut action_cmd: CtxCommand) { .unwrap(); root.child("file3.txt").write_str("more content").unwrap(); - let query = json!({ - "path": root.path().to_string_lossy(), - "target": "contents", - "condition": {"type": "regex", "value": "te[a-z]*\\b"}, - }); - let stdout_predicate_fn = predicates::function::function(|s: &[u8]| { let s = std::str::from_utf8(s).unwrap(); @@ -54,7 +47,8 @@ fn should_search_filesystem_using_query(mut action_cmd: CtxCommand) { // distant action system-info action_cmd .arg("search") - .arg(&serde_json::to_string(&query).unwrap()) + .arg("te[a-z]*\\b") + .arg(root.path()) .assert() .success() .stdout(stdout_predicate_fn) diff --git a/tests/cli/repl/search.rs b/tests/cli/repl/search.rs index 7622d82..3a6f2bb 100644 --- a/tests/cli/repl/search.rs +++ b/tests/cli/repl/search.rs @@ -19,7 +19,7 @@ async fn should_support_json_search_filesystem_using_query(mut json_repl: CtxCom "payload": { "type": "search", "query": { - "path": root.path().to_string_lossy(), + "paths": [root.path().to_string_lossy()], "target": "contents", "condition": {"type": "regex", "value": "ua"}, },