Refactor SearchQuery to support multiple paths at once

pull/137/head
Chip Senkbeil 2 years ago
parent 01610a3ac7
commit cae6c5e244
No known key found for this signature in database
GPG Key ID: 35EF1F8EC72A4131

@ -256,7 +256,7 @@ impl SearchQueryReporter {
struct SearchQueryExecutor {
id: SearchId,
query: SearchQuery,
walk_dir: WalkDir,
walk_dirs: Vec<WalkDir>,
matcher: RegexMatcher,
cancel_tx: Option<oneshot::Sender<()>>,
@ -272,29 +272,34 @@ impl SearchQueryExecutor {
let (cancel_tx, cancel_rx) = oneshot::channel();
let (match_tx, match_rx) = mpsc::unbounded_channel();
let path = query.path.as_path();
let follow_links = query.options.follow_symbolic_links;
let regex = query.condition.to_regex_string();
let matcher = RegexMatcher::new(&regex)
.map_err(|x| io::Error::new(io::ErrorKind::InvalidInput, x))?;
let walk_dir = WalkDir::new(path).follow_links(follow_links);
let walk_dir = match query.options.min_depth.as_ref().copied() {
Some(depth) => walk_dir.min_depth(depth as usize),
None => walk_dir,
};
let mut walk_dirs = Vec::new();
for path in query.paths.iter() {
let path = path.as_path();
let follow_links = query.options.follow_symbolic_links;
let walk_dir = WalkDir::new(path).follow_links(follow_links);
let walk_dir = match query.options.max_depth.as_ref().copied() {
Some(depth) => walk_dir.max_depth(depth as usize),
None => walk_dir,
};
let walk_dir = match query.options.min_depth.as_ref().copied() {
Some(depth) => walk_dir.min_depth(depth as usize),
None => walk_dir,
};
let walk_dir = match query.options.max_depth.as_ref().copied() {
Some(depth) => walk_dir.max_depth(depth as usize),
None => walk_dir,
};
walk_dirs.push(walk_dir);
}
Ok(Self {
id: rand::random(),
query,
matcher,
walk_dir,
walk_dirs,
cancel_tx: Some(cancel_tx),
cancel_rx,
@ -329,7 +334,7 @@ impl SearchQueryExecutor {
fn run(self) {
let id = self.id;
let walk_dir = self.walk_dir;
let walk_dirs = self.walk_dirs;
let tx = self.match_tx;
let mut cancel = self.cancel_rx;
@ -374,54 +379,56 @@ impl SearchQueryExecutor {
options: self.query.options.clone(),
};
// Search all entries for matches and report them
for entry in walk_dir
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| include_path_filter.filter(e.path()))
.filter(|e| !exclude_path_filter.filter(e.path()))
.filter(|e| options_filter.filter(e))
{
// Check if we are being interrupted, and if so exit our loop early
match cancel.try_recv() {
Err(oneshot::error::TryRecvError::Empty) => (),
_ => {
debug!("[Query {id}] Cancelled");
break;
for walk_dir in walk_dirs {
// Search all entries for matches and report them
for entry in walk_dir
.into_iter()
.filter_map(|e| e.ok())
.filter(|e| include_path_filter.filter(e.path()))
.filter(|e| !exclude_path_filter.filter(e.path()))
.filter(|e| options_filter.filter(e))
{
// Check if we are being interrupted, and if so exit our loop early
match cancel.try_recv() {
Err(oneshot::error::TryRecvError::Empty) => (),
_ => {
debug!("[Query {id}] Cancelled");
break;
}
}
}
let res = match self.query.target {
// Perform the search against the path itself
SearchQueryTarget::Path => {
let path_str = entry.path().to_string_lossy();
Searcher::new().search_slice(
let res = match self.query.target {
// Perform the search against the path itself
SearchQueryTarget::Path => {
let path_str = entry.path().to_string_lossy();
Searcher::new().search_slice(
&self.matcher,
path_str.as_bytes(),
SearchQueryPathSink {
search_id: id,
path: entry.path(),
matcher: &self.matcher,
callback: |m| Ok(tx.send(m).is_ok()),
},
)
}
// Perform the search against the file's contents
SearchQueryTarget::Contents => Searcher::new().search_path(
&self.matcher,
path_str.as_bytes(),
SearchQueryPathSink {
entry.path(),
SearchQueryContentsSink {
search_id: id,
path: entry.path(),
matcher: &self.matcher,
callback: |m| Ok(tx.send(m).is_ok()),
},
)
}
// Perform the search against the file's contents
SearchQueryTarget::Contents => Searcher::new().search_path(
&self.matcher,
entry.path(),
SearchQueryContentsSink {
search_id: id,
path: entry.path(),
matcher: &self.matcher,
callback: |m| Ok(tx.send(m).is_ok()),
},
),
};
),
};
if let Err(x) = res {
error!("[Query {id}] Search failed for {:?}: {x}", entry.path());
if let Err(x) = res {
error!("[Query {id}] Search failed for {:?}: {x}", entry.path());
}
}
}
}
@ -663,7 +670,7 @@ mod tests {
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Path,
condition: SearchQueryCondition::equals(""),
options: Default::default(),
@ -693,7 +700,7 @@ mod tests {
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Path,
condition: SearchQueryCondition::regex("other"),
options: Default::default(),
@ -770,7 +777,7 @@ mod tests {
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Path,
condition: SearchQueryCondition::regex("path"),
options: Default::default(),
@ -848,7 +855,7 @@ mod tests {
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Contents,
condition: SearchQueryCondition::regex("text"),
options: Default::default(),
@ -919,7 +926,7 @@ mod tests {
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Contents,
condition: SearchQueryCondition::regex(r"[abc][ab]"),
options: Default::default(),
@ -1016,7 +1023,7 @@ mod tests {
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Contents,
condition: SearchQueryCondition::regex("text"),
options: SearchQueryOptions {
@ -1112,7 +1119,7 @@ mod tests {
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Contents,
condition: SearchQueryCondition::regex("text"),
options: SearchQueryOptions {
@ -1149,7 +1156,7 @@ mod tests {
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Contents,
condition: SearchQueryCondition::regex("text"),
options: SearchQueryOptions {
@ -1194,7 +1201,7 @@ mod tests {
let state = SearchState::new();
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Path,
condition: SearchQueryCondition::regex(".*"),
options: SearchQueryOptions {
@ -1304,7 +1311,7 @@ mod tests {
let state = SearchState::new();
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Path,
condition: SearchQueryCondition::regex(".*"),
options: SearchQueryOptions {
@ -1396,7 +1403,7 @@ mod tests {
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Contents,
condition: SearchQueryCondition::regex("text"),
options: SearchQueryOptions {
@ -1451,7 +1458,7 @@ mod tests {
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Contents,
condition: SearchQueryCondition::regex("text"),
options: SearchQueryOptions {
@ -1522,7 +1529,7 @@ mod tests {
// NOTE: We provide regex that matches an invalid UTF-8 character by disabling the u flag
// and checking for 0x9F (159)
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Contents,
condition: SearchQueryCondition::regex(r"(?-u:\x9F)"),
options: Default::default(),
@ -1578,7 +1585,7 @@ mod tests {
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Path,
condition: SearchQueryCondition::regex(".*"),
options: SearchQueryOptions {
@ -1665,7 +1672,7 @@ mod tests {
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Path,
condition: SearchQueryCondition::regex(".*"),
options: SearchQueryOptions {
@ -1726,7 +1733,7 @@ mod tests {
// type filter, it will evaluate the underlying type of symbolic links and filter
// based on that instead of the symbolic link
let query = SearchQuery {
path: root.path().to_path_buf(),
paths: vec![root.path().to_path_buf()],
target: SearchQueryTarget::Path,
condition: SearchQueryCondition::regex(".*"),
options: SearchQueryOptions {
@ -1762,4 +1769,74 @@ mod tests {
assert_eq!(rx.recv().await, None);
}
#[tokio::test]
async fn should_support_being_supplied_more_than_one_path() {
let root = setup_dir(vec![
("path/to/file1.txt", "some\nlines of text in\na\nfile"),
("path/to/file2.txt", "more text"),
]);
let state = SearchState::new();
let (reply, mut rx) = mpsc::channel(100);
let query = SearchQuery {
paths: vec![
root.child(make_path("path/to/file1.txt"))
.path()
.to_path_buf(),
root.child(make_path("path/to/file2.txt"))
.path()
.to_path_buf(),
],
target: SearchQueryTarget::Contents,
condition: SearchQueryCondition::regex("text"),
options: Default::default(),
};
let search_id = state.start(query, Box::new(reply)).await.unwrap();
let mut matches = get_matches(rx.recv().await.unwrap())
.into_iter()
.filter_map(|m| m.into_contents_match())
.collect::<Vec<_>>();
matches.sort_unstable_by_key(|m| m.path.to_path_buf());
assert_eq!(
matches,
vec![
SearchQueryContentsMatch {
path: root.child(make_path("path/to/file1.txt")).to_path_buf(),
lines: SearchQueryMatchData::text("lines of text in\n"),
line_number: 2,
absolute_offset: 5,
submatches: vec![SearchQuerySubmatch {
r#match: SearchQueryMatchData::Text("text".to_string()),
start: 9,
end: 13,
}]
},
SearchQueryContentsMatch {
path: root.child(make_path("path/to/file2.txt")).to_path_buf(),
lines: SearchQueryMatchData::text("more text"),
line_number: 1,
absolute_offset: 0,
submatches: vec![SearchQuerySubmatch {
r#match: SearchQueryMatchData::Text("text".to_string()),
start: 5,
end: 9,
}]
}
]
);
let data = rx.recv().await;
assert_eq!(
data,
Some(DistantResponseData::SearchDone { id: search_id })
);
assert_eq!(rx.recv().await, None);
}
}

@ -216,7 +216,7 @@ mod tests {
async fn searcher_should_have_query_reflect_ongoing_query() {
let (mut transport, session) = make_session();
let test_query = SearchQuery {
path: PathBuf::from("/some/test/path"),
paths: vec![PathBuf::from("/some/test/path")],
target: SearchQueryTarget::Path,
condition: SearchQueryCondition::Regex {
value: String::from("."),
@ -252,7 +252,7 @@ mod tests {
async fn searcher_should_support_getting_next_match() {
let (mut transport, session) = make_session();
let test_query = SearchQuery {
path: PathBuf::from("/some/test/path"),
paths: vec![PathBuf::from("/some/test/path")],
target: SearchQueryTarget::Path,
condition: SearchQueryCondition::Regex {
value: String::from("."),
@ -371,7 +371,7 @@ mod tests {
let (mut transport, session) = make_session();
let test_query = SearchQuery {
path: PathBuf::from("/some/test/path"),
paths: vec![PathBuf::from("/some/test/path")],
target: SearchQueryTarget::Path,
condition: SearchQueryCondition::Regex {
value: String::from("."),
@ -492,7 +492,7 @@ mod tests {
let (mut transport, session) = make_session();
let test_query = SearchQuery {
path: PathBuf::from("/some/test/path"),
paths: vec![PathBuf::from("/some/test/path")],
target: SearchQueryTarget::Path,
condition: SearchQueryCondition::Regex {
value: String::from("."),

@ -389,6 +389,7 @@ pub enum DistantRequestData {
#[strum_discriminants(strum(message = "Supports searching filesystem using queries"))]
Search {
/// Query to perform against the filesystem
#[cfg_attr(feature = "clap", clap(flatten))]
query: SearchQuery,
},

@ -7,19 +7,24 @@ pub type SearchId = u32;
/// Represents a query to perform against the filesystem
#[derive(Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[cfg_attr(feature = "clap", derive(clap::Args))]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct SearchQuery {
/// Path in which to perform the query
pub path: PathBuf,
/// Kind of data to example using conditions
/// Kind of data to examine using condition
#[cfg_attr(feature = "clap", clap(long, value_enum, default_value_t = SearchQueryTarget::Contents))]
pub target: SearchQueryTarget,
/// Condition to meet to be considered a match
#[cfg_attr(feature = "clap", clap(name = "pattern"))]
pub condition: SearchQueryCondition,
/// Paths in which to perform the query
#[cfg_attr(feature = "clap", clap(default_value = "."))]
pub paths: Vec<PathBuf>,
/// Options to apply to the query
#[serde(default)]
#[cfg_attr(feature = "clap", clap(flatten))]
pub options: SearchQueryOptions,
}
@ -41,7 +46,9 @@ impl FromStr for SearchQuery {
/// Kind of data to examine using conditions
#[derive(Copy, Clone, Debug, PartialEq, Eq, Serialize, Deserialize)]
#[cfg_attr(feature = "clap", derive(clap::ArgEnum))]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
#[cfg_attr(feature = "clap", clap(rename_all = "snake_case"))]
#[serde(rename_all = "snake_case")]
pub enum SearchQueryTarget {
/// Checks path of file, directory, or symlink
@ -123,28 +130,43 @@ impl SearchQueryCondition {
}
}
impl FromStr for SearchQueryCondition {
type Err = std::convert::Infallible;
/// Parses search query from a JSON string
fn from_str(s: &str) -> Result<Self, Self::Err> {
Ok(Self::regex(s))
}
}
/// Options associated with a search query
#[derive(Clone, Debug, Default, PartialEq, Eq, Serialize, Deserialize)]
#[cfg_attr(feature = "clap", derive(clap::Args))]
#[cfg_attr(feature = "schemars", derive(schemars::JsonSchema))]
pub struct SearchQueryOptions {
/// Restrict search to only these file types (otherwise all are allowed)
#[cfg_attr(feature = "clap", clap(skip))]
#[serde(default)]
pub allowed_file_types: HashSet<FileType>,
/// Regex to use to filter paths being searched to only those that match the include condition
#[cfg_attr(feature = "clap", clap(long))]
#[serde(default)]
pub include: Option<SearchQueryCondition>,
/// Regex to use to filter paths being searched to only those that do not match the exclude
/// condition
#[cfg_attr(feature = "clap", clap(long))]
#[serde(default)]
pub exclude: Option<SearchQueryCondition>,
/// Search should follow symbolic links
#[cfg_attr(feature = "clap", clap(long))]
#[serde(default)]
pub follow_symbolic_links: bool,
/// Maximum results to return before stopping the query
#[cfg_attr(feature = "clap", clap(long))]
#[serde(default)]
pub limit: Option<u64>,
@ -153,6 +175,7 @@ pub struct SearchQueryOptions {
/// The smallest depth is 0 and always corresponds to the path given to the new function on
/// this type. Its direct descendants have depth 1, and their descendants have depth 2, and so
/// on.
#[cfg_attr(feature = "clap", clap(long))]
#[serde(default)]
pub min_depth: Option<u64>,
@ -164,11 +187,13 @@ pub struct SearchQueryOptions {
///
/// Note that this will not simply filter the entries of the iterator, but it will actually
/// avoid descending into directories when the depth is exceeded.
#[cfg_attr(feature = "clap", clap(long))]
#[serde(default)]
pub max_depth: Option<u64>,
/// Amount of results to batch before sending back excluding final submission that will always
/// include the remaining results even if less than pagination request
#[cfg_attr(feature = "clap", clap(long))]
#[serde(default)]
pub pagination: Option<u64>,
}

@ -4,7 +4,6 @@ use assert_fs::prelude::*;
use indoc::indoc;
use predicates::Predicate;
use rstest::*;
use serde_json::json;
const SEARCH_RESULTS_REGEX: &str = indoc! {r"
.*?[\\/]file1.txt
@ -26,12 +25,6 @@ fn should_search_filesystem_using_query(mut action_cmd: CtxCommand<Command>) {
.unwrap();
root.child("file3.txt").write_str("more content").unwrap();
let query = json!({
"path": root.path().to_string_lossy(),
"target": "contents",
"condition": {"type": "regex", "value": "te[a-z]*\\b"},
});
let stdout_predicate_fn = predicates::function::function(|s: &[u8]| {
let s = std::str::from_utf8(s).unwrap();
@ -54,7 +47,8 @@ fn should_search_filesystem_using_query(mut action_cmd: CtxCommand<Command>) {
// distant action system-info
action_cmd
.arg("search")
.arg(&serde_json::to_string(&query).unwrap())
.arg("te[a-z]*\\b")
.arg(root.path())
.assert()
.success()
.stdout(stdout_predicate_fn)

@ -19,7 +19,7 @@ async fn should_support_json_search_filesystem_using_query(mut json_repl: CtxCom
"payload": {
"type": "search",
"query": {
"path": root.path().to_string_lossy(),
"paths": [root.path().to_string_lossy()],
"target": "contents",
"condition": {"type": "regex", "value": "ua"},
},

Loading…
Cancel
Save