From 02b717ae54b0974ec0181c797214f3c176e9c0e8 Mon Sep 17 00:00:00 2001 From: Vincent Flyson Date: Mon, 26 Aug 2019 23:17:36 -0400 Subject: [PATCH] Add flag to ignore errors related to TLS certificates --- Cargo.toml | 2 +- README.md | 1 + src/html.rs | 128 +++++++++++++++++++++++++++++----------------------- src/http.rs | 4 +- src/main.rs | 12 ++++- 5 files changed, 88 insertions(+), 59 deletions(-) diff --git a/Cargo.toml b/Cargo.toml index 3490512..e15cd6d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -1,6 +1,6 @@ [package] name = "monolith" -version = "2.0.14" +version = "2.0.16" authors = [ "Sunshine ", "Mahdi Robatipoor ", diff --git a/README.md b/README.md index bb15527..52253b8 100644 --- a/README.md +++ b/README.md @@ -23,6 +23,7 @@ If compared to saving websites with `wget -mpk`, this tool embeds all assets as ### Options - `-i`: Remove images - `-j`: Exclude JavaScript + - `-k`: Accept invalid X.509 (TLS) certificates - `-s`: Silent mode - `-u`: Specify custom User-Agent diff --git a/src/html.rs b/src/html.rs index 8aadb1a..df197e4 100644 --- a/src/html.rs +++ b/src/html.rs @@ -70,18 +70,20 @@ pub fn walk_and_embed_assets( opt_no_images: bool, opt_user_agent: &str, opt_silent: bool, + opt_insecure: bool, ) { match node.data { NodeData::Document => { // Dig deeper for child in node.children.borrow().iter() { walk_and_embed_assets( - &url, child, - opt_no_js, - opt_no_images, - opt_user_agent, - opt_silent, - ); + &url, child, + opt_no_js, + opt_no_images, + opt_user_agent, + opt_silent, + opt_insecure, + ); } } NodeData::Doctype { .. } => {} @@ -128,12 +130,14 @@ pub fn walk_and_embed_assets( ) .unwrap_or(EMPTY_STRING.clone()); let favicon_datauri = retrieve_asset( - &href_full_url, - true, - "", - opt_user_agent, - opt_silent, - ).unwrap_or(EMPTY_STRING.clone()); + &href_full_url, + true, + "", + opt_user_agent, + opt_silent, + opt_insecure, + ) + .unwrap_or(EMPTY_STRING.clone()); attr.value.clear(); attr.value.push_slice(favicon_datauri.as_str()); } @@ -148,12 +152,14 @@ pub fn walk_and_embed_assets( ) .unwrap_or(EMPTY_STRING.clone()); let css_datauri = retrieve_asset( - &href_full_url, - true, - "text/css", - opt_user_agent, - opt_silent, - ).unwrap_or(EMPTY_STRING.clone()); + &href_full_url, + true, + "text/css", + opt_user_agent, + opt_silent, + opt_insecure, + ) + .unwrap_or(EMPTY_STRING.clone()); attr.value.clear(); attr.value.push_slice(css_datauri.as_str()); } @@ -185,12 +191,14 @@ pub fn walk_and_embed_assets( ) .unwrap_or(EMPTY_STRING.clone()); let img_datauri = retrieve_asset( - &src_full_url, - true, - "", - opt_user_agent, - opt_silent, - ).unwrap_or(EMPTY_STRING.clone()); + &src_full_url, + true, + "", + opt_user_agent, + opt_silent, + opt_insecure, + ) + .unwrap_or(EMPTY_STRING.clone()); attr.value.clear(); attr.value.push_slice(img_datauri.as_str()); } @@ -211,12 +219,14 @@ pub fn walk_and_embed_assets( ) .unwrap_or(EMPTY_STRING.clone()); let source_datauri = retrieve_asset( - &srcset_full_url, - true, - "", - opt_user_agent, - opt_silent, - ).unwrap_or(EMPTY_STRING.clone()); + &srcset_full_url, + true, + "", + opt_user_agent, + opt_silent, + opt_insecure, + ) + .unwrap_or(EMPTY_STRING.clone()); attr.value.clear(); attr.value.push_slice(source_datauri.as_str()); } @@ -257,12 +267,14 @@ pub fn walk_and_embed_assets( ) .unwrap_or(EMPTY_STRING.clone()); let js_datauri = retrieve_asset( - &src_full_url, - true, - "application/javascript", - opt_user_agent, - opt_silent, - ).unwrap_or(EMPTY_STRING.clone()); + &src_full_url, + true, + "application/javascript", + opt_user_agent, + opt_silent, + opt_insecure, + ) + .unwrap_or(EMPTY_STRING.clone()); attr.value.clear(); attr.value.push_slice(js_datauri.as_str()); } @@ -290,21 +302,24 @@ pub fn walk_and_embed_assets( let src_full_url: String = resolve_url(&url, &attr.value.to_string()) .unwrap_or(EMPTY_STRING.clone()); let iframe_data = retrieve_asset( - &src_full_url, - false, - "text/html", - opt_user_agent, - opt_silent, - ).unwrap_or(EMPTY_STRING.clone()); + &src_full_url, + false, + "text/html", + opt_user_agent, + opt_silent, + opt_insecure, + ) + .unwrap_or(EMPTY_STRING.clone()); let dom = html_to_dom(&iframe_data); walk_and_embed_assets( - &src_full_url, - &dom.document, - opt_no_js, - opt_no_images, - opt_user_agent, - opt_silent, - ); + &src_full_url, + &dom.document, + opt_no_js, + opt_no_images, + opt_user_agent, + opt_silent, + opt_insecure, + ); let mut buf: Vec = Vec::new(); serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); let iframe_datauri = data_to_dataurl("text/html", &buf); @@ -328,13 +343,14 @@ pub fn walk_and_embed_assets( // Dig deeper for child in node.children.borrow().iter() { walk_and_embed_assets( - &url, - child, - opt_no_js, - opt_no_images, - opt_user_agent, - opt_silent, - ); + &url, + child, + opt_no_js, + opt_no_images, + opt_user_agent, + opt_silent, + opt_insecure, + ); } } NodeData::ProcessingInstruction { .. } => unreachable!() diff --git a/src/http.rs b/src/http.rs index af39b90..d48ab42 100644 --- a/src/http.rs +++ b/src/http.rs @@ -36,13 +36,15 @@ pub fn retrieve_asset( as_dataurl: bool, as_mime: &str, opt_user_agent: &str, - opt_silent: bool + opt_silent: bool, + opt_insecure: bool, ) -> Result { if is_data_url(&url).unwrap() { Ok(url.to_string()) } else { let client = Client::builder() .timeout(Duration::from_secs(10)) + .danger_accept_invalid_certs(opt_insecure) .build()?; let mut response = client .get(url) diff --git a/src/main.rs b/src/main.rs index af229b5..1d25991 100644 --- a/src/main.rs +++ b/src/main.rs @@ -23,6 +23,7 @@ fn main() { ) .args_from_usage("-i, --no-images 'Removes images'") .args_from_usage("-j, --no-js 'Excludes JavaScript'") + .args_from_usage("-k, --insecure 'Accept invalid X.509 (TLS) certificates'") .args_from_usage("-s, --silent 'Suppress verbosity'") .args_from_usage("-u, --user-agent=[Iceweasel] 'Custom User-Agent string'") .get_matches(); @@ -31,11 +32,19 @@ fn main() { let arg_target = command.value_of("url").unwrap(); let opt_no_images = command.is_present("no-images"); let opt_no_js = command.is_present("no-js"); + let opt_insecure = command.is_present("insecure"); let opt_silent = command.is_present("silent"); let opt_user_agent = command.value_of("user-agent").unwrap_or(DEFAULT_USER_AGENT); if is_valid_url(arg_target) { - let data = retrieve_asset(&arg_target, false, "", opt_user_agent, opt_silent).unwrap(); + let data = retrieve_asset( + &arg_target, + false, + "", + opt_user_agent, + opt_silent, + opt_insecure, + ).unwrap(); let dom = html_to_dom(&data); walk_and_embed_assets( @@ -45,6 +54,7 @@ fn main() { opt_no_images, opt_user_agent, opt_silent, + opt_insecure, ); print_dom(&dom.document);