From a6ddf1c13a399d669d5f4c74a76f8d4c7fb4cafc Mon Sep 17 00:00:00 2001 From: Sunshine Date: Sun, 14 Mar 2021 19:42:36 -1000 Subject: [PATCH 1/5] simplify code responsible for processing CSS --- src/css.rs | 50 +++++++------- src/html.rs | 10 +-- src/main.rs | 4 +- src/tests/cli/basic.rs | 2 +- src/tests/cli/local_files.rs | 2 +- src/tests/css/embed_css.rs | 16 ++--- src/tests/css/enquote.rs | 53 -------------- src/tests/css/mod.rs | 1 - ...data_to_data_url.rs => create_data_url.rs} | 4 +- src/tests/url/is_http_or_https_url.rs | 69 ------------------- src/tests/url/is_url_and_has_protocol.rs | 28 +++++--- src/tests/url/mod.rs | 2 +- src/tests/url/resolve_url.rs | 68 +++++++++++++----- src/tests/utils/retrieve_asset.rs | 8 +-- src/url.rs | 2 +- 15 files changed, 122 insertions(+), 197 deletions(-) delete mode 100644 src/tests/css/enquote.rs rename src/tests/url/{data_to_data_url.rs => create_data_url.rs} (91%) delete mode 100644 src/tests/url/is_http_or_https_url.rs diff --git a/src/css.rs b/src/css.rs index a1205a9..f5184c4 100644 --- a/src/css.rs +++ b/src/css.rs @@ -6,7 +6,7 @@ use std::collections::HashMap; use url::Url; use crate::opts::Options; -use crate::url::{data_to_data_url, resolve_url}; +use crate::url::{create_data_url, resolve_url}; use crate::utils::retrieve_asset; const CSS_PROPS_WITH_IMAGE_URLS: &[&str] = &[ @@ -55,14 +55,6 @@ pub fn embed_css( .unwrap() } -pub fn enquote(input: String, double: bool) -> String { - if double { - format!("\"{}\"", input.replace("\"", "\\\"")) - } else { - format!("'{}'", input.replace("'", "\\'")) - } -} - pub fn format_ident(ident: &str) -> String { let mut res: String = String::new(); let _ = serialize_identifier(ident, &mut res); @@ -206,7 +198,7 @@ pub fn process_css<'a>( depth + 1, ) { Ok((import_contents, import_final_url, _import_media_type)) => { - let mut import_data_url = data_to_data_url( + let mut import_data_url = create_data_url( "text/css", embed_css( cache, @@ -220,15 +212,18 @@ pub fn process_css<'a>( &import_final_url, ); import_data_url.set_fragment(import_full_url.fragment()); - result.push_str(enquote(import_data_url.to_string(), false).as_str()); + result.push_str( + format_quoted_string(&import_data_url.to_string()).as_str(), + ); } Err(_) => { // Keep remote reference if unable to retrieve the asset if import_full_url.scheme() == "http" || import_full_url.scheme() == "https" { - result - .push_str(enquote(import_full_url.to_string(), false).as_str()); + result.push_str( + format_quoted_string(&import_full_url.to_string()).as_str(), + ); } } } @@ -240,7 +235,7 @@ pub fn process_css<'a>( } if options.no_images && is_image_url_prop(curr_prop.as_str()) { - result.push_str(enquote(str!(empty_image!()), false).as_str()); + result.push_str(format_quoted_string(empty_image!()).as_str()); } else { let resolved_url: Url = resolve_url(&document_url, value); match retrieve_asset( @@ -253,9 +248,11 @@ pub fn process_css<'a>( ) { Ok((data, final_url, media_type)) => { let mut data_url = - data_to_data_url(&media_type, &data, &final_url); + create_data_url(&media_type, &data, &final_url); data_url.set_fragment(resolved_url.fragment()); - result.push_str(enquote(data_url.to_string(), false).as_str()); + result.push_str( + format_quoted_string(&data_url.to_string()).as_str(), + ); } Err(_) => { // Keep remote reference if unable to retrieve the asset @@ -263,7 +260,8 @@ pub fn process_css<'a>( || resolved_url.scheme() == "https" { result.push_str( - enquote(resolved_url.to_string(), false).as_str(), + format_quoted_string(&resolved_url.to_string()) + .as_str(), ); } } @@ -345,7 +343,7 @@ pub fn process_css<'a>( depth + 1, ) { Ok((css, final_url, _media_type)) => { - let mut data_url = data_to_data_url( + let mut data_url = create_data_url( "text/css", embed_css( cache, @@ -359,18 +357,19 @@ pub fn process_css<'a>( &final_url, ); data_url.set_fragment(full_url.fragment()); - result.push_str(enquote(data_url.to_string(), false).as_str()); + result.push_str(format_quoted_string(&data_url.to_string()).as_str()); } Err(_) => { // Keep remote reference if unable to retrieve the asset if full_url.scheme() == "http" || full_url.scheme() == "https" { - result.push_str(enquote(full_url.to_string(), false).as_str()); + result + .push_str(format_quoted_string(&full_url.to_string()).as_str()); } } } } else { if is_image_url_prop(curr_prop.as_str()) && options.no_images { - result.push_str(enquote(str!(empty_image!()), false).as_str()); + result.push_str(format_quoted_string(empty_image!()).as_str()); } else { let full_url: Url = resolve_url(&document_url, value); match retrieve_asset( @@ -382,14 +381,17 @@ pub fn process_css<'a>( depth + 1, ) { Ok((data, final_url, media_type)) => { - let mut data_url = data_to_data_url(&media_type, &data, &final_url); + let mut data_url = create_data_url(&media_type, &data, &final_url); data_url.set_fragment(full_url.fragment()); - result.push_str(enquote(data_url.to_string(), false).as_str()); + result + .push_str(format_quoted_string(&data_url.to_string()).as_str()); } Err(_) => { // Keep remote reference if unable to retrieve the asset if full_url.scheme() == "http" || full_url.scheme() == "https" { - result.push_str(enquote(full_url.to_string(), false).as_str()); + result.push_str( + format_quoted_string(&full_url.to_string()).as_str(), + ); } } } diff --git a/src/html.rs b/src/html.rs index 17524fb..40eb35d 100644 --- a/src/html.rs +++ b/src/html.rs @@ -17,7 +17,7 @@ use std::default::Default; use crate::css::embed_css; use crate::js::attr_is_event_handler; use crate::opts::Options; -use crate::url::{clean_url, data_to_data_url, is_url_and_has_protocol, resolve_url}; +use crate::url::{clean_url, create_data_url, is_url_and_has_protocol, resolve_url}; use crate::utils::retrieve_asset; struct SrcSetItem<'a> { @@ -190,7 +190,7 @@ pub fn embed_srcset( ) { Ok((image_data, image_final_url, image_media_type)) => { let mut image_data_url = - data_to_data_url(&image_media_type, &image_data, &image_final_url); + create_data_url(&image_media_type, &image_data, &image_final_url); // Append retreved asset as a data URL image_data_url.set_fragment(image_full_url.fragment()); result.push_str(image_data_url.as_ref()); @@ -534,7 +534,7 @@ pub fn retrieve_and_embed_asset( options, depth + 1, ); - let css_data_url = data_to_data_url("text/css", css.as_bytes(), &final_url); + let css_data_url = create_data_url("text/css", css.as_bytes(), &final_url); set_node_attr(&node, attr_name, Some(css_data_url.to_string())); @@ -559,7 +559,7 @@ pub fn retrieve_and_embed_asset( ) .unwrap(); - let mut frame_data_url = data_to_data_url(&media_type, &frame_data, &final_url); + let mut frame_data_url = create_data_url(&media_type, &frame_data, &final_url); frame_data_url.set_fragment(resolved_url.fragment()); @@ -572,7 +572,7 @@ pub fn retrieve_and_embed_asset( if node_name == "script" { media_type = "application/javascript".to_string(); } - let mut data_url = data_to_data_url(&media_type, &data, &final_url); + let mut data_url = create_data_url(&media_type, &data, &final_url); data_url.set_fragment(resolved_url.fragment()); set_node_attr(node, attr_name, Some(data_url.to_string())); } diff --git a/src/main.rs b/src/main.rs index d04fba6..23e245b 100644 --- a/src/main.rs +++ b/src/main.rs @@ -13,7 +13,7 @@ use monolith::html::{ stringify_document, walk_and_embed_assets, }; use monolith::opts::Options; -use monolith::url::{data_to_data_url, parse_data_url, resolve_url}; +use monolith::url::{create_data_url, parse_data_url, resolve_url}; use monolith::utils::retrieve_asset; mod macros; @@ -266,7 +266,7 @@ fn main() { 0, ) { Ok((data, final_url, media_type)) => { - let favicon_data_url: Url = data_to_data_url(&media_type, &data, &final_url); + let favicon_data_url: Url = create_data_url(&media_type, &data, &final_url); dom = add_favicon(&dom.document, favicon_data_url.to_string()); } Err(_) => { diff --git a/src/tests/cli/basic.rs b/src/tests/cli/basic.rs index 60ae9da..0e0d5d6 100644 --- a/src/tests/cli/basic.rs +++ b/src/tests/cli/basic.rs @@ -71,7 +71,7 @@ mod passing { // STDOUT should contain embedded CSS url()'s assert_eq!( std::str::from_utf8(&out.stdout).unwrap(), - "\n\n" + "\n\n" ); // STDERR should list files that got retrieved diff --git a/src/tests/cli/local_files.rs b/src/tests/cli/local_files.rs index 1ee2280..4c3465f 100644 --- a/src/tests/cli/local_files.rs +++ b/src/tests/cli/local_files.rs @@ -193,7 +193,7 @@ mod passing { // STDOUT should contain HTML with date URL for background-image in it assert_eq!( std::str::from_utf8(&out.stdout).unwrap(), - "
body {}"; @@ -191,9 +191,9 @@ mod passing { "\ @charset \"UTF-8\";\n\ \n\ - @import 'data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9';\n\ + @import \"data:text/css;base64,aHRtbHtiYWNrZ3JvdW5kLWNvbG9yOiMwMDB9\";\n\ \n\ - @import url('data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==')\n\ + @import url(\"data:text/css;base64,aHRtbHtjb2xvcjojZmZmfQ==\")\n\ " ); } @@ -331,7 +331,7 @@ mod passing { "; const CSS_OUT: &str = "\ #language a[href=\"#translations\"]:before {\n\ - content: url('data:;base64,') \"\\a \";\n\ + content: url(\"data:;base64,\") \"\\a \";\n\ white-space: pre }\n\ "; diff --git a/src/tests/css/enquote.rs b/src/tests/css/enquote.rs deleted file mode 100644 index d02a868..0000000 --- a/src/tests/css/enquote.rs +++ /dev/null @@ -1,53 +0,0 @@ -// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ -// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ -// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ -// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ -// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ -// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ - -#[cfg(test)] -mod passing { - use crate::css; - - #[test] - fn empty_input_single_quotes() { - assert_eq!(css::enquote(str!(""), false), "''"); - } - - #[test] - fn empty_input_double_quotes() { - assert_eq!(css::enquote(str!(""), true), "\"\""); - } - - #[test] - fn apostrophes_single_quotes() { - assert_eq!( - css::enquote(str!("It's a lovely day, don't you think?"), false), - "'It\\'s a lovely day, don\\'t you think?'" - ); - } - - #[test] - fn apostrophes_double_quotes() { - assert_eq!( - css::enquote(str!("It's a lovely day, don't you think?"), true), - "\"It's a lovely day, don't you think?\"" - ); - } - - #[test] - fn feet_and_inches_single_quotes() { - assert_eq!( - css::enquote(str!("5'2\", 6'5\""), false), - "'5\\'2\", 6\\'5\"'" - ); - } - - #[test] - fn feet_and_inches_double_quotes() { - assert_eq!( - css::enquote(str!("5'2\", 6'5\""), true), - "\"5'2\\\", 6'5\\\"\"" - ); - } -} diff --git a/src/tests/css/mod.rs b/src/tests/css/mod.rs index 5f17fd3..15775b5 100644 --- a/src/tests/css/mod.rs +++ b/src/tests/css/mod.rs @@ -1,3 +1,2 @@ mod embed_css; -mod enquote; mod is_image_url_prop; diff --git a/src/tests/url/data_to_data_url.rs b/src/tests/url/create_data_url.rs similarity index 91% rename from src/tests/url/data_to_data_url.rs rename to src/tests/url/create_data_url.rs index bac0126..873dbda 100644 --- a/src/tests/url/data_to_data_url.rs +++ b/src/tests/url/create_data_url.rs @@ -15,7 +15,7 @@ mod passing { fn encode_string_with_specific_media_type() { let mime = "application/javascript"; let data = "var word = 'hello';\nalert(word);\n"; - let data_url = url::data_to_data_url(mime, data.as_bytes(), &Url::parse("data:,").unwrap()); + let data_url = url::create_data_url(mime, data.as_bytes(), &Url::parse("data:,").unwrap()); assert_eq!( data_url.as_str(), @@ -26,7 +26,7 @@ mod passing { #[test] fn encode_append_fragment() { let data = "\n"; - let data_url = url::data_to_data_url( + let data_url = url::create_data_url( "image/svg+xml", data.as_bytes(), &Url::parse("data:,").unwrap(), diff --git a/src/tests/url/is_http_or_https_url.rs b/src/tests/url/is_http_or_https_url.rs deleted file mode 100644 index 1e0a579..0000000 --- a/src/tests/url/is_http_or_https_url.rs +++ /dev/null @@ -1,69 +0,0 @@ -// ██████╗ █████╗ ███████╗███████╗██╗███╗ ██╗ ██████╗ -// ██╔══██╗██╔══██╗██╔════╝██╔════╝██║████╗ ██║██╔════╝ -// ██████╔╝███████║███████╗███████╗██║██╔██╗ ██║██║ ███╗ -// ██╔═══╝ ██╔══██║╚════██║╚════██║██║██║╚██╗██║██║ ██║ -// ██║ ██║ ██║███████║███████║██║██║ ╚████║╚██████╔╝ -// ╚═╝ ╚═╝ ╚═╝╚══════╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ - -#[cfg(test)] -mod passing { - use reqwest::Url; - - use crate::url; - - #[test] - fn http_url() { - assert!(url::is_http_or_https_url(&Url::parse("http://kernel.org").unwrap())); - } - - #[test] - fn https_url() { - assert!(url::is_http_or_https_url(&Url::parse("https://www.rust-lang.org/").unwrap())); - } - - #[test] - fn http_url_with_backslashes() { - assert!(url::is_http_or_https_url(&Url::parse("http:\\\\freebsd.org\\").unwrap())); - } -} - -// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ -// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ -// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ -// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ -// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ -// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ - -#[cfg(test)] -mod failing { - use reqwest::Url; - - use crate::url; - - #[test] - fn url_with_no_protocol() { - assert!(!url::is_http_or_https_url(&Url::parse("//kernel.org").unwrap())); - } - - #[test] - fn dot_slash_filename() { - assert!(!url::is_http_or_https_url(&Url::parse("./index.html").unwrap())); - } - - #[test] - fn just_filename() { - assert!(!url::is_http_or_https_url(&Url::parse("some-local-page.htm").unwrap())); - } - - #[test] - fn https_ip_port_url() { - assert!(!url::is_http_or_https_url(&Url::parse("ftp://1.2.3.4/www/index.html").unwrap())); - } - - #[test] - fn data_url() { - assert!(!url::is_http_or_https_url( - &Url::parse("data:text/html;base64,V2VsY29tZSBUbyBUaGUgUGFydHksIDxiPlBhbDwvYj4h").unwrap() - )); - } -} diff --git a/src/tests/url/is_url_and_has_protocol.rs b/src/tests/url/is_url_and_has_protocol.rs index cae497a..a46690b 100644 --- a/src/tests/url/is_url_and_has_protocol.rs +++ b/src/tests/url/is_url_and_has_protocol.rs @@ -48,6 +48,11 @@ mod passing { assert!(url::is_url_and_has_protocol("https://github.com")); } + #[test] + fn file() { + assert!(url::is_url_and_has_protocol("file:///tmp/image.png")); + } + #[test] fn mailto_uppercase() { assert!(url::is_url_and_has_protocol( @@ -59,6 +64,11 @@ mod passing { fn empty_data_url() { assert!(url::is_url_and_has_protocol("data:text/html,")); } + + #[test] + fn empty_data_url_surrounded_by_spaces() { + assert!(url::is_url_and_has_protocol(" data:text/html, ")); + } } // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ @@ -74,25 +84,27 @@ mod failing { #[test] fn url_with_no_protocol() { - assert!(!url::is_url_and_has_protocol( - "//some-hostname.com/some-file.html" - )); + assert_eq!( + url::is_url_and_has_protocol("//some-hostname.com/some-file.html"), + false + ); } #[test] fn relative_path() { - assert!(!url::is_url_and_has_protocol( - "some-hostname.com/some-file.html" - )); + assert_eq!( + url::is_url_and_has_protocol("some-hostname.com/some-file.html"), + false + ); } #[test] fn relative_to_root_path() { - assert!(!url::is_url_and_has_protocol("/some-file.html")); + assert_eq!(url::is_url_and_has_protocol("/some-file.html"), false); } #[test] fn empty_string() { - assert!(!url::is_url_and_has_protocol("")); + assert_eq!(url::is_url_and_has_protocol(""), false); } } diff --git a/src/tests/url/mod.rs b/src/tests/url/mod.rs index 50efbc6..e99e386 100644 --- a/src/tests/url/mod.rs +++ b/src/tests/url/mod.rs @@ -1,5 +1,5 @@ mod clean_url; -mod data_to_data_url; +mod create_data_url; mod is_url_and_has_protocol; mod parse_data_url; mod percent_decode; diff --git a/src/tests/url/resolve_url.rs b/src/tests/url/resolve_url.rs index edfe773..4abede9 100644 --- a/src/tests/url/resolve_url.rs +++ b/src/tests/url/resolve_url.rs @@ -11,6 +11,34 @@ mod passing { use crate::url; + #[test] + fn basic_httsp_relative() { + assert_eq!( + url::resolve_url( + &Url::parse("https://www.kernel.org").unwrap(), + "category/signatures.html" + ) + .as_str(), + Url::parse("https://www.kernel.org/category/signatures.html") + .unwrap() + .as_str() + ); + } + + #[test] + fn basic_httsp_absolute() { + assert_eq!( + url::resolve_url( + &Url::parse("https://www.kernel.org").unwrap(), + "/category/signatures.html" + ) + .as_str(), + Url::parse("https://www.kernel.org/category/signatures.html") + .unwrap() + .as_str() + ); + } + #[test] fn from_https_to_level_up_relative() { assert_eq!( @@ -50,7 +78,7 @@ mod passing { } #[test] - fn from_https_url_to_relative_root_path() { + fn from_https_url_to_absolute_path() { assert_eq!( url::resolve_url( &Url::parse("https://www.kernel.org/category/signatures.html").unwrap(), @@ -148,22 +176,28 @@ mod passing { ); } - // #[test] - // fn resolve_from_file_url_to_file_url() { - // assert_eq!( - // if cfg!(windows) { - // url::resolve_url(&Url::parse("file:///c:/index.html").unwrap(), "file:///c:/image.png").as_str() - // } else { - // url::resolve_url(&Url::parse("file:///tmp/index.html").unwrap(), "file:///tmp/image.png") - // .as_str() - // }, - // if cfg!(windows) { - // "file:///c:/image.png" - // } else { - // "file:///tmp/image.png" - // } - // ); - // } + #[test] + fn resolve_from_file_url_to_file_url() { + if cfg!(windows) { + assert_eq!( + url::resolve_url( + &Url::parse("file:///c:/index.html").unwrap(), + "file:///c:/image.png" + ) + .as_str(), + "file:///c:/image.png" + ); + } else { + assert_eq!( + url::resolve_url( + &Url::parse("file:///tmp/index.html").unwrap(), + "file:///tmp/image.png" + ) + .as_str(), + "file:///tmp/image.png" + ); + } + } } // ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ diff --git a/src/tests/utils/retrieve_asset.rs b/src/tests/utils/retrieve_asset.rs index d7f7057..5e975d0 100644 --- a/src/tests/utils/retrieve_asset.rs +++ b/src/tests/utils/retrieve_asset.rs @@ -36,8 +36,8 @@ mod passing { ) .unwrap(); assert_eq!( - url::data_to_data_url(&media_type, &data, &final_url), - url::data_to_data_url( + url::create_data_url(&media_type, &data, &final_url), + url::create_data_url( "text/html", "target".as_bytes(), &Url::parse("data:text/html;base64,c291cmNl").unwrap() @@ -45,7 +45,7 @@ mod passing { ); assert_eq!( final_url, - url::data_to_data_url( + url::create_data_url( "text/html", "target".as_bytes(), &Url::parse("data:text/html;base64,c291cmNl").unwrap() @@ -85,7 +85,7 @@ mod passing { 0, ) .unwrap(); - assert_eq!(url::data_to_data_url("application/javascript", &data, &final_url), Url::parse("data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==").unwrap()); + assert_eq!(url::create_data_url("application/javascript", &data, &final_url), Url::parse("data:application/javascript;base64,ZG9jdW1lbnQuYm9keS5zdHlsZS5iYWNrZ3JvdW5kQ29sb3IgPSAiZ3JlZW4iOwpkb2N1bWVudC5ib2R5LnN0eWxlLmNvbG9yID0gInJlZCI7Cg==").unwrap()); assert_eq!( final_url, Url::parse(&format!( diff --git a/src/url.rs b/src/url.rs index 002b6c0..878e726 100644 --- a/src/url.rs +++ b/src/url.rs @@ -12,7 +12,7 @@ pub fn clean_url(url: Url) -> Url { url } -pub fn data_to_data_url(media_type: &str, data: &[u8], final_asset_url: &Url) -> Url { +pub fn create_data_url(media_type: &str, data: &[u8], final_asset_url: &Url) -> Url { let media_type: String = if media_type.is_empty() { detect_media_type(data, &final_asset_url) } else { From a308a204119634626cab67740bf47bdc49fa4af4 Mon Sep 17 00:00:00 2001 From: Sunshine Date: Mon, 15 Mar 2021 20:10:50 -1000 Subject: [PATCH 2/5] simplify code of CLI tests --- src/html.rs | 1 + src/tests/cli/base_url.rs | 24 +++++---------- src/tests/cli/basic.rs | 26 ++++++---------- src/tests/cli/data_url.rs | 49 ++++++++++-------------------- src/tests/cli/local_files.rs | 31 ++++++------------- src/tests/cli/noscript.rs | 24 +++++---------- src/tests/cli/unusual_encodings.rs | 6 ++-- 7 files changed, 54 insertions(+), 107 deletions(-) diff --git a/src/html.rs b/src/html.rs index 40eb35d..a1524e2 100644 --- a/src/html.rs +++ b/src/html.rs @@ -826,6 +826,7 @@ pub fn walk_and_embed_assets( set_node_attr(node, "href", None); } } + if let Some(image_attr_xlink_href_value) = get_node_attr(node, "xlink:href") { image_href = image_attr_xlink_href_value; if options.no_images { diff --git a/src/tests/cli/base_url.rs b/src/tests/cli/base_url.rs index 7ba88d9..9b07bf7 100644 --- a/src/tests/cli/base_url.rs +++ b/src/tests/cli/base_url.rs @@ -12,8 +12,8 @@ mod passing { use std::process::Command; #[test] - fn add_new_when_provided() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn add_new_when_provided() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("-b") @@ -35,13 +35,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn keep_existing_when_none_provided() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn keep_existing_when_none_provided() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("data:text/html,Hello%2C%20World!") @@ -61,13 +59,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn override_existing_when_provided() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn override_existing_when_provided() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("-b") @@ -89,13 +85,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn remove_existing_when_empty_provided() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn remove_existing_when_empty_provided() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("-b") @@ -117,7 +111,5 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } } diff --git a/src/tests/cli/basic.rs b/src/tests/cli/basic.rs index 0e0d5d6..063aa53 100644 --- a/src/tests/cli/basic.rs +++ b/src/tests/cli/basic.rs @@ -15,8 +15,8 @@ mod passing { use url::Url; #[test] - fn print_version() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn print_version() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd.arg("-V").output().unwrap(); // STDOUT should contain program name and version @@ -30,12 +30,10 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn stdin_target_input() -> Result<(), Box> { + fn stdin_target_input() { let mut echo = Command::new("echo") .arg("Hello from STDIN") .stdout(Stdio::piped()) @@ -44,22 +42,20 @@ mod passing { let echo_out = echo.stdout.take().unwrap(); echo.wait().unwrap(); - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); cmd.stdin(echo_out); let out = cmd.arg("-M").arg("-").output().unwrap(); - // STDOUT should contain HTML from STDIN + // STDOUT should contain HTML created out of STDIN assert_eq!( std::str::from_utf8(&out.stdout).unwrap(), "Hello from STDIN\n\n" ); - - Ok(()) } #[test] - fn css_import_string() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn css_import_string() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let path_html: &Path = Path::new("src/tests/data/css/index.html"); let path_css: &Path = Path::new("src/tests/data/css/style.css"); @@ -95,8 +91,6 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } } @@ -114,8 +108,8 @@ mod failing { use std::process::Command; #[test] - fn bad_input_empty_target() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn bad_input_empty_target() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd.arg("").output().unwrap(); // STDOUT should be empty @@ -129,7 +123,5 @@ mod failing { // The exit code should be 1 out.assert().code(1); - - Ok(()) } } diff --git a/src/tests/cli/data_url.rs b/src/tests/cli/data_url.rs index d1b255a..62e5bbe 100644 --- a/src/tests/cli/data_url.rs +++ b/src/tests/cli/data_url.rs @@ -12,8 +12,8 @@ mod passing { use std::process::Command; #[test] - fn bad_input_data_url() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn bad_input_data_url() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap(); // STDOUT should contain HTML @@ -27,13 +27,11 @@ mod passing { // The exit code should be 1 out.assert().code(1); - - Ok(()) } #[test] - fn isolate_data_url() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn isolate_data_url() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("-I") @@ -54,13 +52,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn remove_css_from_data_url() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn remove_css_from_data_url() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("-c") @@ -82,13 +78,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn remove_fonts_from_data_url() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn remove_fonts_from_data_url() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("-F") @@ -110,13 +104,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn remove_frames_from_data_url() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn remove_frames_from_data_url() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("-f") @@ -137,13 +129,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn remove_images_from_data_url() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn remove_images_from_data_url() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("-i") @@ -173,13 +163,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn remove_js_from_data_url() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn remove_js_from_data_url() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("-j") @@ -203,14 +191,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn security_disallow_local_assets_within_data_url_targets( - ) -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn security_disallow_local_assets_within_data_url_targets() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg("data:text/html,%3Cscript%20src=\"src/tests/data/basic/local-script.js\"%3E%3C/script%3E") @@ -228,7 +213,5 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } } diff --git a/src/tests/cli/local_files.rs b/src/tests/cli/local_files.rs index 4c3465f..bdadad0 100644 --- a/src/tests/cli/local_files.rs +++ b/src/tests/cli/local_files.rs @@ -15,8 +15,8 @@ mod passing { use url::Url; #[test] - fn local_file_target_input_relative_target_path() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn local_file_target_input_relative_target_path() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let cwd_normalized: String = str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/"); let out = cmd @@ -65,13 +65,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn local_file_target_input_absolute_target_path() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn local_file_target_input_absolute_target_path() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let path_html: &Path = Path::new("src/tests/data/basic/local-file.html"); let out = cmd @@ -115,13 +113,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn local_file_url_target_input() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn local_file_url_target_input() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let cwd_normalized: String = str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/"); let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" }; @@ -177,14 +173,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn embed_file_url_local_asset_within_style_attribute() -> Result<(), Box> - { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn embed_file_url_local_asset_within_style_attribute() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let path_html: &Path = Path::new("src/tests/data/svg/index.html"); let path_svg: &Path = Path::new("src/tests/data/svg/image.svg"); @@ -215,13 +208,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn discard_integrity_for_local_files() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn discard_integrity_for_local_files() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let cwd_normalized: String = str!(env::current_dir().unwrap().to_str().unwrap()).replace("\\", "/"); let file_url_protocol: &str = if cfg!(windows) { "file:///" } else { "file://" }; @@ -280,7 +271,5 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } } diff --git a/src/tests/cli/noscript.rs b/src/tests/cli/noscript.rs index 6dde13c..7ba93ce 100644 --- a/src/tests/cli/noscript.rs +++ b/src/tests/cli/noscript.rs @@ -15,8 +15,8 @@ mod passing { use url::Url; #[test] - fn parse_noscript_contents() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn parse_noscript_contents() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let path_html: &Path = Path::new("src/tests/data/noscript/index.html"); let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg"); @@ -47,13 +47,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn unwrap_noscript_contents() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn unwrap_noscript_contents() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let path_html: &Path = Path::new("src/tests/data/noscript/index.html"); let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg"); @@ -84,13 +82,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn unwrap_noscript_contents_nested() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn unwrap_noscript_contents_nested() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let path_html: &Path = Path::new("src/tests/data/noscript/nested.html"); let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg"); @@ -121,13 +117,11 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } #[test] - fn unwrap_noscript_contents_with_script() -> Result<(), Box> { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + fn unwrap_noscript_contents_with_script() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let path_html: &Path = Path::new("src/tests/data/noscript/script.html"); let path_svg: &Path = Path::new("src/tests/data/noscript/image.svg"); @@ -158,7 +152,5 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } } diff --git a/src/tests/cli/unusual_encodings.rs b/src/tests/cli/unusual_encodings.rs index 006e745..4796cec 100644 --- a/src/tests/cli/unusual_encodings.rs +++ b/src/tests/cli/unusual_encodings.rs @@ -12,10 +12,10 @@ mod passing { use std::process::Command; #[test] - fn change_encoding_to_utf_8() -> Result<(), Box> { + fn change_encoding_to_utf_8() { let cwd = env::current_dir().unwrap(); let cwd_normalized: String = str!(cwd.to_str().unwrap()).replace("\\", "/"); - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME"))?; + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") .arg(if cfg!(windows) { @@ -45,7 +45,5 @@ mod passing { // The exit code should be 0 out.assert().code(0); - - Ok(()) } } From a6e891b3c5f1ed2b751bda0672019120e9ab1d30 Mon Sep 17 00:00:00 2001 From: Sunshine Date: Wed, 2 Jun 2021 03:41:41 -1000 Subject: [PATCH 3/5] add more tests --- README.md | 16 +- src/html.rs | 107 +++++----- src/tests/cli/base_url.rs | 2 +- src/tests/cli/data_url.rs | 50 +++-- src/tests/cli/noscript.rs | 32 ++- src/tests/cli/unusual_encodings.rs | 9 +- src/tests/html/walk_and_embed_assets.rs | 272 ++++++++++++++++++------ 7 files changed, 344 insertions(+), 144 deletions(-) diff --git a/README.md b/README.md index 8d07184..c6fafcd 100644 --- a/README.md +++ b/README.md @@ -79,11 +79,11 @@ or - `-j`: Exclude JavaScript - `-k`: Accept invalid X.509 (TLS) certificates - `-M`: Don't add timestamp and URL information - - `-n`: Extract contents of NOSCRIPT tags + - `-n`: Extract contents of NOSCRIPT elements - `-o`: Write output to `file` - `-s`: Be quiet - `-t`: Adjust `network request timeout` - - `-u`: Provide `custom User-Agent` + - `-u`: Provide custom `User-Agent` - `-v`: Exclude videos --------------------------------------------------- @@ -99,19 +99,15 @@ Please open an issue if something is wrong, that helps make this project better. --------------------------------------------------- ## Related projects - - `Monolith Chrome Extension`: https://github.com/rhysd/monolith-of-web - - `Pagesaver`: https://github.com/distributed-mind/pagesaver - - `Personal WayBack Machine`: https://github.com/popey/pwbm - - `Hako`: https://github.com/dmpop/hako + - Monolith Chrome Extension: https://github.com/rhysd/monolith-of-web + - Pagesaver: https://github.com/distributed-mind/pagesaver + - Personal WayBack Machine: https://github.com/popey/pwbm + - Hako: https://github.com/dmpop/hako --------------------------------------------------- ## License - - CC0-1.0 - -
To the extent possible under law, the author(s) have dedicated all copyright related and neighboring rights to this software to the public domain worldwide. This software is distributed without any warranty. diff --git a/src/html.rs b/src/html.rs index a1524e2..836f5cc 100644 --- a/src/html.rs +++ b/src/html.rs @@ -474,8 +474,9 @@ pub fn stringify_document(handle: &Handle, options: &Options) -> String { result = String::from_utf8(buf).unwrap(); } + // Unwrap NOSCRIPT elements if options.unwrap_noscript { - let noscript_re = Regex::new(r"<(?P/?noscript)>").unwrap(); + let noscript_re = Regex::new(r"<(?P/?noscript[^>]*)>").unwrap(); result = noscript_re.replace_all(&result, "").to_string(); } @@ -503,44 +504,39 @@ pub fn retrieve_and_embed_asset( depth + 1, ) { Ok((data, final_url, mut media_type)) => { - // Check integrity if it's a LINK or SCRIPT tag let node_name: &str = get_node_name(&node).unwrap(); - let mut ok_to_include: bool = true; + // Check integrity if it's a LINK or SCRIPT element + let mut ok_to_include: bool = true; if node_name == "link" || node_name == "script" { - let node_integrity_attr_value: Option = get_node_attr(node, "integrity"); - // Check integrity - if let Some(node_integrity_attr_value) = node_integrity_attr_value { + if let Some(node_integrity_attr_value) = get_node_attr(node, "integrity") { if !node_integrity_attr_value.is_empty() { ok_to_include = check_integrity(&data, &node_integrity_attr_value); } - } - // Wipe integrity attribute - set_node_attr(node, "integrity", None); + // Wipe the integrity attribute + set_node_attr(node, "integrity", None); + } } if ok_to_include { - if node_name == "link" { - let link_type: &str = determine_link_node_type(node); - // CSS LINK nodes requires special treatment - if link_type == "stylesheet" { - let css: String = embed_css( - cache, - client, - &final_url, - &String::from_utf8_lossy(&data), - options, - depth + 1, - ); - let css_data_url = create_data_url("text/css", css.as_bytes(), &final_url); - - set_node_attr(&node, attr_name, Some(css_data_url.to_string())); + if node_name == "link" && determine_link_node_type(node) == "stylesheet" { + // Stylesheet LINK elements require special treatment + let css: String = embed_css( + cache, + client, + &final_url, + &String::from_utf8_lossy(&data), + options, + depth + 1, + ); - return; // Do not fall through - } + // Create and embed data URL + let css_data_url = create_data_url("text/css", css.as_bytes(), &final_url); + set_node_attr(&node, attr_name, Some(css_data_url.to_string())); } else if node_name == "frame" || node_name == "iframe" { + // (I)FRAMEs are also quite different from conventional resources let frame_dom = html_to_dom(&String::from_utf8_lossy(&data)); walk_and_embed_assets( cache, @@ -559,30 +555,38 @@ pub fn retrieve_and_embed_asset( ) .unwrap(); + // Create and embed data URL let mut frame_data_url = create_data_url(&media_type, &frame_data, &final_url); - frame_data_url.set_fragment(resolved_url.fragment()); - set_node_attr(node, attr_name, Some(frame_data_url.to_string())); + } else { + // Every other type of element gets processed here + + // Parse media type for SCRIPT elements + if node_name == "script" { + if let Some(_) = get_node_attr(node, "src") { + if let Some(script_node_type_attr_value) = get_node_attr(node, "type") { + media_type = script_node_type_attr_value.to_string(); + } else { + // Fallback to default one if it's not specified + media_type = "application/javascript".to_string(); + } + } + } - return; // Do not fall through - } - - // Everything else - if node_name == "script" { - media_type = "application/javascript".to_string(); + // Create and embed data URL + let mut data_url = create_data_url(&media_type, &data, &final_url); + data_url.set_fragment(resolved_url.fragment()); + set_node_attr(node, attr_name, Some(data_url.to_string())); } - let mut data_url = create_data_url(&media_type, &data, &final_url); - data_url.set_fragment(resolved_url.fragment()); - set_node_attr(node, attr_name, Some(data_url.to_string())); } } Err(_) => { if resolved_url.scheme() == "http" || resolved_url.scheme() == "https" { - // Keep remote reference if unable to retrieve the asset + // Keep remote references if unable to retrieve the asset set_node_attr(node, attr_name, Some(resolved_url.to_string())); } else { - // Exclude non-remote URLs + // Remove local references if they can't be successfully embedded as data URLs set_node_attr(node, attr_name, None); } } @@ -645,7 +649,7 @@ pub fn walk_and_embed_assets( let link_type: &str = determine_link_node_type(node); if link_type == "icon" { - // Find and resolve this LINK node's href attribute + // Find and resolve LINK's href attribute if let Some(link_attr_href_value) = get_node_attr(node, "href") { if !options.no_images && !link_attr_href_value.is_empty() { retrieve_and_embed_asset( @@ -663,10 +667,12 @@ pub fn walk_and_embed_assets( } } } else if link_type == "stylesheet" { - // Find and resolve this LINK node's href attribute + // Resolve LINK's href attribute if let Some(link_attr_href_value) = get_node_attr(node, "href") { if options.no_css { set_node_attr(node, "href", None); + // Wipe integrity attribute + set_node_attr(node, "integrity", None); } else { if !link_attr_href_value.is_empty() { retrieve_and_embed_asset( @@ -916,14 +922,15 @@ pub fn walk_and_embed_assets( // Replace with empty JS call to preserve original behavior set_node_attr(node, "href", Some(str!("javascript:;"))); } - } else if anchor_attr_href_value.clone().starts_with('#') - || is_url_and_has_protocol(&anchor_attr_href_value.clone()) - { - // Don't touch mailto: links or hrefs which begin with a hash sign } else { - let href_full_url: Url = - resolve_url(document_url, &anchor_attr_href_value); - set_node_attr(node, "href", Some(href_full_url.to_string())); + // Don't touch mailto: links or hrefs which begin with a hash sign + if !anchor_attr_href_value.clone().starts_with('#') + && !is_url_and_has_protocol(&anchor_attr_href_value.clone()) + { + let href_full_url: Url = + resolve_url(document_url, &anchor_attr_href_value); + set_node_attr(node, "href", Some(href_full_url.to_string())); + } } } } @@ -937,6 +944,8 @@ pub fn walk_and_embed_assets( // Remove src attribute if script_attr_src != None { set_node_attr(node, "src", None); + // Wipe integrity attribute + set_node_attr(node, "integrity", None); } } else if !script_attr_src.clone().unwrap_or_default().is_empty() { retrieve_and_embed_asset( @@ -1081,7 +1090,7 @@ pub fn walk_and_embed_assets( ); // Get rid of original contents noscript_contents.clear(); - // Insert HTML containing embedded assets back into NOSCRIPT node + // Insert HTML containing embedded assets into NOSCRIPT node if let Some(html) = get_child_node_by_name(&noscript_contents_dom.document, "html") { diff --git a/src/tests/cli/base_url.rs b/src/tests/cli/base_url.rs index 9b07bf7..192e1e7 100644 --- a/src/tests/cli/base_url.rs +++ b/src/tests/cli/base_url.rs @@ -88,7 +88,7 @@ mod passing { } #[test] - fn remove_existing_when_empty_provided() { + fn set_existing_to_empty_when_empty_provided() { let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); let out = cmd .arg("-M") diff --git a/src/tests/cli/data_url.rs b/src/tests/cli/data_url.rs index 62e5bbe..280ed40 100644 --- a/src/tests/cli/data_url.rs +++ b/src/tests/cli/data_url.rs @@ -11,24 +11,6 @@ mod passing { use std::env; use std::process::Command; - #[test] - fn bad_input_data_url() { - let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); - let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap(); - - // STDOUT should contain HTML - assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), ""); - - // STDERR should contain error description - assert_eq!( - std::str::from_utf8(&out.stderr).unwrap(), - "Unsupported data URL media type\n" - ); - - // The exit code should be 1 - out.assert().code(1); - } - #[test] fn isolate_data_url() { let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); @@ -192,6 +174,38 @@ mod passing { // The exit code should be 0 out.assert().code(0); } +} + +// ███████╗ █████╗ ██╗██╗ ██╗███╗ ██╗ ██████╗ +// ██╔════╝██╔══██╗██║██║ ██║████╗ ██║██╔════╝ +// █████╗ ███████║██║██║ ██║██╔██╗ ██║██║ ███╗ +// ██╔══╝ ██╔══██║██║██║ ██║██║╚██╗██║██║ ██║ +// ██║ ██║ ██║██║███████╗██║██║ ╚████║╚██████╔╝ +// ╚═╝ ╚═╝ ╚═╝╚═╝╚══════╝╚═╝╚═╝ ╚═══╝ ╚═════╝ + +#[cfg(test)] +mod failing { + use assert_cmd::prelude::*; + use std::env; + use std::process::Command; + + #[test] + fn bad_input_data_url() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); + let out = cmd.arg("data:,Hello%2C%20World!").output().unwrap(); + + // STDOUT should contain HTML + assert_eq!(std::str::from_utf8(&out.stdout).unwrap(), ""); + + // STDERR should contain error description + assert_eq!( + std::str::from_utf8(&out.stderr).unwrap(), + "Unsupported data URL media type\n" + ); + + // The exit code should be 1 + out.assert().code(1); + } #[test] fn security_disallow_local_assets_within_data_url_targets() { diff --git a/src/tests/cli/noscript.rs b/src/tests/cli/noscript.rs index 7ba93ce..19ab674 100644 --- a/src/tests/cli/noscript.rs +++ b/src/tests/cli/noscript.rs @@ -130,7 +130,14 @@ mod passing { // STDOUT should contain HTML with no CSS assert_eq!( std::str::from_utf8(&out.stdout).unwrap(), - "\n\n" + "\ + \ + \ + \ + \ + \n\ + \ + \n" ); // STDERR should contain target HTML and embedded SVG files @@ -153,4 +160,27 @@ mod passing { // The exit code should be 0 out.assert().code(0); } + + #[test] + fn unwrap_noscript_contents_attr_data_url() { + let mut cmd = Command::cargo_bin(env!("CARGO_PKG_NAME")).unwrap(); + let out = cmd + .arg("-M") + .arg("-n") + .arg("data:text/html,") + .output() + .unwrap(); + + // STDOUT should contain unwrapped contents of NOSCRIPT element + assert_eq!( + std::str::from_utf8(&out.stdout).unwrap(), + "test\n" + ); + + // STDERR should be empty + assert_eq!(std::str::from_utf8(&out.stderr).unwrap(), ""); + + // The exit code should be 0 + out.assert().code(0); + } } diff --git a/src/tests/cli/unusual_encodings.rs b/src/tests/cli/unusual_encodings.rs index 4796cec..5ebd9ac 100644 --- a/src/tests/cli/unusual_encodings.rs +++ b/src/tests/cli/unusual_encodings.rs @@ -30,7 +30,14 @@ mod passing { // STDOUT should contain newly added base URL assert_eq!( std::str::from_utf8(&out.stdout).unwrap(), - "\n \n \n \n © Some Company\n \n\n\n" + "\ + \n \ + \n \ + \n \ + \n \ + © Some Company\n \ + \n\n\ + \n" ); // STDERR should contain only the target file diff --git a/src/tests/html/walk_and_embed_assets.rs b/src/tests/html/walk_and_embed_assets.rs index 855cc37..7e2ab83 100644 --- a/src/tests/html/walk_and_embed_assets.rs +++ b/src/tests/html/walk_and_embed_assets.rs @@ -87,10 +87,12 @@ mod passing { #[test] fn no_css() { - let html = "\ - \ - \ -
"; + let html = "\ + \ + \ + \ +
\ + "; let dom = html::html_to_dom(&html); let url: Url = Url::parse("http://localhost").unwrap(); let cache = &mut HashMap::new(); @@ -108,16 +110,18 @@ mod passing { assert_eq!( buf.iter().map(|&c| c as char).collect::(), - "\ - \ - \ - \ - \ - \ - \ -
\ - \ - " + "\ + \ + \ + \ + \ + \ + \ + \ +
\ + \ + \ + " ); } @@ -203,7 +207,15 @@ mod passing { assert_eq!( buf.iter().map(|&c| c as char).collect::(), - "" + "\ + \ + \ + \ + \ + \ + \ + \ + " ); } @@ -227,16 +239,25 @@ mod passing { assert_eq!( buf.iter().map(|&c| c as char).collect::(), - "" + "\ + \ + \ + \ + \ + \ + \ + " ); } #[test] fn no_js() { - let html = "
\ - \ - \ -
"; + let html = "\ +
\ + \ + \ +
\ + "; let dom = html::html_to_dom(&html); let url: Url = Url::parse("http://localhost").unwrap(); let cache = &mut HashMap::new(); @@ -254,52 +275,141 @@ mod passing { assert_eq!( buf.iter().map(|&c| c as char).collect::(), - "
\ -
" + "\ + \ + \ + \ +
\ + \ + \ +
\ + \ + \ + " ); } - // #[test] - // fn discards_integrity() { - // let html = "No integrity\ - // \ - // "; - // let dom = html::html_to_dom(&html); - // let url: Url = Url::parse("http://localhost").unwrap(); - // let cache = &mut HashMap::new(); - - // let mut options = Options::default(); - // options.no_css = true; - // options.no_frames = true; - // options.no_js = true; - // options.no_images = true; - // options.silent = true; - - // let client = Client::new(); - - // html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); - - // let mut buf: Vec = Vec::new(); - // serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); - - // assert_eq!( - // buf.iter().map(|&c| c as char).collect::(), - // "\ - // No integrity\ - // \ - // " - // ); - // } + #[test] + fn keeps_integrity_for_linked_assets() { + let html = "Has integrity\ + "; + let dom = html::html_to_dom(&html); + let url: Url = Url::parse("http://localhost").unwrap(); + let cache = &mut HashMap::new(); + + let mut options = Options::default(); + options.silent = true; + + let client = Client::new(); + + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); + + let mut buf: Vec = Vec::new(); + serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); + + assert_eq!( + buf.iter().map(|&c| c as char).collect::(), + "\ + \ + \ + Has integrity\ + \ + \ + \ + \ + " + ); + } + + #[test] + fn discards_integrity_for_linked_assets_nojs_nocss() { + let html = "\ + No integrity\ + \ + \ + "; + let dom = html::html_to_dom(&html); + let url: Url = Url::parse("http://localhost").unwrap(); + let cache = &mut HashMap::new(); + + let mut options = Options::default(); + options.no_css = true; + options.no_js = true; + options.silent = true; + + let client = Client::new(); + + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); + + let mut buf: Vec = Vec::new(); + serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); + + assert_eq!( + buf.iter().map(|&c| c as char).collect::(), + "\ + \ + \ + No integrity\ + \ + \ + \ + \ + \ + " + ); + } + + #[test] + fn discards_integrity_for_embedded_assets() { + let html = "\ + No integrity\ + \ + \ + "; + let dom = html::html_to_dom(&html); + let url: Url = Url::parse("http://localhost").unwrap(); + let cache = &mut HashMap::new(); + + let mut options = Options::default(); + options.no_css = true; + options.no_js = true; + options.silent = true; + + let client = Client::new(); + + html::walk_and_embed_assets(cache, &client, &url, &dom.document, &options, 0); + + let mut buf: Vec = Vec::new(); + serialize(&mut buf, &dom.document, SerializeOpts::default()).unwrap(); + + assert_eq!( + buf.iter().map(|&c| c as char).collect::(), + "\ + \ + \ + No integrity\ + \ + \ + \ + \ + \ + \ + " + ); + } #[test] fn removes_unwanted_meta_tags() { - let html = "\ - \ - \ - \ - \ - \ - "; + let html = "\ + \ + \ + \ + \ + \ + \ + \ + \ + "; let dom = html::html_to_dom(&html); let url: Url = Url::parse("http://localhost").unwrap(); let cache = &mut HashMap::new(); @@ -320,19 +430,22 @@ mod passing { assert_eq!( buf.iter().map(|&c| c as char).collect::(), - "\ + "\ + \ \ \ \ \ - \ + \ + \ " ); } #[test] fn processes_noscript_tags() { - let html = "\ + let html = "\ + \ \