@ -7,13 +7,10 @@ use html5ever::tree_builder::{Attribute, TreeSink};
use html5ever ::{ local_name , namespace_url , ns } ;
use http ::retrieve_asset ;
use js ::attr_is_event_handler ;
use reqwest ::Client ;
use std ::collections ::HashMap ;
use std ::default ::Default ;
use utils ::{ data_to_dataurl , is_valid_url , resolve_url , url_has_protocol } ;
lazy_static ! {
static ref EMPTY_STRING : String = String ::new ( ) ;
}
use utils ::{ data_to_dataurl , is_valid_url , resolve_css_imports , resolve_url , url_has_protocol } ;
const ICON_VALUES : [ & str ; 5 ] = [
"icon" ,
@ -35,7 +32,7 @@ pub fn get_parent_node(node: &Handle) -> Handle {
pub fn get_node_name ( node : & Handle ) -> String {
match & node . data {
NodeData ::Element { ref name , .. } = > name . local . as_ref ( ) . to_string ( ) ,
_ = > EMPTY_STRING . clone ( ) ,
_ = > str ! ( ) ,
}
}
@ -45,14 +42,13 @@ pub fn is_icon(attr_value: &str) -> bool {
pub fn walk_and_embed_assets (
cache : & mut HashMap < String , String > ,
client : & Client ,
url : & str ,
node : & Handle ,
opt_no_css : bool ,
opt_no_js : bool ,
opt_no_images : bool ,
opt_user_agent : & str ,
opt_silent : bool ,
opt_insecure : bool ,
opt_no_frames : bool ,
) {
match node . data {
@ -61,14 +57,13 @@ pub fn walk_and_embed_assets(
for child in node . children . borrow ( ) . iter ( ) {
walk_and_embed_assets (
cache ,
client ,
& url ,
child ,
opt_no_css ,
opt_no_js ,
opt_no_images ,
opt_user_agent ,
opt_silent ,
opt_insecure ,
opt_no_frames ,
) ;
}
@ -104,17 +99,16 @@ pub fn walk_and_embed_assets(
} else {
let href_full_url : String =
resolve_url ( & url , & attr . value . to_string ( ) )
. unwrap_or ( EMPTY_STRING . clone ( ) ) ;
. unwrap_or ( str ! ( ) ) ;
let ( favicon_dataurl , _ ) = retrieve_asset (
cache ,
client ,
& href_full_url ,
true ,
"" ,
opt_user_agent ,
opt_silent ,
opt_insecure ,
)
. unwrap_or ( ( EMPTY_STRING . clone ( ) , EMPTY_STRING . clone ( ) ) ) ;
. unwrap_or ( ( str ! ( ) , str ! ( ) ) ) ;
attr . value . clear ( ) ;
attr . value . push_slice ( favicon_dataurl . as_str ( ) ) ;
}
@ -128,19 +122,37 @@ pub fn walk_and_embed_assets(
} else {
let href_full_url : String =
resolve_url ( & url , & attr . value . to_string ( ) )
. unwrap_or ( EMPTY_STRING . clone ( ) ) ;
let ( css_dataurl , _ ) = retrieve_asset (
. unwrap_or ( str ! ( ) ) ;
let replacement_text = match retrieve_asset (
cache ,
client ,
& href_full_url ,
tru e,
fals e,
"text/css" ,
opt_user_agent ,
opt_silent ,
opt_insecure ,
)
. unwrap_or ( ( EMPTY_STRING . clone ( ) , EMPTY_STRING . clone ( ) ) ) ;
) {
// On successful retrieval, traverse CSS
Ok ( ( css_data , _ ) ) = > resolve_css_imports (
cache ,
client ,
& css_data ,
true ,
& href_full_url ,
opt_no_images ,
opt_silent ,
) ,
// If a network error occured, warn
Err ( e ) = > {
eprintln! ( "Warning: {}" , e , ) ;
// If failed to resolve, replace with absolute URL
href_full_url
}
} ;
attr . value . clear ( ) ;
attr . value . push_slice ( css_dataurl . as_str ( ) ) ;
attr . value . push_slice ( & replacement_text ) ;
}
}
}
@ -148,8 +160,7 @@ pub fn walk_and_embed_assets(
for attr in attrs_mut . iter_mut ( ) {
if & attr . name . local = = "href" {
let href_full_url : String =
resolve_url ( & url , & attr . value . to_string ( ) )
. unwrap_or ( EMPTY_STRING . clone ( ) ) ;
resolve_url ( & url , & attr . value . to_string ( ) ) . unwrap_or ( str ! ( ) ) ;
attr . value . clear ( ) ;
attr . value . push_slice ( & href_full_url . as_str ( ) ) ;
}
@ -184,16 +195,15 @@ pub fn walk_and_embed_assets(
. map ( | attr | & attr . value )
. filter ( | src | ! src . is_empty ( ) ) // Ignore empty srcs
. next ( )
. and_then ( | src | resolve_url ( & url , src ) . ok ( ) ) // Make absolute
. and_then ( | src | resolve_url ( & url , src ) . ok ( ) ) // Make absolute
. and_then ( | abs_src | // Download and convert to dataurl
retrieve_asset (
cache ,
client ,
& abs_src ,
true ,
"" ,
opt_user_agent ,
opt_silent ,
opt_insecure ,
) . ok ( ) )
{
// Add the new dataurl src attribute
@ -220,17 +230,16 @@ pub fn walk_and_embed_assets(
} else {
let srcset_full_url : String =
resolve_url ( & url , & attr . value . to_string ( ) )
. unwrap_or ( EMPTY_STRING . clone ( ) ) ;
. unwrap_or ( str ! ( ) ) ;
let ( source_dataurl , _ ) = retrieve_asset (
cache ,
client ,
& srcset_full_url ,
true ,
"" ,
opt_user_agent ,
opt_silent ,
opt_insecure ,
)
. unwrap_or ( ( EMPTY_STRING . clone ( ) , EMPTY_STRING . clone ( ) ) ) ;
. unwrap_or ( ( str ! ( ) , str ! ( ) ) ) ;
attr . value . clear ( ) ;
attr . value . push_slice ( source_dataurl . as_str ( ) ) ;
}
@ -246,8 +255,8 @@ pub fn walk_and_embed_assets(
continue ;
}
let href_full_url : String = resolve_url ( & url , & attr . value . to_string ( ) )
. unwrap_or ( EMPTY_STRING . clone ( ) ) ;
let href_full_url : String =
resolve_url ( & url , & attr . value . to_string ( ) ) . unwrap_or ( str ! ( ) ) ;
attr . value . clear ( ) ;
attr . value . push_slice ( href_full_url . as_str ( ) ) ;
}
@ -266,18 +275,16 @@ pub fn walk_and_embed_assets(
for attr in attrs_mut . iter_mut ( ) {
if & attr . name . local = = "src" {
let src_full_url : String =
resolve_url ( & url , & attr . value . to_string ( ) )
. unwrap_or ( EMPTY_STRING . clone ( ) ) ;
resolve_url ( & url , & attr . value . to_string ( ) ) . unwrap_or ( str ! ( ) ) ;
let ( js_dataurl , _ ) = retrieve_asset (
cache ,
client ,
& src_full_url ,
true ,
"application/javascript" ,
opt_user_agent ,
opt_silent ,
opt_insecure ,
)
. unwrap_or ( ( EMPTY_STRING . clone ( ) , EMPTY_STRING . clone ( ) ) ) ;
. unwrap_or ( ( str ! ( ) , str ! ( ) ) ) ;
attr . value . clear ( ) ;
attr . value . push_slice ( js_dataurl . as_str ( ) ) ;
}
@ -288,6 +295,23 @@ pub fn walk_and_embed_assets(
if opt_no_css {
// Empty inner content of STYLE tags
node . children . borrow_mut ( ) . clear ( ) ;
} else {
for node in node . children . borrow_mut ( ) . iter_mut ( ) {
if let NodeData ::Text { ref contents } = node . data {
let mut tendril = contents . borrow_mut ( ) ;
let replacement = resolve_css_imports (
cache ,
client ,
tendril . as_ref ( ) ,
false ,
& url ,
opt_no_images ,
opt_silent ,
) ;
tendril . clear ( ) ;
tendril . push_slice ( & replacement ) ;
}
}
}
}
"form" = > {
@ -296,8 +320,7 @@ pub fn walk_and_embed_assets(
// Modify action to be a full URL
if ! is_valid_url ( & attr . value ) {
let href_full_url : String =
resolve_url ( & url , & attr . value . to_string ( ) )
. unwrap_or ( EMPTY_STRING . clone ( ) ) ;
resolve_url ( & url , & attr . value . to_string ( ) ) . unwrap_or ( str ! ( ) ) ;
attr . value . clear ( ) ;
attr . value . push_slice ( href_full_url . as_str ( ) ) ;
}
@ -316,33 +339,31 @@ pub fn walk_and_embed_assets(
let iframe_src : String = attr . value . to_string ( ) ;
// Ignore iframes with empty source (they cause infinite loops)
if iframe_src = = EMPTY_STRING . clone ( ) {
if iframe_src = = str ! ( ) {
continue ;
}
let src_full_url : String =
resolve_url ( & url , & iframe_src ) . unwrap_or ( EMPTY_STRING . clone ( ) ) ;
resolve_url ( & url , & iframe_src ) . unwrap_or ( str ! ( ) ) ;
let ( iframe_data , iframe_final_url ) = retrieve_asset (
cache ,
client ,
& src_full_url ,
false ,
"text/html" ,
opt_user_agent ,
opt_silent ,
opt_insecure ,
)
. unwrap_or ( ( EMPTY_STRING . clone ( ) , src_full_url ) ) ;
. unwrap_or ( ( str ! ( ) , src_full_url ) ) ;
let dom = html_to_dom ( & iframe_data ) ;
walk_and_embed_assets (
cache ,
client ,
& iframe_final_url ,
& dom . document ,
opt_no_css ,
opt_no_js ,
opt_no_images ,
opt_user_agent ,
opt_silent ,
opt_insecure ,
opt_no_frames ,
) ;
let mut buf : Vec < u8 > = Vec ::new ( ) ;
@ -359,25 +380,24 @@ pub fn walk_and_embed_assets(
let video_poster = attr . value . to_string ( ) ;
// Skip posters with empty source
if video_poster = = EMPTY_STRING . clone ( ) {
if video_poster = = str ! ( ) {
continue ;
}
if opt_no_images {
attr . value . clear ( ) ;
} else {
let poster_full_url : String = resolve_url ( & url , & video_poster )
. unwrap_or ( EMPTY_STRING . clone ( ) ) ;
let poster_full_url : String =
resolve_url ( & url , & video_poster ) . unwrap_or ( str ! ( ) ) ;
let ( poster_dataurl , _ ) = retrieve_asset (
cache ,
client ,
& poster_full_url ,
true ,
"" ,
opt_user_agent ,
opt_silent ,
opt_insecure ,
)
. unwrap_or ( ( poster_full_url , EMPTY_STRING . clone ( ) ) ) ;
. unwrap_or ( ( poster_full_url , str ! ( ) ) ) ;
attr . value . clear ( ) ;
attr . value . push_slice ( poster_dataurl . as_str ( ) ) ;
}
@ -387,6 +407,7 @@ pub fn walk_and_embed_assets(
_ = > { }
}
// Process style attributes
if opt_no_css {
// Get rid of style attributes
let mut style_attr_indexes = Vec ::new ( ) ;
@ -399,6 +420,24 @@ pub fn walk_and_embed_assets(
for attr_index in style_attr_indexes {
attrs_mut . remove ( attr_index ) ;
}
} else {
// Otherwise, parse any links found in the attributes
for attribute in attrs_mut
. iter_mut ( )
. filter ( | a | a . name . local . as_ref ( ) . eq_ignore_ascii_case ( "style" ) )
{
let replacement = resolve_css_imports (
cache ,
client ,
attribute . value . as_ref ( ) ,
false ,
& url ,
opt_no_images ,
opt_silent ,
) ;
attribute . value . clear ( ) ;
attribute . value . push_slice ( & replacement ) ;
}
}
if opt_no_js {
@ -419,14 +458,13 @@ pub fn walk_and_embed_assets(
for child in node . children . borrow ( ) . iter ( ) {
walk_and_embed_assets (
cache ,
client ,
& url ,
child ,
opt_no_css ,
opt_no_js ,
opt_no_images ,
opt_user_agent ,
opt_silent ,
opt_insecure ,
opt_no_frames ,
) ;
}
@ -481,7 +519,7 @@ pub fn stringify_document(
let doc = dom . get_document ( ) ;
let html = get_child_node_by_name ( & doc , "html" ) ;
let head = get_child_node_by_name ( & html , "head" ) ;
let mut content_attr = EMPTY_STRING . clone ( ) ;
let mut content_attr = str ! ( ) ;
if opt_isolate {
content_attr + = " default-src 'unsafe-inline' data:;" ;
}