Treat images a little differently so they get more inclusion

- When the body of the article contains screenshots or similar images,
we want to try to keep those images around.
- Added test for Business Insider article.
- Added sweetshark test.
- Added craig to the credits.
pull/21/head
Mišo Belica 11 years ago
parent 02160fe2ae
commit 3746ee5bb5

@ -1,3 +1,4 @@
Rick Harding (original author)
Michal Belica (current maintainer)
Craig Maloney
nhnifong

@ -2,6 +2,7 @@
Changelog for readability
==========================
- Treat images a little differently so that more of them are kept in the extracted article.
- Added User-Agent string into HTTP requests.
- Added property ``Article.main_text`` for getting text annotated with
semantic HTML tags (<em>, <strong>, ...).

@ -92,6 +92,11 @@ def get_link_density(node, node_text=None):
return 0.0
links_length = sum(map(_get_normalized_text_length, node.findall(".//a")))
# Give 50 bonus chars worth of length for each img.
# Tweaking this 50 down a notch should help if we hit false positives.
img_bonuses = 50 * len(node.findall(".//img"))
links_length = max(0, links_length - img_bonuses)
return links_length / text_length

File diff suppressed because it is too large Load Diff

@ -0,0 +1,39 @@
# -*- coding: utf8 -*-
from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals
from os.path import join, dirname
from readability.readable import Article
from ...compat import unittest
class TestArticle(unittest.TestCase):
    """
    Test the scoring and parsing of the article from URL below:
    http://www.businessinsider.com/tech-ceos-favorite-productivity-hacks-2013-8
    """

    # URL the saved fixture was taken from (see class docstring); it is passed
    # as the second argument to ``Article`` in ``setUp``.
    ARTICLE_URL = (
        "http://www.businessinsider.com/"
        "tech-ceos-favorite-productivity-hacks-2013-8"
    )

    def setUp(self):
        """Load the saved ``article.html`` fixture into an ``Article``."""
        # The fixture lives next to this test module.
        article_path = join(dirname(__file__), "article.html")
        # Opened in binary mode, so ``Article`` receives the raw bytes.
        # The handle is not called ``file`` to avoid shadowing the builtin.
        with open(article_path, "rb") as article_file:
            self.document = Article(article_file.read(), self.ARTICLE_URL)

    def tearDown(self):
        """Drop the article"""
        self.document = None

    def test_parses(self):
        """Verify we can parse the document."""
        self.assertIn('id="readabilityBody"', self.document.readable)

    def test_images_preserved(self):
        """Images from the article body should be kept in the readable output."""
        images = [
            'bharath-kumar-a-co-founder-at-pugmarksme-suggests-working-on-a-sunday-late-night.jpg',
            'bryan-guido-hassin-a-university-professor-and-startup-junkie-uses-airplane-days.jpg',
        ]

        for image in images:
            # The image name doubles as the failure message so a failing run
            # says which picture went missing.
            self.assertIn(image, self.document.readable, image)

@ -0,0 +1,667 @@
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
<html xmlns="http://www.w3.org/1999/xhtml" id="sixapart-standard">
<head>
<meta http-equiv="X-UA-Compatible" content="IE=EmulateIE7; IE=EmulateIE9" />
<meta http-equiv="Content-Type" content="text/html;charset=utf-8" />
<!--[if IE]><link rel="shortcut icon" type="image/vnd.microsoft.icon" href="http://l-stat.livejournal.com/img/userinfo.ico?v=108.1"><![endif]-->
<link rel="icon" type="image/gif" href="http://l-stat.livejournal.com/img/userinfo.gif?v=17080?v=108.1">
<link rel='stylesheet' href='http://l-stat.livejournal.com/ad_base.css?v=1340259377' type='text/css' />
<link rel="meta" type="application/rdf+xml" title="FOAF" href="http://sweetshark.livejournal.com/data/foaf" />
<link rel="stylesheet" type="text/css" href="http://l-stat.livejournal.com/??lj_base.css,contextualhover.css,esn.css,controlstrip-new.css,widgets/calendar.css,share.css,journalpromo/journalpromo.css,lj_base-journal.css?v=1377871128" />
<link rel="stylesheet" type="text/css" href="http://l-stat.livejournal.com/??voxhtml/base.css,voxhtml/default/screen.css,voxhtml/ljextras.css?v=1377871128" media="screen"/>
<!--[if lte IE 8]><link rel="stylesheet" type="text/css" href="http://l-stat.livejournal.com/??ie.css?v=1377871128" /><![endif]-->
<script type="text/javascript">
Site = window.Site || {};
Site.ml_text = {"date.month.september.short":"Sep","date.day.thursday.short":"Thu","date.month.may.short":"May","date.month.march.short":"Mar","date.month.february.long":"February","date.month.january.long":"January","date.month.june.long":"June","date.day.friday.short":"Fri","date.month.october.long":"October","date.day.tuesday.short":"Tue","date.month.april.short":"Apr","date.month.december.short":"Dec","date.month.june.short":"Jun","date.format.short":"%M/%D/%Y","date.month.january.short":"Jan","date.day.monday.short":"Mon","date.month.october.short":"Oct","confirm.bubble.no":"No","date.month.august.short":"Aug","date.month.april.long":"April","date.month.march.long":"March","date.day.saturday.short":"Sat","date.month.july.short":"Jul","date.month.december.long":"December","date.format.offset":"0","entry.reference.label.reposted":"Reposted","date.month.august.long":"August","date.format.long":"%B %D, %Y","date.month.may.long":"May","date.day.wednesday.short":"Wed","date.day.sunday.short":"Sun","date.month.september.long":"September","date.month.february.short":"Feb","entry.reference.label.title":"Remove repost","confirm.bubble.yes":"Yes","date.month.november.short":"Nov","date.month.july.long":"July","date.month.november.long":"November","sharing.popup.title":"Share"};
Site.page = {"remote":{"is_logged_in":0,"status":"You are viewing <span class=\"ljuser i-ljuser \" lj:user=\"sweetshark\"><a href=\"http://sweetshark.livejournal.com/profile\" ><img width=\"16\" height=\"16\" class=\"i-ljuser-userhead\" src=\"http://l-stat.livejournal.com/img/userinfo.gif?v=17080?v=108.1\" /></a><a href=\"http://sweetshark.livejournal.com/\" class=\"i-ljuser-username\" ><b>sweetshark</b></a></span>"},"calendar":{"month":{"short":["date.month.january.short","date.month.february.short","date.month.march.short","date.month.april.short","date.month.may.short","date.month.june.short","date.month.july.short","date.month.august.short","date.month.september.short","date.month.october.short","date.month.november.short","date.month.december.short"],"long":["date.month.january.long","date.month.february.long","date.month.march.long","date.month.april.long","date.month.may.long","date.month.june.long","date.month.july.long","date.month.august.long","date.month.september.long","date.month.october.long","date.month.november.long","date.month.december.long"]},"week":["date.day.sunday.short","date.day.monday.short","date.day.tuesday.short","date.day.wednesday.short","date.day.thursday.short","date.day.friday.short","date.day.saturday.short"]},"D":{}};
Site.page.template = {};
Site.timer = +(new Date());
(function(){
var p = {"remote_is_identity":null,"remote_is_maintainer":0,"auth_token":"sessionless:1377975600:/__api/::a088e98a57ee51f7398bb8d4892b2709cbe9b48b","locale":"en_US","remoteUser":null,"remote_is_sup":0,"remoteJournalBase":null,"statprefix":"http://l-stat.livejournal.com","ctx_popup":1,"jsonrpcprefix":"http://l-api.livejournal.com","siteroot":"http://www.livejournal.com","country":"US","templates_update_time":900,"media_embed_enabled":1,"inbox_update_poll":0,"rpc":{"public":["homepage.get_rating","latest.get_entries","sitemessage.get_message","comment.get_thread"]},"v":1377871128,"has_remote":0,"currentEntryRecommendations":0,"remoteLocale":"en_US","server_time":1377976936,"picsUploadDomain":"up.pics.livejournal.com","remote_is_suspended":0,"logprefix":"","imgprefix":"http://l-stat.livejournal.com/img","esn_async":1,"remote_can_track_threads":null,"currentJournal":"sweetshark","pics_production":"","currentEntry":"http://sweetshark.livejournal.com/11564.html","currentJournalBase":"http://sweetshark.livejournal.com"}, i;
for (i in p) Site[i] = p[i];
})();
Site.current_journal = {"url_profile":"http://sweetshark.livejournal.com/profile","url_journal":"http://sweetshark.livejournal.com","is_identity":"","userid":37450766,"is_shared":"","display_name":"sweetshark","is_comm":"","username":"sweetshark","is_syndicated":"","userpic_w":100,"userpic_h":100,"can_receive_vgifts":1,"url_allpics":"http://www.livejournal.com/allpics.bml?user=sweetshark","is_person":"1","url_message":"http://www.livejournal.com/inbox/compose.bml?user=sweetshark","url_userpic":"http://l-userpic.livejournal.com/117931338/37450766","display_username":"sweetshark"};
Site.version = '108.1';
</script>
<script type="text/javascript" src="http://l-stat.livejournal.com/js/??.ljlib.js?v=1377871128"></script>
<script type="text/javascript" src="http://l-stat.livejournal.com/js/??jquery/jquery.lj.calendar.js,jquery/jquery.mask.js,jquery/jquery.lj.share.js,ljshare-init.js,controlstrip.js,jquery/jquery.calendarEvents.js,jquery/jquery.lj.repostbutton.js,s2.js,esn.js,jquery/jquery.lj.confirmbubble.js,jquery/jquery.lj.ljcut.js,fb-select-image.js,quickreply.js,md5.js,thread_expander.js,thread_expander.ex.js,commentmanage.js,jquery/jquery.lj.journalPromoStrip.js,lj.api.js?v=1377871128"></script>
<!--[if lte IE 9]><script type="text/javascript" src="http://l-stat.livejournal.com/js/??lib/json3.min.js?v=1377871128"></script><![endif]-->
<!--[if gte IE 9]><script type="text/javascript" src="http://l-stat.livejournal.com/js/??ie9pinned.js?v=1377871128"></script><![endif]-->
<script type="text/javascript">
Site.LJShareParams = {"ml":{"close":"Close","title":"Share"},"services":{"stumbleupon":{"bindLink":"http://www.livejournal.com/redirect/SHARING_stumbleupon?url=http%3A%2F%2Fwww.stumbleupon.com%2Fsubmit%3Furl%3D{url}","title":"StumbleUpon"},"moimir":{"bindLink":"http://www.livejournal.com/redirect/SHARING_moimir?url=http%3A%2F%2Fconnect.mail.ru%2Fshare%3Furl%3D{url}","title":"Moi mir"},"twitter":{"bindLink":"http://www.livejournal.com/redirect/SHARING_twitter?url=http%3A%2F%2Ftwitter.com%2Fshare%3Furl%3D{url}%26text%3D{title}%26hashtags%3D{hashtags}","title":"Twitter"},"digg":{"bindLink":"http://www.livejournal.com/redirect/SHARING_digg?url=http%3A%2F%2Fdigg.com%2Fsubmit%3Furl%3D{url}","title":"Digg"},"email":{"bindLink":"http://www.livejournal.com/redirect/SHARING_email?url=http%3A%2F%2Fapi.addthis.com%2Foexchange%2F0.8%2Fforward%2Femail%2Foffer%3Fusername%3Dinternal%26url%3D{url}%26title%3D{title}","title":"E-mail"},"livejournal":{"bindLink":"http://www.livejournal.com/redirect/SHARING_livejournal?url=http%3A%2F%2Fwww.livejournal.com%2Fupdate.bml%3Frepost_type%3Dc%26repost%3D{url}","openInTab":1,"title":"LiveJournal"},"vkontakte":{"bindLink":"http://www.livejournal.com/redirect/SHARING_vkontakte?url=http%3A%2F%2Fvkontakte.ru%2Fshare.php%3Furl%3D{url}","title":"VKontakte"},"facebook":{"bindLink":"http://www.livejournal.com/redirect/SHARING_facebook?url=http%3A%2F%2Fwww.facebook.com%2Fsharer.php%3Fu%3D{url}","title":"Facebook"},"odnoklassniki":{"bindLink":"http://www.livejournal.com/redirect/SHARING_odnoklassniki?url=http%3A%2F%2Fwww.odnoklassniki.ru%2Fdk%3Fst.cmd%3DaddShare%26st.s%3D1%26st._surl%3D{url}","title":"Odnoklassniki"},"tumblr":{"bindLink":"http://www.livejournal.com/redirect/SHARING_tumblr?url=http%3A%2F%2Fwww.tumblr.com%2Fshare%2Flink%3Furl%3D{url}%26name%3D{title}%26description%3D{text}","title":"Tumblr"}},"links":["livejournal","facebook","twitter","digg","tumblr","stumbleupon","email"]};</script>
<script type="text/javascript" src="http://l-stat.livejournal.com/tmpl/??Widgets/bubble.tmpl,Widgets/share.tmpl,CleanHtml/reposted.tmpl,CleanHtml/Repost.tmpl,CleanHtml/PaidRepost.tmpl,Widgets/popupcontent.tmpl?v=1354174850&tm=1531085;uselang=en_LJ"></script>
<script type="text/javascript" src="http://l-stat.livejournal.com/tmpl/??Widgets/contextualhover.jqtmpl?v=1343758569&tm=1531085;uselang=en_LJ"></script>
<script>
// don't crawl this. read http://www.livejournal.com/developer/exporting.bml
var LJ_cmtinfo = {"form_auth":"c0%3A1377975600%3A1336%3A86400%3ARgZomE12Pp-0-%3A1736db375a9c8e8a8ef2abd819c239dd","remote":null,"canAdmin":0,"journal":"sweetshark"}
</script><meta property="og:description" content="The reasonable man adapts himself to the world; the unreasonable one persists in trying to adapt the world to himself. Therefore all progress depends on the unreasonable man. -- G.B. Shaw Simon Phipps recently published this amazing article titled &amp;quot; Ubuntu and Android: A match made in open…" /><meta property="og:image" content="http://pics.livejournal.com/sweetshark/pic/0000eawe/s640x480" /><meta property="og:title" content="open source -- limitless innovation" /><meta property="og:type" content="website" /><meta property="og:url" content="http://sweetshark.livejournal.com/11564.html" /><meta property="twitter:app:id:ipad" content="383091547" /><meta property="twitter:app:id:iphone" content="383091547" /><meta property="twitter:app:name:ipad" content="LiveJournal" /><meta property="twitter:app:name:iphone" content="LiveJournal" /><meta property="twitter:app:url:ipad" content="lj://sweetshark.livejournal.com/11564.html" /><meta property="twitter:app:url:iphone" content="lj://sweetshark.livejournal.com/11564.html" /><meta property="twitter:card" content="summary" /><meta property="twitter:site" content="@livejournal" /><link rel="stylesheet" href="http://l-stat.livejournal.com/voxhtml/minimalist-green/screen.css" type="text/css" />
<link rel="stylesheet" href="http://sweetshark.livejournal.com/res/82806159/stylesheet?1375335824" type="text/css" />
<title>You can&#39;t take the sky from me. - open source -- limitless innovation</title>
</head>
<body class="asset-stream layout-tw lj-view-entry j-p-resize-all-images" id="home">
<div class="w-cs" id="lj_controlstrip_new">
<div class="w-cs-userinfo">
<div class="w-cs-options">
<div class="w-cs-logo"><!--
--><a href="http://www.livejournal.com">LiveJournal</a><!--
--></div>
<form
id="login"
class="w-cs-login"
action="https://www.livejournal.com/login.bml?ret=1"
method="post">
<input type="hidden" name="mode" value="login" />
<ul class="w-cs-signin">
<li class="w-cs-signin-item">
<div class="w-cs-group w-cs-group-first">
<input
type="text"
name="user"
size="10"
maxlength="17"
tabindex="1"
class="w-cs-text"
placeholder='Username' />
</div>
<div class="w-cs-group w-cs-group-second">
<label
for="remember"
class="note remember"><!--
--><input
type="checkbox"
id="remember"
name="remember_me"
value="1"
tabindex="3"
class="w-cs-checkbox" /><!--
-->Remember Me<!--
--></label>
</div>
</li>
<li class="w-cs-signin-item">
<div class="w-cs-group w-cs-group-first">
<input
type="password"
name="password"
size="10"
tabindex="2"
class="w-cs-text"
placeholder='Password' />
</div>
<div class="w-cs-group w-cs-group-second">
<p class="note">
<a
href="http://www.livejournal.com/lostinfo.bml"
tabindex="5"><!--
-->Forgot your password?<!--
--></a>
</p>
</div>
</li>
<li class="w-cs-signin-item w-cs-signin-buttons">
<div class="w-cs-group w-cs-group-first">
<span class="submit-wrap"><!--
--><input
type="submit"
tabindex="4"
class="w-cs-submit"
value='Log in' /><!--
--></span>
<ul class="login-with">
<li>
<a
href="http://www.livejournal.com/identity/login.bml?type=facebook"
title=""
class="i-auth"
tabindex="6"><!--
--><img
src="http://l-stat.livejournal.com/img/icons/facebook-16.png?v=29916"
width="16"
height="16"
alt="" /><!--
--></a>
</li>
<li>
<a
href="http://www.livejournal.com/identity/login.bml?type=twitter"
title=""
class="i-auth"
tabindex="6"><!--
--><img
src="http://l-stat.livejournal.com/img/icons/twitter-16.png?v=29916"
width="16"
height="16"
alt="" /><!--
--></a>
</li>
<li class="last">
<a
href="#"
title=""
class="i-auth i-auth-control"
tabindex="6"><!--
--><img
src="http://l-stat.livejournal.com/img/icons/showmore.gif?v=11430"
width="16"
height="16"
alt="" /><!--
--></a>
<div
class="b-loginpopup"
id="login-more">
<ul class="b-loginpopup-items">
<li class="b-loginpopup-item">
<a
href="http://www.livejournal.com/identity/login.bml?type=openid"
title="Open ID"
tabindex="7"><!--
--><img
src="http://l-stat.livejournal.com/img/icons/openid-16.gif?v=17127"
width="16"
height="16"
alt="Open ID" /><!--
-->Open ID<!--
--></a>
</li>
<li class="b-loginpopup-item">
<a
href="http://www.livejournal.com/identity/login.bml?type=google"
title="Google"
tabindex="7"><!--
--><img
src="http://l-stat.livejournal.com/img/icons/google-16.png?v=29916"
width="16"
height="16"
alt="Google" /><!--
-->Google<!--
--></a>
</li>
<li class="b-loginpopup-item">
<a
href="http://www.livejournal.com/identity/login.bml?type=mailru"
title="Mail.ru"
tabindex="7"><!--
--><img
src="http://l-stat.livejournal.com/img/icons/mailru-16.gif?v=11910"
width="16"
height="16"
alt="Mail.ru" /><!--
-->Mail.ru<!--
--></a>
</li>
<li class="b-loginpopup-item">
<a
href="http://www.livejournal.com/identity/login.bml?type=vkontakte"
title="VKontakte"
tabindex="7"><!--
--><img
src="http://l-stat.livejournal.com/img/icons/vkontakte-16.png?v=29916"
width="16"
height="16"
alt="VKontakte" /><!--
-->VKontakte<!--
--></a>
</li>
</ul>
</div>
<script type="text/javascript">
jQuery('#lj_controlstrip_new .b-loginpopup').bubble({
target: '#lj_controlstrip_new .i-auth-control img',
closeControl: false,
showOn: 'click'
});
</script>
</li>
</ul>
</div>
<div class="w-cs-group w-cs-group-second w-cs-group-aside">
<p class="note create">
<a href="http://www.livejournal.com/create.bml"><!--
-->Create an Account<!--
--></a>
</p>
</div>
</li>
</ul>
</form>
</div><!-- w-cs-options -->
</div><!-- w-cs-userinfo -->
<div class="w-cs-user-controls">
<div class="w-cs-group w-cs-group-first">
<div class="w-cs-status">
<p>You are viewing <span class="ljuser i-ljuser " lj:user="sweetshark"><a href="http://sweetshark.livejournal.com/profile" ><img width="16" height="16" class="i-ljuser-userhead" src="http://l-stat.livejournal.com/img/userinfo.gif?v=17080?v=108.1" /></a><a href="http://sweetshark.livejournal.com/" class="i-ljuser-username" ><b>sweetshark</b></a></span></p>
</div>
<form class="w-cs-search" action="http://www.livejournal.com/search/">
<input
type="hidden"
name="journal"
value="sweetshark">
<fieldset>
<input
type="text"
name="q"
class="w-cs-text"
tabindex="8"
placeholder="Search at sweetshark" />
</fieldset>
<fieldset>
<button
type="submit"
title="Найти"
class="w-cs-search-submit"
tabindex="9">
<span class="w-cs-search-inner"><!--
-->Find<!--
--></span>
</button>
</fieldset>
</form>
</div>
</div><!-- w-cs-user-controls -->
<script>
jQuery( 'input.text' ).labeledPlaceholder();
</script>
</div>
<div id="container"><div id="container-inner">
<div id="ad-leaderboard"><div id="ad-leaderboard-inner">
<!--
## IndieClick Ad Tag
## publisher[Live Journal (unaudited by Google)] zone[Run of site] size[728x90]
-->
<script language="JavaScript" type="text/javascript">
//<![CDATA[
(function(){var config={
ic_domain :"dmd.ind.lj.unaudros"
,ic_size :"728x90"
,ic_zone :""
,ic_campaign:""
,ic_version :"2"
};var ic_config=(typeof window.top.ic_config=="undefined")?window.top.ic_config={}:window.top.ic_config;var in_iframe=(window!=window.top)?true:false;config.iframe=(in_iframe)?true:false;config.ord=(ic_config.ord)?ic_config.ord:ic_config.ord=Math.floor(Math.random()*10000000000000000);config.tile=(ic_config.tile)?++ic_config.tile:ic_config.tile=1;for(var key in ic_config){config[key]=ic_config[key]}var key_value=[];key_value[key_value.length]="sz="+config.ic_size;key_value[key_value.length]="tile="+config.tile;key_value[key_value.length]="ver="+config.ic_version;key_value[key_value.length]="frame="+config.iframe;key_value[key_value.length]="cm="+config.ic_campaign;for(var key in config.ic_keyValue){key_value[key_value.length]=key+"="+config.ic_keyValue[key]}if(!in_iframe&&!ic_config.dcopt){ic_config.dcopt=true;key_value[key_value.length]="dcopt=ist"}document.write('<span id="ic_'+config.ord+config.tile+'"></span>');var element=document.getElementById("ic_"+config.ord+config.tile);if(ic_config.asyncInit){ic_config.asyncInit(key_value,element)}var key_value=key_value.join(";");window.ic_ad_complete=function(){if(in_iframe){window.onload=function(){var elem=document.getElementById("icAdUnit");elem.style.position="absolute";window.frameElement.style.width=elem.offsetWidth+"px";window.frameElement.style.height=elem.offsetHeight+"px";window.frameElement.style.overflow="hidden";window.frameElement.style.border="0"}}};document.write('<script language="JavaScript" src="http://ad.doubleclick.net/adj/'+config.ic_domain+"/"+config.ic_zone+";"+key_value+";ord="+config.ord+'?" type="text/javascript"><\/script>');document.write('<script type="text/javascript">window.ic_ad_complete();<\/script>')})();
(function(){var u="dmd.ind.lj.unaudros//1/ns/728x90/script_dfp.js";var e=document.createElement('script');e.src='http://pixel.indieclick.com/annonymous/dfp/'+u;e.type="text/javascript";var s=document.getElementsByTagName('script')[0];s.parentNode.insertBefore(e,s);})();
//]]>
</script><noscript><a href="http://ad.doubleclick.net/jump/dmd.ind.lj.unaudros/;sz=728x90;ord=123456789?" target="_blank" ><img src="http://ad.doubleclick.net/ad/dmd.ind.lj.unaudros/;sz=728x90;ord=123456789?" border="0" alt="" /></a></noscript>
<!-- End IndieClick Ad Tag -->
<script type="text/javascript">
(function(){var e=document.createElement('script');e.type='text/javascript';e.async=true;e.id='ic_annonymous_pixel';
e.src='http://pixel.indieclick.com/annonymous/domain/livejournal.com/reach/script_ic.js';
var s=document.getElementsByTagName('script')[0];s.parentNode.insertBefore(e,s);})();
</script>
</div></div>
<div id="page"><div id="page-inner">
<div id="message-window" class="hidden"></div>
<div id="header"><div id="header-inner">
<div id="header-content"><div id="header-content-inner">
<div id="header-photo">
<div id="header-photo-inner"></div>
</div>
<div id="header-text"><ul class="nav">
<li class="item item-recent first"><a href="http://sweetshark.livejournal.com/" title="Recent Entries">Recent Entries</a></li><li class="item item-archive"><a href="http://sweetshark.livejournal.com/calendar" title="Archive">Archive</a></li><li class="item item-friends"><a href="http://sweetshark.livejournal.com/friends" title="Friends">Friends</a></li><li class="item item-userinfo"><a href="http://sweetshark.livejournal.com/profile" title="User Info">User Info</a></li><li class="item item-memories"><a href="http://www.livejournal.com/tools/memories.bml?user=sweetshark" title="Memories">Memories</a></li>
</ul>
<div id="header-name"><a href="http://sweetshark.livejournal.com">You can&#39;t take the sky from me.</a>
</div>
<div id="header-description">Stories about hacking LibreOffice for Ubuntu, fun and profit.</div></div>
</div><span id="header-content-decor" class="decor"><b></b><u></u><s></s><i></i></span></div><!-- End Header Content -->
</div><span id="header-decor" class="decor"><b></b><u></u><s></s><i></i></span></div><!-- End Header --><div id="content"><div id="content-inner">
<div id="alpha"><div id="alpha-inner">
<p class="prevnext"><a href="http://sweetshark.livejournal.com/11319.html">Previous Entry</a> | <a href="http://sweetshark.livejournal.com/11927.html">Next Entry</a></p>
<div id="asset-sweetshark-11564" class="post-asset asset">
<div class="asset-inner">
<div class="asset-header">
<div class="asset-header-inner">
<div class="asset-header-content">
<div class="asset-header-content-inner">
<h2 class="asset-name page-header2"><a href="http://sweetshark.livejournal.com/11564.html" class="subj-link" >open source -- limitless innovation</a></h2>
<div class="asset-meta asset-entry-date">
<ul class="asset-meta-list">
<li class="item"><span><abbr class="datetime">May. 5th, 2012 at 8:18 PM</abbr></span></li>
</ul>
</div>
</div></div></div>
</div> <!-- end asset-header -->
<div class="asset-content">
<div class="asset-body"><div class="user-icon"><img src="http://l-userpic.livejournal.com/117931338/37450766" alt="" height="100" width="100" /><br /></div><div style="text-align: right;"><i>The reasonable man adapts himself to the world;<br />the unreasonable one persists in trying to adapt the world to himself.<br />Therefore all progress depends on the unreasonable man.<br />-- G.B. Shaw</i></div><br /><span><span>Simon Phipps </span></span>recently published this amazing article titled &quot;<a href="http://www.infoworld.com/d/open-source-software/ubuntu-and-android-match-made-in-open-source-192287" rel="nofollow">Ubuntu and Android: A match made in open source</a>&quot; and notes in it that:<br /><br /><i>That would have been impossible with proprietary systems. It was Bill Joy who once pointed out it&#39;s impossible to hire all the smart people. Open source allows you to work and innovate with all the smart people.</i><br /><br />I think there is a deep wisdom in this quote. As Mark Shuttleworth <a href="https://bugs.launchpad.net/ubuntu/+bug/1" rel="nofollow">put it so eloquently in Bug 1</a>:<br /><br /><i>Non-free software is holding back innovation in the IT industry, restricting access to IT to a small part of the world&#39;s population and limiting the ability of software developers to reach their full potential, globally.</i><br /><br />Open source allows people having ideas that seem foolish to the rest of the world to prove themselves right and the rest of the world wrong to the greater benefit of all. Being told &quot;I am happy that there are crazy people like you on the LibreOffice project. Every sane person would have been sure that this cannot possibly work out.&quot; as a praise by Michael Stahl after putting out bibisect shows this is one of the core values of our project. Many others deserve the same praise for succeeding in doing things that were deemed impossible by others. 
Setting up the foundation in the way it was done was one of them, but there were many others.<br /><br />And despite -- or maybe because of -- starting from nothing less than two years ago and bootstrapping all the tiny and big things that are needed to run a software project with more than 10 million lines of code, this project also en passant enabled things like <a href="http://www.youtube.com/watch?v=wzc0uMXGFBY" rel="nofollow">this</a>:<br /><br /><div style="text-align: center;"><br /><iframe src="http://l.lj-toys.com/?auth_token=sessionless%3A1377975600%3Aembedcontent%3A37450766%264%26%26youtube%26wzc0uMXGFBY%3A5ae806ce2ca931576ba2323f9449d52e0e392b17&amp;source=youtube&amp;vid=wzc0uMXGFBY&amp;moduleid=4&amp;preview=&amp;journalid=37450766" width="640" height="390" frameborder="0" class="lj_embedcontent" name="embed_37450766_4"></iframe><br /><a href="http://www.youtube.com/watch?v=wzc0uMXGFBY" rel="nofollow"><img alt="" height="359" src="http://pics.livejournal.com/sweetshark/pic/0000eawe/s640x480" style="border-width: 0pt; border-style: solid;" width="640" /></a></div><br /><br />Despite initial skepticism, I by now firmly believe LibreOffice to be the project that will be able to change the world. Too many individuals in this project succeeded in doing the unreasonable, the foolish and the impossible. We started from nothing and we are still very hungry. We do not fear to innovate, just because someone thinks it would be unreasonable. We will: <a href="http://www.youtube.com/watch?v=UF8uR6Z6KLc" rel="nofollow">Stay hungry, Stay foolish.</a> And we are just getting started.<br /><br />P.S.: Lets each take this opportunity to look back down the road and thank somebody for doing something impossible, foolish or unreasonable. I, for one, thank Thorsten, Florian and Mike for their tireless and invisible work setting up the foundation: Thank you. 
And Norbert Thiebaud for doing all the hard work on the OneGit migration making bibisect possible in the first place: Again, Thank you! And <a href="https://launchpad.net/~penalvch" rel="nofollow">Christopher M. Penalver</a>, <a href="http://rrbd.wordpress.com/" rel="nofollow">Rainer Bielefeld</a>, <a href="http://www.nouenoff.nl/" rel="nofollow">Cor Nouws</a>, Sasha and many more for relentless bug wrangling: Again, Thank you! And <a href="https://launchpad.net/~ricotz" rel="nofollow">Rico Tzschichholz</a> for providing backports: Again, Thank you!<br><br />Please add more thanks in the comments, on twitter, g+ or whatever!</div>
<div class="asset-tags"><h4 class="asset-tags-header page-header-4">Tags:</h4>
<ul class="asset-tags-list">
<li class="item"><a rel="tag" href="http://sweetshark.livejournal.com/tag/libreoffice">libreoffice</a>, </li><li class="item"><a rel="tag" href="http://sweetshark.livejournal.com/tag/ubuntu">ubuntu</a></li>
</ul>
</div>
</div>
<div class="quickreply" id="ljqrtentrycomment" style="display: none;"></div><div class="asset-meta asset-entry-links">
<ul class="asset-meta-list">
<li class="asset-meta-comments item asset-meta-no-comments item-mem_add"><a href="http://www.livejournal.com/tools/memadd.bml?journal=sweetshark&amp;itemid=11564">Add to Memories</a></li><li class="asset-meta-comments item asset-meta-no-comments item-share"><a href="http://sweetshark.livejournal.com/11564.html?title=open%20source%20--%20limitless%20innovation&amp;hashtags=&amp;text=%20%20The%20reasonable%20man%20adapts%20himself%20to%20the%20world%3B%20the%20unreasonable%20one%20persists%20in%20trying%20to%20adapt%20the%20world%20to%20himself.%20Therefore%20all%20progress%20depends%20on%20the%20unreasonable%20man.%20--%20G.B.%20Shaw%20%20%20Simon%20Phipps%20recently%20published%20this%20amazing%20article%20titled%E2%80%A6" data-text="%20%20The%20reasonable%20man%20adapts%20himself%20to%20the%20world%3B%20the%20unreasonable%20one%20persists%20in%20trying%20to%20adapt%20the%20world%20to%20himself.%20Therefore%20all%20progress%20depends%20on%20the%20unreasonable%20man.%20--%20G.B.%20Shaw%20%20%20Simon%20Phipps%20recently%20published%20this%20amazing%20article%20titled%E2%80%A6" data-poster="sweetshark" data-ditemid="11564" class="js-lj-share" data-title="open%20source%20--%20limitless%20innovation" data-url="http%3A%2F%2Fsweetshark.livejournal.com%2F11564.html">Share</a></li><li class="asset-meta-comments item asset-meta-no-comments item-link"><a href="http://sweetshark.livejournal.com/11564.html">Link</a></li>
</ul>
</div>
<div class="asset-footer"></div>
</div>
</div>
<div id="thread_loader_img" style="display:none;" ><img src="http://l-stat.livejournal.com/img/threadExpander.gif"></div></div></div>
<div id="beta"><div id="beta-inner">
<div class="about-me-widget widget">
<div class="widget-inner">
<h3 class="widget-header">
<a href="http://sweetshark.livejournal.com/profile">
Profile</a>
</h3>
<div class="widget-content">
<div class="user-pic"><img src="http://l-userpic.livejournal.com/117931338/37450766" alt="" height="100" width="100" /></div>
<dl class="profile-list widget-list">
<dd class="profile-username item"><span class="ljuser i-ljuser " lj:user="sweetshark"><a href="http://sweetshark.livejournal.com/profile" ><img width="16" height="16" class="i-ljuser-userhead" src="http://l-stat.livejournal.com/img/userinfo.gif?v=17080?v=108.1" /></a><a href="http://sweetshark.livejournal.com/" class="i-ljuser-username" ><b>sweetshark</b></a></span></dd>
<dd class="profile-name item">Bjoern Michaelsen</dd>
<dd class="profile-name item"><a href="https://launchpad.net/~bjoern-michaelsen">Website</a></dd>
</dl>
</div>
</div>
</div>
<div class="calendar-widget widget">
<div class="widget-inner">
<h3 class="widget-header">
Latest Month</h3>
<div class="widget-content">
<table cellspacing="5" cellpadding="0" summary="Monthly calendar with links to each day\'s posts">
<tr><td colspan="7" align="center"><a href="http://sweetshark.livejournal.com/2012/07/">July 2012</a></td></tr>
<tr>
<th>S</th>
<th>M</th>
<th>T</th>
<th>W</th>
<th>T</th>
<th>F</th>
<th>S</th>
</tr>
<tr>
<td>1</td><td>2</td><td>3</td><td>4</td><td>5</td><td><a href="http://sweetshark.livejournal.com/2012/07/06/">6</a></td><td>7</td></tr>
<tr>
<td>8</td><td><a href="http://sweetshark.livejournal.com/2012/07/09/">9</a></td><td>10</td><td>11</td><td>12</td><td>13</td><td>14</td></tr>
<tr>
<td>15</td><td><a href="http://sweetshark.livejournal.com/2012/07/16/">16</a></td><td>17</td><td>18</td><td>19</td><td>20</td><td>21</td></tr>
<tr>
<td>22</td><td>23</td><td>24</td><td>25</td><td>26</td><td>27</td><td>28</td></tr>
<tr>
<td>29</td><td>30</td><td>31</td><td>&nbsp;</td><td>&nbsp;</td><td>&nbsp;</td><td>&nbsp;</td></tr>
</table>
<div class="widget-footer"><p><a href="http://sweetshark.livejournal.com/calendar" title="View All Archives">View All Archives</a></p></div>
</div>
</div>
</div>
<div class="categories-widget widget">
<div class="widget-inner">
<h3 class="widget-header">
Tags</h3>
<div class="widget-content">
<ul class="widget-list tagcloud">
<li class="item tag"><a href="http://sweetshark.livejournal.com/tag/libreoffice" style="font-size: 26px;" title="42 uses, public">libreoffice</a> </li><li class="item tag"><a href="http://sweetshark.livejournal.com/tag/openoffice" style="font-size: 10px;" title="1 use, public">openoffice</a> </li><li class="item tag"><a href="http://sweetshark.livejournal.com/tag/ubuntu" style="font-size: 24px;" title="39 uses, public">ubuntu</a> </li></ul>
<div class="widget-footer"><a href="http://sweetshark.livejournal.com/tag/" title="View my Tags page">View my Tags page</a></div>
</div>
</div>
</div>
<div class="powered-widget widget">
<div class="widget-inner">
<div class="widget-content">
Powered by <a href="http://www.livejournal.com/">LiveJournal.com</a></div>
</div>
</div>
<div class="designed-widget widget">
<div class="widget-inner">
<div class="widget-content">
<div class="asset-name-hover">Designed by <a href="http://lilia.vox.com/" target="_blank">Lilia Ahner</a></div>
</div>
</div>
</div>
</div></div>
</div></div>
<!-- End Content --><div id="footer"><div id="footer-inner">
<div id="ad-5linkunit"><div id="ad-5linkunit-inner">
<!--
## IndieClick Ad Tag
## publisher[Live Journal (unaudited by Google)] zone[Run of site] size[728x90]
-->
<script language="JavaScript" type="text/javascript">
//<![CDATA[
(function(){var config={
ic_domain :"dmd.ind.lj.unaudros"
,ic_size :"728x90"
,ic_zone :""
,ic_campaign:""
,ic_version :"2"
};var ic_config=(typeof window.top.ic_config=="undefined")?window.top.ic_config={}:window.top.ic_config;var in_iframe=(window!=window.top)?true:false;config.iframe=(in_iframe)?true:false;config.ord=(ic_config.ord)?ic_config.ord:ic_config.ord=Math.floor(Math.random()*10000000000000000);config.tile=(ic_config.tile)?++ic_config.tile:ic_config.tile=1;for(var key in ic_config){config[key]=ic_config[key]}var key_value=[];key_value[key_value.length]="sz="+config.ic_size;key_value[key_value.length]="tile="+config.tile;key_value[key_value.length]="ver="+config.ic_version;key_value[key_value.length]="frame="+config.iframe;key_value[key_value.length]="cm="+config.ic_campaign;for(var key in config.ic_keyValue){key_value[key_value.length]=key+"="+config.ic_keyValue[key]}if(!in_iframe&&!ic_config.dcopt){ic_config.dcopt=true;key_value[key_value.length]="dcopt=ist"}document.write('<span id="ic_'+config.ord+config.tile+'"></span>');var element=document.getElementById("ic_"+config.ord+config.tile);if(ic_config.asyncInit){ic_config.asyncInit(key_value,element)}var key_value=key_value.join(";");window.ic_ad_complete=function(){if(in_iframe){window.onload=function(){var elem=document.getElementById("icAdUnit");elem.style.position="absolute";window.frameElement.style.width=elem.offsetWidth+"px";window.frameElement.style.height=elem.offsetHeight+"px";window.frameElement.style.overflow="hidden";window.frameElement.style.border="0"}}};document.write('<script language="JavaScript" src="http://ad.doubleclick.net/adj/'+config.ic_domain+"/"+config.ic_zone+";"+key_value+";ord="+config.ord+'?" type="text/javascript"><\/script>');document.write('<script type="text/javascript">window.ic_ad_complete();<\/script>')})();
(function(){var u="dmd.ind.lj.unaudros//1/ns/728x90/script_dfp.js";var e=document.createElement('script');e.src='http://pixel.indieclick.com/annonymous/dfp/'+u;e.type="text/javascript";var s=document.getElementsByTagName('script')[0];s.parentNode.insertBefore(e,s);})();
//]]>
</script><noscript><a href="http://ad.doubleclick.net/jump/dmd.ind.lj.unaudros/;sz=728x90;ord=123456789?" target="_blank" ><img src="http://ad.doubleclick.net/ad/dmd.ind.lj.unaudros/;sz=728x90;ord=123456789?" border="0" alt="" /></a></noscript>
<!-- End IndieClick Ad Tag -->
<script type="text/javascript">
(function(){var e=document.createElement('script');e.type='text/javascript';e.async=true;e.id='ic_annonymous_pixel';
e.src='http://pixel.indieclick.com/annonymous/domain/livejournal.com/reach/script_ic.js';
var s=document.getElementsByTagName('script')[0];s.parentNode.insertBefore(e,s);})();
</script>
</div></div>
</div></div>
<!-- End Footer --></div></div>
<!-- End Page --></div></div>
<!-- End Container --><script type="text/javascript">LiveJournal.injectScript('https://apis.google.com/js/client:plusone.js',{text:"{lang: 'en'}"});</script>
<script type="text/javascript" src="http://www.livejournal.com/js/jquery/jquery.vkloader.js"></script>
<script type="text/javascript">if (jQuery.VK) { jQuery.VK.init({apiId: 2244371, onlyWidgets: true})} </script>
<script type="text/javascript">LiveJournal.injectScript('http://platform.twitter.com/widgets.js');</script><div id="fb-root"></div>
<script type="text/javascript">
window.fbAsyncInit = function() {
FB.init({appId: '214181831945836', xfbml: true});
};
LiveJournal.injectScript(document.location.protocol + '//connect.facebook.net/en_US/all.js', null, document.getElementById('fb-root'))
</script>
<script type="text/javascript">LiveJournal.injectScript('http://surfingbird.ru/share/share.min.js');</script> <div id='hello-world' style='text-align: left; font-size:0; line-height:0; height:0; overflow:hidden;'><!-- begin of Top100 code -->
<div id='ramblertop100counter'></div>
<script type="text/javascript">
var _top100q = _top100q || [];
_top100q.push(
['setAccount', '1132242'],
['sync', ''], // для синхронизации пользователей
['setCustomVar', 'xz', ''], // передать переменную
['setCustomVar', 'mn', ''], // передать еще одну переменную
['trackPageviewByLogo', document.getElementById('ramblertop100counter')]
);
(function(){
var top100 = document.createElement("script");
top100.type = "text/javascript";
top100.async = true;
top100.src = ("https:" == document.location.protocol ? "https:" : "http:") + "//st.top100.ru/top100/top100.js";
var s = document.getElementsByTagName("script")[0];
s.parentNode.insertBefore(top100, s);
})();
</script>
<noscript>
<img src="http://counter.rambler.ru/top100.cnt?1111412" alt="" width="1" height="1" border="0" />
</noscript>
<!-- end of Top100 code -->
<!-- begin Rambler Ad code -->
<div id="rambler_ad_counter_137797693639"></div>
<script>
;(function () {
LiveJournal.injectScript('//ad.rambler.ru/static/green2.min.js')
.done(function () {
_green.defineSlot('8990', [1, 1], 'rambler_ad_counter_137797693639');
_green.display('rambler_ad_counter_137797693639');
});
}());
</script>
<!-- end Rambler Ad code -->
<script type="text/javascript">
var _gaq = _gaq || [];
_gaq.push(['_setAccount', 'UA-24823174-1']);
_gaq.push(['_setDomainName', '.livejournal.com']);
_gaq.push(['_trackPageview']);
_gaq.push(['_trackPageLoadTime']);
(function() {
var ga = document.createElement('script'); ga.type = 'text/javascript'; ga.async = true;
ga.src = ('https:' == document.location.protocol ? 'https://ssl' : 'http://www') + '.google-analytics.com/ga.js';
var s = document.getElementsByTagName('script')[0]; s.parentNode.insertBefore(ga, s);
})();
</script>
<!-- tns-counter.ru -->
<script language="JavaScript" type="text/javascript">
var img = new Image();
img.src = 'http://www.tns-counter.ru/V13a***R>' + document.referrer.replace(/\*/g,'%2a') + '*sup_ru/ru/UTF-8/tmsec=lj_noncyr/' + Math.round(Math.random() * 1000000000);
</script>
<noscript>
<img src="http://www.tns-counter.ru/V13a****sup_ru/ru/UTF-8/tmsec=lj_noncyr/" width="1" height="1" alt="">
</noscript>
<!--/ tns-counter.ru -->
<!-- Begin comScore Tag 1.1111.15 -->
<script type="text/javascript">
// <![CDATA[
Site.page.comscore = {};
Site.page.comscore.url = 'http'+(document.location.href.charAt(4)=='s'?'s://sb':'://b')+'.scorecardresearch.com/b';
Site.page.comscore.query = '?c1=2&c2=7602110&sm_vd_cyrillic_status=nonCyr&sm_vd_view_own_journal=undef&sm_vd_id=undef&sm_vd_login_status=logout&sm_vd_account_level=undef&sm_vd_premium_package=undef&sm_vd_early_adopter=undef&sm_vd_log_in_service=undef&sm_vd_viewing_scheme=lanzelot&sm_vd_view_in_my_style=undef&sm_pd_visited_journal_account_type=personal&sm_pd_visited_journal_log_in_service=lj&sm_pd_ads_onpage=2&sm_pd_ad_eligible=yes&sm_pd_ad_1=demand_media_728x90&sm_pd_ad_2=demand_media_728x90&sm_pd_adult_content=none&sm_pd_comments_style=s2&sm_pd_error_pages=undef&sm_pd_visited_journal_name=sweetshark&sm_pd_page_type=journal&sm_pd_style_layout=Expressive&sm_pd_style_design=undef&sm_pd_style_system=s2&sm_pd_visited_journal_account_level=plus&sm_pd_early_adopter=no&sm_pd_visited_journal_premium_package=no&sm_pd_page_group=PostMainPage&category=undef&sm_pd_geotargeting=noncyr&sm_pd_rating_user_duplication=show&sm_pd_rating_friends=hide&sm_pd_rating_hidden_post=hide';
function udm_(a){var b="comScore=",c=document,d=c.cookie,e="",f="indexOf",g="substring",h="length",i=2048,j,k="&ns_",l="&",m,n,o,p,q=window,r=q.encodeURIComponent||escape;if(d[f](b)+1)for(o=0,n=d.split(";"),p=n[h];o<p;o++)m=n[o][f](b),m+1&&(e=l+unescape(n[o][g](m+b[h])));a+=k+"_t="+ +(new Date)+k+"c="+(c.characterSet||c.defaultCharset||"")+"&c8="+r(c.title)+e+"&c7="+r(c.URL)+"&c9="+r(c.referrer),a[h]>i&&a[f](l)>0&&(j=a[g](0,i-8).lastIndexOf(l),a=(a[g](0,j)+k+"cut="+r(a[g](j+1)))[g](0,i)),c.images?(m=new Image,q.ns_p||(ns_p=m),m.src=a):c.write("<","p","><",'img src="',a,'" height="1" width="1" alt="*"',"><","/p",">")}
udm_(Site.page.comscore.url + Site.page.comscore.query);
// ]]>
</script>
<noscript><p><img src="http://b.scorecardresearch.com/p?c1=2&c2=7602110&sm_vd_cyrillic_status=nonCyr&sm_vd_view_own_journal=undef&sm_vd_id=undef&sm_vd_login_status=logout&sm_vd_account_level=undef&sm_vd_premium_package=undef&sm_vd_early_adopter=undef&sm_vd_log_in_service=undef&sm_vd_viewing_scheme=lanzelot&sm_vd_view_in_my_style=undef&sm_pd_visited_journal_account_type=personal&sm_pd_visited_journal_log_in_service=lj&sm_pd_ads_onpage=2&sm_pd_ad_eligible=yes&sm_pd_ad_1=demand_media_728x90&sm_pd_ad_2=demand_media_728x90&sm_pd_adult_content=none&sm_pd_comments_style=s2&sm_pd_error_pages=undef&sm_pd_visited_journal_name=sweetshark&sm_pd_page_type=journal&sm_pd_style_layout=Expressive&sm_pd_style_design=undef&sm_pd_style_system=s2&sm_pd_visited_journal_account_level=plus&sm_pd_early_adopter=no&sm_pd_visited_journal_premium_package=no&sm_pd_page_group=PostMainPage&category=undef&sm_pd_geotargeting=noncyr&sm_pd_rating_user_duplication=show&sm_pd_rating_friends=hide&sm_pd_rating_hidden_post=hide" height="1" width="1" alt="*"></p></noscript>
<script type="text/javascript" language="JavaScript1.3" src="http://b.scorecardresearch.com/c2/7602110/cs.js"></script>
<!-- End comScore Tag --><!-- LiveJournal COUNTER -->
<img src="http://xc3.services.livejournal.com/ljcounter?d=srv:bil1-ws32,r:0,j:37450766,uri:%22%2F11564.html%22,vig:0,extra:Ajt0DgI7dA4AAC0s" alt="" />
<!-- /COUNTER -->
<!-- begin of yandex code -->
<script language=JavaScript>
<!--
var seed=Math.round(Math.random()*65535);
document.write("<img src=http://awaps.yandex.ru/0/9999/001001.gif?0-0-"+seed+"-0-&" + "timestamp=" + seed + "&awcode=6&" + "subsection=0 width=1 height=1 border=0>");
//-->
</script>
<noscript>
<img src=http://awaps.yandex.ru/0/9999/001001.gif?subsection=0 width=1 height=1 border=0>
</noscript>
<!-- end of yandex code -->
<!-- begin of Bogun code -->
<script>
(new Image).src = [
"https:" === document.location.protocol ? "https:" : "http:",
"//autocontext.begun.ru/analytics?target_id=0&counter_id=0&url=",
encodeURIComponent(document.URL),
"&ref=", encodeURIComponent(document.referrer),
"&rnd=", Math.random()
].join('');
</script>
<!-- end of Begun code -->
<!-- Begin ATI Basic Tracking Code -->
<script type="text/javascript">
LJ.define('LJ.ATI');
xtnv = document;
xtsd = 'http://logc400';
xtsite = '528851';
xtn2 = '2';
xtpage = 'journal::personal::sweetshark::sweetshark.livejournal.com/11564.html';
xtdi = '';
LJ.ATI.page = xtpage;
LJ.ATI.level2id = xtn2;
LJ.ATI.queryString = '';
LJ.ATI.params = {
ac: '0',
an: '',
x1: '[' + document.title + ']',
x2: '[undef]',
x3: '2',
x4: '0',
x5: '[lanzelot]',
x6: '0',
x7: '[lj]',
x8: '2',
x9: '1',
x10: '1',
x11: '1',
x12: '2',
x13: '2',
x14: '[Expressive]',
x15: '0',
x16: '2',
x17: '[undef]',
f1: '[]',
f2: '',
f3: '',
f4: '',
tag: '[]',
ptype: '2-2-1',
};
LJ.ATI.queryString = LiveJournal.constructUrl('', LJ.ATI.params).replace('?', '');
if (window.xtparam != null) {
window.xtparam += LJ.ATI.queryString;
} else{
window.xtparam = LJ.ATI.queryString + '&ati=PUB-[livejournal]-[demand_media-728x90]--PUB-[livejournal]-[demand_media-728x90]';
};
LiveJournal.injectScript('http://l-stat.livejournal.com/js/ads/xtcore.js');
</script>
<noscript>
<img width="1" height="1" alt="" src="http://logc400.xiti.com/hit.xiti?s=528851&s2=1&p=&di=&an=&ac=0&x2=undef&x3=2&x4=0&x5=lanzelot&x6=0&x7=lj&x8=2&x9=1&x10=1&x11=1&x12=2&x13=2&x14=Expressive&x15=0&x16=2&x17=undef&tag=&ptype=2-2-1&f1=&f2=&f3=&f4=" >
</noscript>
<!-- End ATI Basic Tracking Code -->
</div></body>
</html>

@ -0,0 +1,33 @@
# -*- coding: utf8 -*-
from __future__ import absolute_import
from __future__ import division, print_function, unicode_literals
from os.path import join, dirname
from readability.readable import Article
from ...compat import unittest
class TestArticle(unittest.TestCase):
    """
    Test the scoring and parsing of the article from URL below:
    http://sweetshark.livejournal.com/11564.html

    The page itself is not fetched; a saved copy (``article.html``, located
    next to this module) is parsed instead.
    """

    # Original location of the saved page; passed to Article alongside the
    # fixture content.
    ARTICLE_URL = "http://sweetshark.livejournal.com/11564.html"

    def setUp(self):
        """Load the saved article fixture into ``self.document``."""
        article_path = join(dirname(__file__), "article.html")
        # The fixture is read as raw bytes and handed to Article unchanged.
        # The handle is named ``fixture`` to avoid shadowing the Python 2
        # builtin ``file``.
        with open(article_path, "rb") as fixture:
            self.document = Article(fixture.read(), self.ARTICLE_URL)

    def tearDown(self):
        """Drop the reference to the parsed article."""
        self.document = None

    def test_parses(self):
        """Verify the readable output contains the readability body wrapper."""
        self.assertIn('id="readabilityBody"', self.document.readable)

    def test_content_after_video(self):
        """The text 'Stay hungry, Stay foolish' must be kept in the output.

        NOTE(review): judging by the test name, this text presumably follows
        an embedded video in the post -- confirm against the fixture.
        """
        self.assertIn('Stay hungry, Stay foolish', self.document.readable)
Loading…
Cancel
Save