Add test for title

pull/23/head
Richard Harding 11 years ago
parent 19d3ee634c
commit 17270db5f0

@@ -8,10 +8,21 @@ import re
import logging
import charade
from lxml.etree import tostring, tounicode, XMLSyntaxError
from lxml.html import document_fromstring, HTMLParser
from ._compat import unicode, to_bytes, to_unicode, unicode_compatible
from lxml.etree import (
tounicode,
XMLSyntaxError,
)
from lxml.html import (
document_fromstring,
HTMLParser,
)
from ._compat import (
to_bytes,
to_unicode,
unicode,
unicode_compatible,
)
from .utils import cached_property

@@ -29,6 +29,14 @@ class TestReadableDocument(unittest.TestCase):
# We get back the document as a div tag currently by default.
self.assertEqual(doc.readable_dom.tag, 'div')
def test_title_loads(self):
    """The title of the minimal snippet is readable from the parsed article.

    Builds an Article from 'document_min.html' and compares the title
    found on its original document with the expected text.
    """
    article = Article(load_snippet('document_min.html'))
    parsed_title = article._original_document.title
    self.assertEqual(parsed_title, 'Min Document Title')
def test_doc_no_scripts_styles(self):
"""Step #1 remove all scripts from the document"""
doc = Article(load_snippet('document_scripts.html'))
@@ -129,18 +137,26 @@ class TestCleaning(unittest.TestCase):
'</div></body></html>')
test_doc2 = document_fromstring(test_html2)
self.assertEqual(
tounicode(leaf_div_elements_into_paragraphs(test_doc2)),
to_unicode('<html><body><p>simple<a href="">link</a></p></body></html>')
tounicode(
leaf_div_elements_into_paragraphs(test_doc2)),
to_unicode(
'<html><body><p>simple<a href="">link</a></p></body></html>')
)
def test_dont_transform_div_with_div(self):
"""Verify that only child <div> element is replaced by <p>."""
dom = document_fromstring(
"<html><body><div>text<div>child</div>aftertext</div></body></html>")
"<html><body><div>text<div>child</div>"
"aftertext</div></body></html>"
)
self.assertEqual(
tounicode(leaf_div_elements_into_paragraphs(dom)),
to_unicode("<html><body><div>text<p>child</p>aftertext</div></body></html>")
tounicode(
leaf_div_elements_into_paragraphs(dom)),
to_unicode(
"<html><body><div>text<p>child</p>"
"aftertext</div></body></html>"
)
)
def test_bad_links(self):

Loading…
Cancel
Save