Fix tests to pass again

12 years ago · 60ab4a96b0
parent 8f28e7c947
commit 60ab4a96b0
2 changed files with 12 additions and 12 deletions
--- a/src/breadability/readable.py
+++ b/src/breadability/readable.py
@@ -69,7 +69,12 @@ def transform_misused_divs_into_paragraphs(doc):


 def process(doc):
-    """Process this doc to make it readable."""
+    """Process this doc to make it readable.
+
+    Here we're going to remove unlikely nodes, find scores on the rest, and
+    clean up and return the final best match.
+
+    """
    unlikely = []
    scorable_node_tags = ['p', 'td', 'pre']
    nodes_to_score = []
@@ -92,14 +97,8 @@ def process(doc):

    # process our clean up instructions
    [n.drop_tree() for n in unlikely]
+    return doc

-# def transform_misused_divs_into_paragraphs(self):
-#     for elem in self.html.iter():
-# if elem.tag.lower() == "div":
-#     # transform <div>s that do not contain other block elements into <p>s
-#     if not REGEXES['divToPElementsRe'].search(unicode(''.join(map(tostring, list(elem))))):
-#         self.debug("Altering div(#%s.%s) to p" % (elem.get('id', ''), elem.get('class', '')))
-#         elem.tag = "p"

 class Article(object):
    """Parsed readable object"""
@@ -114,6 +113,7 @@ class Article(object):
        doc = build_base_document(doc)
        doc = drop_tag(doc, 'script', 'link', 'style', 'noscript')
        doc = transform_misused_divs_into_paragraphs(doc)
+        doc = process(doc)
        return doc


--- a/src/breadability/tests/test_snippets/test_readable_unlikely.html
+++ b/src/breadability/tests/test_snippets/test_readable_unlikely.html
@@ -16,10 +16,10 @@
        <div id="header">Gone</div>

        <!-- These have bad and good terms so should stay -->
-        <p id="mainfoot">Gone</div>
-        <p id="harticleeader">Gone</div>
-        <p class="article header">Gone</div>
-        <p class="column header">Gone</div>
+        <div id="mainfoot">Gone</div>
+        <div id="harticleeader">Gone</div>
+        <div class="article header">Gone</div>
+        <div class="column header">Gone</div>

        <!-- And this will stick around for final -->
        <div>Final content.</div>