Purification

pull/21/head
Mišo Belica 11 years ago
parent 101950478e
commit c69cd4b2ba

@ -5,28 +5,24 @@ from __future__ import absolute_import
import re
import logging
from lxml.etree import tounicode
from lxml.etree import tostring
from lxml.html.clean import Cleaner
from lxml.html import fragment_fromstring
from lxml.html import fromstring
from operator import attrgetter
from pprint import PrettyPrinter
from lxml.html.clean import Cleaner
from lxml.etree import tounicode, tostring
from lxml.html import fragment_fromstring, fromstring
from .document import OriginalDocument
from .scoring import score_candidates
from .scoring import get_link_density
from .scoring import get_class_weight
from .scoring import is_unlikely_node
from .scoring import (score_candidates, get_link_density, get_class_weight,
is_unlikely_node)
from .utils import cached_property
html_cleaner = Cleaner(scripts=True, javascript=True, comments=True,
style=True, links=True, meta=False, add_nofollow=False,
page_structure=False, processing_instructions=True,
embedded=False, frames=False, forms=False,
annoying_tags=False, remove_tags=None,
remove_unknown_tags=False, safe_attrs_only=False)
style=True, links=True, meta=False, add_nofollow=False,
page_structure=False, processing_instructions=True,
embedded=False, frames=False, forms=False,
annoying_tags=False, remove_tags=None,
remove_unknown_tags=False, safe_attrs_only=False)
BASE_DOC = """

Loading…
Cancel
Save