1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-04 11:43:33 +02:00

Removed overwritten builtins from mosto of the files.

Just started the process of cleaning up the code base.
This commit is contained in:
2020-04-19 19:06:09 +02:00
parent 571da747b3
commit b66cbd2c1e
95 changed files with 167 additions and 269 deletions

View File

@@ -17,15 +17,15 @@ from functools import partial
from itertools import chain
from math import ceil, floor
import bs4
from ebook_converter import (
__appname__, entity_to_unicode, fit_image, force_unicode, preferred_encoding
)
from ebook_converter.constants import filesystem_encoding
from ebook_converter.devices.interface import DevicePlugin as Device
from ebook_converter.ebooks import ConversionError
from ebook_converter.ebooks.BeautifulSoup import (
BeautifulSoup, Comment, Declaration, NavigableString, ProcessingInstruction, Tag
)
from ebook_converter.ebooks.BeautifulSoup import html5_parser
from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.ebooks.lrf import Book
from ebook_converter.ebooks.lrf.html.color_map import lrs_color
@@ -86,7 +86,7 @@ def tag_regex(tagname):
class HTMLConverter(object):
SELECTOR_PAT = re.compile(r"([A-Za-z0-9\-\_\:\.]+[A-Za-z0-9\-\_\:\.\s\,]*)\s*\{([^\}]*)\}")
PAGE_BREAK_PAT = re.compile(r'page-break-(?:after|before)\s*:\s*(\w+)', re.IGNORECASE)
IGNORED_TAGS = (Comment, Declaration, ProcessingInstruction)
IGNORED_TAGS = (bs4.Comment, bs4.Declaration, bs4.ProcessingInstruction)
MARKUP_MASSAGE = [
# Close <a /> tags
@@ -337,7 +337,7 @@ class HTMLConverter(object):
raw = xml_to_unicode(raw, replace_entities=True)[0]
for pat, repl in nmassage:
raw = pat.sub(repl, raw)
soup = BeautifulSoup(raw)
soup = html5_parser(raw)
if not self.baen and self.is_baen(soup):
self.baen = True
self.log.info(_('\tBaen file detected. Re-parsing...'))
@@ -585,9 +585,9 @@ class HTMLConverter(object):
break
if isinstance(c, HTMLConverter.IGNORED_TAGS):
continue
if isinstance(c, NavigableString):
if isinstance(c, bs4.NavigableString):
text += unicode_type(c)
elif isinstance(c, Tag):
elif isinstance(c, bs4.Tag):
if c.name.lower() == 'img' and c.has_attr('alt'):
alt_text += c['alt']
continue
@@ -720,9 +720,9 @@ class HTMLConverter(object):
for c in copy.copy(ptag.contents):
if isinstance(c, HTMLConverter.IGNORED_TAGS):
continue
elif isinstance(c, Tag):
elif isinstance(c, bs4.Tag):
self.parse_tag(c, pcss)
elif isinstance(c, NavigableString):
elif isinstance(c, bs4.NavigableString):
self.add_text(c, pcss, ppcss)
if not self.in_table:
try:
@@ -1551,11 +1551,11 @@ class HTMLConverter(object):
if tag.contents:
c = tag.contents[0]
if isinstance(c, NavigableString):
if isinstance(c, bs4.NavigableString):
c = unicode_type(c).replace('\r\n', '\n').replace('\r', '\n')
if c.startswith('\n'):
c = c[1:]
tag.contents[0] = NavigableString(c)
tag.contents[0] = bs4.NavigableString(c)
tag.contents[0].setup(tag)
self.process_children(tag, tag_css, tag_pseudo_css)
self.end_current_block()
@@ -1823,7 +1823,7 @@ def process_file(path, options, logger):
for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
val = getattr(options, prop, None)
if val and not isinstance(val, unicode_type):
soup = BeautifulSoup(val)
soup = html5_parser(val)
setattr(options, prop, unicode_type(soup))
title = (options.title, options.title_sort)

View File

@@ -4,7 +4,7 @@ from ebook_converter.ebooks.lrf.fonts import get_font
from ebook_converter.ebooks.lrf.pylrs.pylrs import TextBlock, Text, CR, Span, \
CharButton, Plot, Paragraph, \
LrsTextTag
from ebook_converter.polyglot.builtins import string_or_bytes, range, native_string_type
from ebook_converter.polyglot.builtins import string_or_bytes, native_string_type
__license__ = 'GPL v3'