1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-02 10:23:34 +02:00

Removed polyglot's unicode_type usage

This commit is contained in:
2020-04-20 19:25:28 +02:00
parent ef7e2b10be
commit 128705f258
130 changed files with 657 additions and 716 deletions

View File

@@ -37,7 +37,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import (
RuledLine, Span, Sub, Sup, TextBlock
)
from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type
from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes
from ebook_converter.polyglot.urllib import unquote
from PIL import Image as PILImage
@@ -276,7 +276,7 @@ class HTMLConverter(object):
update_css(npcss, self.override_pcss)
paths = [os.path.abspath(path) for path in paths]
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode_type) else path for path in paths]
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, str) else path for path in paths]
while len(paths) > 0 and self.link_level <= self.link_levels:
for path in paths:
@@ -356,7 +356,7 @@ class HTMLConverter(object):
os.makedirs(tdir)
try:
with open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb') as f:
f.write(unicode_type(soup).encode('utf-8'))
f.write(str(soup).encode('utf-8'))
self.log.info(_('Written preprocessed HTML to ')+f.name)
except:
pass
@@ -389,7 +389,7 @@ class HTMLConverter(object):
self.log.info(_('\tConverting to BBeB...'))
self.current_style = {}
self.page_break_found = False
if not isinstance(path, unicode_type):
if not isinstance(path, str):
path = path.decode(sys.getfilesystemencoding())
self.target_prefix = path
self.previous_text = '\n'
@@ -399,7 +399,7 @@ class HTMLConverter(object):
def parse_css(self, style):
"""
Parse the contents of a <style> tag or .css file.
@param style: C{unicode_type(style)} should be the CSS to parse.
@param style: C{str(style)} should be the CSS to parse.
@return: A dictionary with one entry per selector where the key is the
selector name and the value is a dictionary of properties
"""
@@ -587,7 +587,7 @@ class HTMLConverter(object):
if isinstance(c, HTMLConverter.IGNORED_TAGS):
continue
if isinstance(c, bs4.NavigableString):
text += unicode_type(c)
text += str(c)
elif isinstance(c, bs4.Tag):
if c.name.lower() == 'img' and c.has_attr('alt'):
alt_text += c['alt']
@@ -642,7 +642,7 @@ class HTMLConverter(object):
para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment']
ascii_text = text
if not isinstance(path, unicode_type):
if not isinstance(path, str):
path = path.decode(sys.getfilesystemencoding())
if path in self.processed_files:
if path+fragment in self.targets.keys():
@@ -1085,7 +1085,7 @@ class HTMLConverter(object):
s1, s2 = get('margin'), get('padding')
bl = unicode_type(self.current_block.blockStyle.attrs['blockwidth'])+'px'
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
def set(default, one, two):
fval = None
@@ -1214,7 +1214,7 @@ class HTMLConverter(object):
ans = 120
if ans is not None:
ans += int(self.font_delta * 20)
ans = unicode_type(ans)
ans = str(ans)
return ans
family, weight, style, variant = 'serif', 'normal', 'normal', None
@@ -1320,10 +1320,10 @@ class HTMLConverter(object):
def text_properties(self, tag_css):
indent = self.book.defaultTextStyle.attrs['parindent']
if 'text-indent' in tag_css:
bl = unicode_type(self.current_block.blockStyle.attrs['blockwidth'])+'px'
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
if 'em' in tag_css['text-indent']:
bl = '10pt'
indent = self.unit_convert(unicode_type(tag_css['text-indent']), pts=True, base_length=bl)
indent = self.unit_convert(str(tag_css['text-indent']), pts=True, base_length=bl)
if not indent:
indent = 0
if indent > 0 and indent < 10 * self.minimum_indent:
@@ -1518,11 +1518,11 @@ class HTMLConverter(object):
elif not urllib.parse.urlparse(tag['src'])[0]:
self.log.warn('Could not find image: '+tag['src'])
else:
self.log.debug("Failed to process: %s"%unicode_type(tag))
self.log.debug("Failed to process: %s"%str(tag))
elif tagname in ['style', 'link']:
ncss, npcss = {}, {}
if tagname == 'style':
text = ''.join([unicode_type(i) for i in tag.findAll(text=True)])
text = ''.join([str(i) for i in tag.findAll(text=True)])
css, pcss = self.parse_css(text)
ncss.update(css)
npcss.update(pcss)
@@ -1554,7 +1554,7 @@ class HTMLConverter(object):
if tag.contents:
c = tag.contents[0]
if isinstance(c, bs4.NavigableString):
c = unicode_type(c).replace('\r\n', '\n').replace('\r', '\n')
c = str(c).replace('\r\n', '\n').replace('\r', '\n')
if c.startswith('\n'):
c = c[1:]
tag.contents[0] = bs4.NavigableString(c)
@@ -1612,7 +1612,7 @@ class HTMLConverter(object):
in_ol = parent.name.lower() == 'ol'
break
parent = parent.parent
prepend = unicode_type(self.list_counter)+'. ' if in_ol else '\u2022' + ' '
prepend = str(self.list_counter)+'. ' if in_ol else '\u2022' + ' '
self.current_para.append(Span(prepend))
self.process_children(tag, tag_css, tag_pseudo_css)
if in_ol:
@@ -1655,7 +1655,7 @@ class HTMLConverter(object):
if (self.anchor_ids and tag.has_attr('id')) or (self.book_designer and tag.get('class') in ('title', ['title'])):
if not tag.has_attr('id'):
tag['id'] = __appname__+'_id_'+unicode_type(self.id_counter)
tag['id'] = __appname__+'_id_'+str(self.id_counter)
self.id_counter += 1
tkey = self.target_prefix+tag['id']
@@ -1728,7 +1728,7 @@ class HTMLConverter(object):
except Exception as err:
self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err))
self.log.exception('')
self.log.debug(_('Bad table:\n%s')%unicode_type(tag)[:300])
self.log.debug(_('Bad table:\n%s')%str(tag)[:300])
self.in_table = False
self.process_children(tag, tag_css, tag_pseudo_css)
finally:
@@ -1824,9 +1824,9 @@ def process_file(path, options, logger):
for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
val = getattr(options, prop, None)
if val and not isinstance(val, unicode_type):
if val and not isinstance(val, str):
soup = html5_parser(val)
setattr(options, prop, unicode_type(soup))
setattr(options, prop, str(soup))
title = (options.title, options.title_sort)
author = (options.author, options.author_sort)
@@ -1870,7 +1870,7 @@ def process_file(path, options, logger):
options.force_page_break = fpb
options.link_exclude = le
options.page_break = pb
if not isinstance(options.chapter_regex, unicode_type):
if not isinstance(options.chapter_regex, str):
options.chapter_regex = options.chapter_regex.decode(preferred_encoding)
options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE)
fpba = options.force_page_break_attr.split(',')