mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-04-02 10:23:34 +02:00
Removed polyglot's unicode_type usage
This commit is contained in:
@@ -37,7 +37,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import (
|
||||
RuledLine, Span, Sub, Sup, TextBlock
|
||||
)
|
||||
from ebook_converter.ptempfile import PersistentTemporaryFile
|
||||
from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type
|
||||
from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes
|
||||
from ebook_converter.polyglot.urllib import unquote
|
||||
|
||||
from PIL import Image as PILImage
|
||||
@@ -276,7 +276,7 @@ class HTMLConverter(object):
|
||||
update_css(npcss, self.override_pcss)
|
||||
|
||||
paths = [os.path.abspath(path) for path in paths]
|
||||
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode_type) else path for path in paths]
|
||||
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, str) else path for path in paths]
|
||||
|
||||
while len(paths) > 0 and self.link_level <= self.link_levels:
|
||||
for path in paths:
|
||||
@@ -356,7 +356,7 @@ class HTMLConverter(object):
|
||||
os.makedirs(tdir)
|
||||
try:
|
||||
with open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb') as f:
|
||||
f.write(unicode_type(soup).encode('utf-8'))
|
||||
f.write(str(soup).encode('utf-8'))
|
||||
self.log.info(_('Written preprocessed HTML to ')+f.name)
|
||||
except:
|
||||
pass
|
||||
@@ -389,7 +389,7 @@ class HTMLConverter(object):
|
||||
self.log.info(_('\tConverting to BBeB...'))
|
||||
self.current_style = {}
|
||||
self.page_break_found = False
|
||||
if not isinstance(path, unicode_type):
|
||||
if not isinstance(path, str):
|
||||
path = path.decode(sys.getfilesystemencoding())
|
||||
self.target_prefix = path
|
||||
self.previous_text = '\n'
|
||||
@@ -399,7 +399,7 @@ class HTMLConverter(object):
|
||||
def parse_css(self, style):
|
||||
"""
|
||||
Parse the contents of a <style> tag or .css file.
|
||||
@param style: C{unicode_type(style)} should be the CSS to parse.
|
||||
@param style: C{str(style)} should be the CSS to parse.
|
||||
@return: A dictionary with one entry per selector where the key is the
|
||||
selector name and the value is a dictionary of properties
|
||||
"""
|
||||
@@ -587,7 +587,7 @@ class HTMLConverter(object):
|
||||
if isinstance(c, HTMLConverter.IGNORED_TAGS):
|
||||
continue
|
||||
if isinstance(c, bs4.NavigableString):
|
||||
text += unicode_type(c)
|
||||
text += str(c)
|
||||
elif isinstance(c, bs4.Tag):
|
||||
if c.name.lower() == 'img' and c.has_attr('alt'):
|
||||
alt_text += c['alt']
|
||||
@@ -642,7 +642,7 @@ class HTMLConverter(object):
|
||||
para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment']
|
||||
ascii_text = text
|
||||
|
||||
if not isinstance(path, unicode_type):
|
||||
if not isinstance(path, str):
|
||||
path = path.decode(sys.getfilesystemencoding())
|
||||
if path in self.processed_files:
|
||||
if path+fragment in self.targets.keys():
|
||||
@@ -1085,7 +1085,7 @@ class HTMLConverter(object):
|
||||
|
||||
s1, s2 = get('margin'), get('padding')
|
||||
|
||||
bl = unicode_type(self.current_block.blockStyle.attrs['blockwidth'])+'px'
|
||||
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
|
||||
|
||||
def set(default, one, two):
|
||||
fval = None
|
||||
@@ -1214,7 +1214,7 @@ class HTMLConverter(object):
|
||||
ans = 120
|
||||
if ans is not None:
|
||||
ans += int(self.font_delta * 20)
|
||||
ans = unicode_type(ans)
|
||||
ans = str(ans)
|
||||
return ans
|
||||
|
||||
family, weight, style, variant = 'serif', 'normal', 'normal', None
|
||||
@@ -1320,10 +1320,10 @@ class HTMLConverter(object):
|
||||
def text_properties(self, tag_css):
|
||||
indent = self.book.defaultTextStyle.attrs['parindent']
|
||||
if 'text-indent' in tag_css:
|
||||
bl = unicode_type(self.current_block.blockStyle.attrs['blockwidth'])+'px'
|
||||
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
|
||||
if 'em' in tag_css['text-indent']:
|
||||
bl = '10pt'
|
||||
indent = self.unit_convert(unicode_type(tag_css['text-indent']), pts=True, base_length=bl)
|
||||
indent = self.unit_convert(str(tag_css['text-indent']), pts=True, base_length=bl)
|
||||
if not indent:
|
||||
indent = 0
|
||||
if indent > 0 and indent < 10 * self.minimum_indent:
|
||||
@@ -1518,11 +1518,11 @@ class HTMLConverter(object):
|
||||
elif not urllib.parse.urlparse(tag['src'])[0]:
|
||||
self.log.warn('Could not find image: '+tag['src'])
|
||||
else:
|
||||
self.log.debug("Failed to process: %s"%unicode_type(tag))
|
||||
self.log.debug("Failed to process: %s"%str(tag))
|
||||
elif tagname in ['style', 'link']:
|
||||
ncss, npcss = {}, {}
|
||||
if tagname == 'style':
|
||||
text = ''.join([unicode_type(i) for i in tag.findAll(text=True)])
|
||||
text = ''.join([str(i) for i in tag.findAll(text=True)])
|
||||
css, pcss = self.parse_css(text)
|
||||
ncss.update(css)
|
||||
npcss.update(pcss)
|
||||
@@ -1554,7 +1554,7 @@ class HTMLConverter(object):
|
||||
if tag.contents:
|
||||
c = tag.contents[0]
|
||||
if isinstance(c, bs4.NavigableString):
|
||||
c = unicode_type(c).replace('\r\n', '\n').replace('\r', '\n')
|
||||
c = str(c).replace('\r\n', '\n').replace('\r', '\n')
|
||||
if c.startswith('\n'):
|
||||
c = c[1:]
|
||||
tag.contents[0] = bs4.NavigableString(c)
|
||||
@@ -1612,7 +1612,7 @@ class HTMLConverter(object):
|
||||
in_ol = parent.name.lower() == 'ol'
|
||||
break
|
||||
parent = parent.parent
|
||||
prepend = unicode_type(self.list_counter)+'. ' if in_ol else '\u2022' + ' '
|
||||
prepend = str(self.list_counter)+'. ' if in_ol else '\u2022' + ' '
|
||||
self.current_para.append(Span(prepend))
|
||||
self.process_children(tag, tag_css, tag_pseudo_css)
|
||||
if in_ol:
|
||||
@@ -1655,7 +1655,7 @@ class HTMLConverter(object):
|
||||
|
||||
if (self.anchor_ids and tag.has_attr('id')) or (self.book_designer and tag.get('class') in ('title', ['title'])):
|
||||
if not tag.has_attr('id'):
|
||||
tag['id'] = __appname__+'_id_'+unicode_type(self.id_counter)
|
||||
tag['id'] = __appname__+'_id_'+str(self.id_counter)
|
||||
self.id_counter += 1
|
||||
|
||||
tkey = self.target_prefix+tag['id']
|
||||
@@ -1728,7 +1728,7 @@ class HTMLConverter(object):
|
||||
except Exception as err:
|
||||
self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err))
|
||||
self.log.exception('')
|
||||
self.log.debug(_('Bad table:\n%s')%unicode_type(tag)[:300])
|
||||
self.log.debug(_('Bad table:\n%s')%str(tag)[:300])
|
||||
self.in_table = False
|
||||
self.process_children(tag, tag_css, tag_pseudo_css)
|
||||
finally:
|
||||
@@ -1824,9 +1824,9 @@ def process_file(path, options, logger):
|
||||
|
||||
for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
|
||||
val = getattr(options, prop, None)
|
||||
if val and not isinstance(val, unicode_type):
|
||||
if val and not isinstance(val, str):
|
||||
soup = html5_parser(val)
|
||||
setattr(options, prop, unicode_type(soup))
|
||||
setattr(options, prop, str(soup))
|
||||
|
||||
title = (options.title, options.title_sort)
|
||||
author = (options.author, options.author_sort)
|
||||
@@ -1870,7 +1870,7 @@ def process_file(path, options, logger):
|
||||
options.force_page_break = fpb
|
||||
options.link_exclude = le
|
||||
options.page_break = pb
|
||||
if not isinstance(options.chapter_regex, unicode_type):
|
||||
if not isinstance(options.chapter_regex, str):
|
||||
options.chapter_regex = options.chapter_regex.decode(preferred_encoding)
|
||||
options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE)
|
||||
fpba = options.force_page_break_attr.split(',')
|
||||
|
||||
Reference in New Issue
Block a user