1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-17 11:43:30 +02:00

Removed polyglot's unicode_type usage

This commit is contained in:
2020-04-20 19:25:28 +02:00
parent ef7e2b10be
commit 128705f258
130 changed files with 657 additions and 716 deletions

View File

@@ -37,7 +37,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import (
RuledLine, Span, Sub, Sup, TextBlock
)
from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type
from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes
from ebook_converter.polyglot.urllib import unquote
from PIL import Image as PILImage
@@ -276,7 +276,7 @@ class HTMLConverter(object):
update_css(npcss, self.override_pcss)
paths = [os.path.abspath(path) for path in paths]
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode_type) else path for path in paths]
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, str) else path for path in paths]
while len(paths) > 0 and self.link_level <= self.link_levels:
for path in paths:
@@ -356,7 +356,7 @@ class HTMLConverter(object):
os.makedirs(tdir)
try:
with open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb') as f:
f.write(unicode_type(soup).encode('utf-8'))
f.write(str(soup).encode('utf-8'))
self.log.info(_('Written preprocessed HTML to ')+f.name)
except:
pass
@@ -389,7 +389,7 @@ class HTMLConverter(object):
self.log.info(_('\tConverting to BBeB...'))
self.current_style = {}
self.page_break_found = False
if not isinstance(path, unicode_type):
if not isinstance(path, str):
path = path.decode(sys.getfilesystemencoding())
self.target_prefix = path
self.previous_text = '\n'
@@ -399,7 +399,7 @@ class HTMLConverter(object):
def parse_css(self, style):
"""
Parse the contents of a <style> tag or .css file.
@param style: C{unicode_type(style)} should be the CSS to parse.
@param style: C{str(style)} should be the CSS to parse.
@return: A dictionary with one entry per selector where the key is the
selector name and the value is a dictionary of properties
"""
@@ -587,7 +587,7 @@ class HTMLConverter(object):
if isinstance(c, HTMLConverter.IGNORED_TAGS):
continue
if isinstance(c, bs4.NavigableString):
text += unicode_type(c)
text += str(c)
elif isinstance(c, bs4.Tag):
if c.name.lower() == 'img' and c.has_attr('alt'):
alt_text += c['alt']
@@ -642,7 +642,7 @@ class HTMLConverter(object):
para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment']
ascii_text = text
if not isinstance(path, unicode_type):
if not isinstance(path, str):
path = path.decode(sys.getfilesystemencoding())
if path in self.processed_files:
if path+fragment in self.targets.keys():
@@ -1085,7 +1085,7 @@ class HTMLConverter(object):
s1, s2 = get('margin'), get('padding')
bl = unicode_type(self.current_block.blockStyle.attrs['blockwidth'])+'px'
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
def set(default, one, two):
fval = None
@@ -1214,7 +1214,7 @@ class HTMLConverter(object):
ans = 120
if ans is not None:
ans += int(self.font_delta * 20)
ans = unicode_type(ans)
ans = str(ans)
return ans
family, weight, style, variant = 'serif', 'normal', 'normal', None
@@ -1320,10 +1320,10 @@ class HTMLConverter(object):
def text_properties(self, tag_css):
indent = self.book.defaultTextStyle.attrs['parindent']
if 'text-indent' in tag_css:
bl = unicode_type(self.current_block.blockStyle.attrs['blockwidth'])+'px'
bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
if 'em' in tag_css['text-indent']:
bl = '10pt'
indent = self.unit_convert(unicode_type(tag_css['text-indent']), pts=True, base_length=bl)
indent = self.unit_convert(str(tag_css['text-indent']), pts=True, base_length=bl)
if not indent:
indent = 0
if indent > 0 and indent < 10 * self.minimum_indent:
@@ -1518,11 +1518,11 @@ class HTMLConverter(object):
elif not urllib.parse.urlparse(tag['src'])[0]:
self.log.warn('Could not find image: '+tag['src'])
else:
self.log.debug("Failed to process: %s"%unicode_type(tag))
self.log.debug("Failed to process: %s"%str(tag))
elif tagname in ['style', 'link']:
ncss, npcss = {}, {}
if tagname == 'style':
text = ''.join([unicode_type(i) for i in tag.findAll(text=True)])
text = ''.join([str(i) for i in tag.findAll(text=True)])
css, pcss = self.parse_css(text)
ncss.update(css)
npcss.update(pcss)
@@ -1554,7 +1554,7 @@ class HTMLConverter(object):
if tag.contents:
c = tag.contents[0]
if isinstance(c, bs4.NavigableString):
c = unicode_type(c).replace('\r\n', '\n').replace('\r', '\n')
c = str(c).replace('\r\n', '\n').replace('\r', '\n')
if c.startswith('\n'):
c = c[1:]
tag.contents[0] = bs4.NavigableString(c)
@@ -1612,7 +1612,7 @@ class HTMLConverter(object):
in_ol = parent.name.lower() == 'ol'
break
parent = parent.parent
prepend = unicode_type(self.list_counter)+'. ' if in_ol else '\u2022' + ' '
prepend = str(self.list_counter)+'. ' if in_ol else '\u2022' + ' '
self.current_para.append(Span(prepend))
self.process_children(tag, tag_css, tag_pseudo_css)
if in_ol:
@@ -1655,7 +1655,7 @@ class HTMLConverter(object):
if (self.anchor_ids and tag.has_attr('id')) or (self.book_designer and tag.get('class') in ('title', ['title'])):
if not tag.has_attr('id'):
tag['id'] = __appname__+'_id_'+unicode_type(self.id_counter)
tag['id'] = __appname__+'_id_'+str(self.id_counter)
self.id_counter += 1
tkey = self.target_prefix+tag['id']
@@ -1728,7 +1728,7 @@ class HTMLConverter(object):
except Exception as err:
self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err))
self.log.exception('')
self.log.debug(_('Bad table:\n%s')%unicode_type(tag)[:300])
self.log.debug(_('Bad table:\n%s')%str(tag)[:300])
self.in_table = False
self.process_children(tag, tag_css, tag_pseudo_css)
finally:
@@ -1824,9 +1824,9 @@ def process_file(path, options, logger):
for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
val = getattr(options, prop, None)
if val and not isinstance(val, unicode_type):
if val and not isinstance(val, str):
soup = html5_parser(val)
setattr(options, prop, unicode_type(soup))
setattr(options, prop, str(soup))
title = (options.title, options.title_sort)
author = (options.author, options.author_sort)
@@ -1870,7 +1870,7 @@ def process_file(path, options, logger):
options.force_page_break = fpb
options.link_exclude = le
options.page_break = pb
if not isinstance(options.chapter_regex, unicode_type):
if not isinstance(options.chapter_regex, str):
options.chapter_regex = options.chapter_regex.decode(preferred_encoding)
options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE)
fpba = options.force_page_break_attr.split(',')

View File

@@ -1,7 +1,7 @@
"""
elements.py -- replacements and helpers for ElementTree
"""
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
from ebook_converter.polyglot.builtins import string_or_bytes
class ElementWriter(object):
@@ -24,9 +24,9 @@ class ElementWriter(object):
return text
def _writeAttribute(self, f, name, value):
f.write(' %s="' % unicode_type(name))
f.write(' %s="' % str(name))
if not isinstance(value, string_or_bytes):
value = unicode_type(value)
value = str(value)
value = self._encodeCdata(value)
value = value.replace('"', '&quot;')
f.write(value)
@@ -37,7 +37,7 @@ class ElementWriter(object):
f.write(text)
def _write(self, f, e):
f.write('<' + unicode_type(e.tag))
f.write('<' + str(e.tag))
attributes = e.items()
attributes.sort()

View File

@@ -9,7 +9,7 @@ import codecs
import os
from .pylrfopt import tagListOptimizer
from ebook_converter.polyglot.builtins import iteritems, string_or_bytes, unicode_type
from ebook_converter.polyglot.builtins import iteritems, string_or_bytes
PYLRF_VERSION = "1.0"
@@ -82,7 +82,7 @@ def writeWord(f, word):
if int(word) > 65535:
raise LrfError('Cannot encode a number greater than 65535 in a word.')
if int(word) < 0:
raise LrfError('Cannot encode a number < 0 in a word: '+unicode_type(word))
raise LrfError('Cannot encode a number < 0 in a word: '+str(word))
f.write(struct.pack("<H", int(word)))
@@ -508,7 +508,7 @@ class LrfObject(object):
raise LrfError("object name %s not recognized" % name)
def __str__(self):
return 'LRFObject: ' + self.name + ", " + unicode_type(self.objId)
return 'LRFObject: ' + self.name + ", " + str(self.objId)
def appendLrfTag(self, tag):
self.tags.append(tag)

View File

@@ -51,7 +51,7 @@ DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs
from ebook_converter import __appname__, __version__
from ebook_converter import entity_to_unicode
from ebook_converter.polyglot.builtins import string_or_bytes, unicode_type, iteritems, native_string_type
from ebook_converter.polyglot.builtins import string_or_bytes, iteritems, native_string_type
class LrsError(Exception):
@@ -226,7 +226,7 @@ class LrsAttributes(object):
raise LrsError("%s does not support setting %s" %
(self.__class__.__name__, name))
if isinstance(value, int):
value = unicode_type(value)
value = str(value)
self.attrs[name] = value
@@ -330,13 +330,13 @@ class LrsObject(object):
def lrsObjectElement(self, name, objlabel="objlabel", labelName=None,
labelDecorate=True, **settings):
element = Element(name)
element.attrib["objid"] = unicode_type(self.objId)
element.attrib["objid"] = str(self.objId)
if labelName is None:
labelName = name
if labelDecorate:
label = "%s.%d" % (labelName, self.objId)
else:
label = unicode_type(self.objId)
label = str(self.objId)
element.attrib[objlabel] = label
element.attrib.update(settings)
return element
@@ -562,7 +562,7 @@ class Book(Delegator):
factor = base_font_size / old_base_font_size
def rescale(old):
return unicode_type(int(int(old) * factor))
return str(int(int(old) * factor))
text_blocks = list(main.get_all(lambda x: isinstance(x, TextBlock)))
for tb in text_blocks:
@@ -693,7 +693,7 @@ class TableOfContents(object):
def addTocEntry(self, tocLabel, textBlock):
if not isinstance(textBlock, (Canvas, TextBlock, ImageBlock, RuledLine)):
raise LrsError("TOC destination must be a Canvas, TextBlock, ImageBlock or RuledLine"+
" not a " + unicode_type(type(textBlock)))
" not a " + str(type(textBlock)))
if textBlock.parent is None:
raise LrsError("TOC text block must be already appended to a page")
@@ -743,8 +743,8 @@ class TocLabel(object):
def toElement(self, se):
return ElementWithText("TocLabel", self.label,
refobj=unicode_type(self.textBlock.objId),
refpage=unicode_type(self.textBlock.parent.objId))
refobj=str(self.textBlock.objId),
refpage=str(self.textBlock.parent.objId))
class BookInfo(object):
@@ -805,7 +805,7 @@ class DocInfo(object):
self.thumbnail = None
self.language = "en"
self.creator = None
self.creationdate = unicode_type(isoformat(date.today()))
self.creationdate = str(isoformat(date.today()))
self.producer = "%s v%s"%(__appname__, __version__)
self.numberofpages = "0"
@@ -829,7 +829,7 @@ class DocInfo(object):
docInfo.append(ElementWithText("Creator", self.creator))
docInfo.append(ElementWithText("CreationDate", self.creationdate))
docInfo.append(ElementWithText("Producer", self.producer))
docInfo.append(ElementWithText("SumPage", unicode_type(self.numberofpages)))
docInfo.append(ElementWithText("SumPage", str(self.numberofpages)))
return docInfo
@@ -1091,7 +1091,7 @@ class LrsStyle(LrsObject, LrsAttributes, LrsContainer):
self.elementName = elementName
self.objectsAppended = False
# self.label = "%s.%d" % (elementName, self.objId)
# self.label = unicode_type(self.objId)
# self.label = str(self.objId)
# self.parent = None
def update(self, settings):
@@ -1101,11 +1101,11 @@ class LrsStyle(LrsObject, LrsAttributes, LrsContainer):
self.attrs[name] = value
def getLabel(self):
return unicode_type(self.objId)
return str(self.objId)
def toElement(self, se):
element = Element(self.elementName, stylelabel=self.getLabel(),
objid=unicode_type(self.objId))
objid=str(self.objId))
element.attrib.update(self.attrs)
return element
@@ -1236,14 +1236,14 @@ class PageStyle(LrsStyle):
del settings[evenbase]
if evenObj.parent is None:
parent.append(evenObj)
settings[evenbase + "id"] = unicode_type(evenObj.objId)
settings[evenbase + "id"] = str(evenObj.objId)
if oddbase in settings:
oddObj = settings[oddbase]
del settings[oddbase]
if oddObj.parent is None:
parent.append(oddObj)
settings[oddbase + "id"] = unicode_type(oddObj.objId)
settings[oddbase + "id"] = str(oddObj.objId)
def appendReferencedObjects(self, parent):
if self.objectsAppended:
@@ -1486,7 +1486,7 @@ class Paragraph(LrsContainer):
def __init__(self, text=None):
LrsContainer.__init__(self, [Text, CR, DropCaps, CharButton,
LrsSimpleChar1, bytes, unicode_type])
LrsSimpleChar1, bytes, str])
if text is not None:
if isinstance(text, string_or_bytes):
text = Text(text)
@@ -1521,7 +1521,7 @@ class Paragraph(LrsContainer):
class LrsTextTag(LrsContainer):
def __init__(self, text, validContents):
LrsContainer.__init__(self, [Text, bytes, unicode_type] + validContents)
LrsContainer.__init__(self, [Text, bytes, str] + validContents)
if text is not None:
self.append(text)
@@ -1580,7 +1580,7 @@ class DropCaps(LrsTextTag):
return self.text is None or not self.text.strip()
def toElement(self, se):
elem = Element('DrawChar', line=unicode_type(self.line))
elem = Element('DrawChar', line=str(self.line))
appendTextElements(elem, self.contents, se)
return elem
@@ -1656,7 +1656,7 @@ class JumpTo(LrsContainer):
self.textBlock = textBlock
def toElement(self, se):
return Element("JumpTo", refpage=unicode_type(self.textBlock.parent.objId), refobj=unicode_type(self.textBlock.objId))
return Element("JumpTo", refpage=str(self.textBlock.parent.objId), refobj=str(self.textBlock.objId))
class Plot(LrsSimpleChar1, LrsContainer):
@@ -1688,8 +1688,8 @@ class Plot(LrsSimpleChar1, LrsContainer):
parent.append(self.obj)
def toElement(self, se):
elem = Element('Plot', xsize=unicode_type(self.xsize), ysize=unicode_type(self.ysize),
refobj=unicode_type(self.obj.objId))
elem = Element('Plot', xsize=str(self.xsize), ysize=str(self.ysize),
refobj=str(self.obj.objId))
if self.adjustment:
elem.set('adjustment', self.adjustment)
return elem
@@ -1771,7 +1771,7 @@ class Space(LrsSimpleChar1, LrsContainer):
if self.xsize == 0:
return
return Element("Space", xsize=unicode_type(self.xsize))
return Element("Space", xsize=str(self.xsize))
def toLrfContainer(self, lrfWriter, container):
if self.xsize != 0:
@@ -1785,7 +1785,7 @@ class Box(LrsSimpleChar1, LrsContainer):
"""
def __init__(self, linetype="solid"):
LrsContainer.__init__(self, [Text, bytes, unicode_type])
LrsContainer.__init__(self, [Text, bytes, str])
if linetype not in LINE_TYPE_ENCODING:
raise LrsError(linetype + " is not a valid line type")
self.linetype = linetype
@@ -1805,7 +1805,7 @@ class Box(LrsSimpleChar1, LrsContainer):
class Span(LrsSimpleChar1, LrsContainer):
def __init__(self, text=None, **attrs):
LrsContainer.__init__(self, [LrsSimpleChar1, Text, bytes, unicode_type])
LrsContainer.__init__(self, [LrsSimpleChar1, Text, bytes, str])
if text is not None:
if isinstance(text, string_or_bytes):
text = Text(text)
@@ -1858,7 +1858,7 @@ class Span(LrsSimpleChar1, LrsContainer):
def toElement(self, se):
element = Element('Span')
for (key, value) in self.attrs.items():
element.set(key, unicode_type(value))
element.set(key, str(value))
appendTextElements(element, self.contents, se)
return element
@@ -1871,9 +1871,9 @@ class EmpLine(LrsTextTag, LrsSimpleChar1):
def __init__(self, text=None, emplineposition='before', emplinetype='solid'):
LrsTextTag.__init__(self, text, [LrsSimpleChar1])
if emplineposition not in self.__class__.emplinepositions:
raise LrsError('emplineposition for an EmpLine must be one of: '+unicode_type(self.__class__.emplinepositions))
raise LrsError('emplineposition for an EmpLine must be one of: '+str(self.__class__.emplinepositions))
if emplinetype not in self.__class__.emplinetypes:
raise LrsError('emplinetype for an EmpLine must be one of: '+unicode_type(self.__class__.emplinetypes))
raise LrsError('emplinetype for an EmpLine must be one of: '+str(self.__class__.emplinetypes))
self.emplinetype = emplinetype
self.emplineposition = emplineposition
@@ -1933,9 +1933,9 @@ class BlockSpace(LrsContainer):
element = Element("BlockSpace")
if self.xspace != 0:
element.attrib["xspace"] = unicode_type(self.xspace)
element.attrib["xspace"] = str(self.xspace)
if self.yspace != 0:
element.attrib["yspace"] = unicode_type(self.yspace)
element.attrib["yspace"] = str(self.yspace)
return element
@@ -1949,7 +1949,7 @@ class CharButton(LrsSimpleChar1, LrsContainer):
"""
def __init__(self, button, text=None):
LrsContainer.__init__(self, [bytes, unicode_type, Text, LrsSimpleChar1])
LrsContainer.__init__(self, [bytes, str, Text, LrsSimpleChar1])
self.button = None
if button is not None:
self.setButton(button)
@@ -1979,7 +1979,7 @@ class CharButton(LrsSimpleChar1, LrsContainer):
container.appendLrfTag(LrfTag("CharButtonEnd"))
def toElement(self, se):
cb = Element("CharButton", refobj=unicode_type(self.button.objId))
cb = Element("CharButton", refobj=str(self.button.objId))
appendTextElements(cb, self.contents, se)
return cb
@@ -2081,8 +2081,8 @@ class JumpButton(LrsObject, LrsContainer):
b = self.lrsObjectElement("Button")
pb = SubElement(b, "PushButton")
SubElement(pb, "JumpTo",
refpage=unicode_type(self.textBlock.parent.objId),
refobj=unicode_type(self.textBlock.objId))
refpage=str(self.textBlock.parent.objId),
refobj=str(self.textBlock.objId))
return b
@@ -2230,8 +2230,8 @@ class PutObj(LrsContainer):
self.content.objId)))
def toElement(self, se):
el = Element("PutObj", x1=unicode_type(self.x1), y1=unicode_type(self.y1),
refobj=unicode_type(self.content.objId))
el = Element("PutObj", x1=str(self.x1), y1=str(self.y1),
refobj=str(self.content.objId))
return el
@@ -2313,9 +2313,9 @@ class Image(LrsObject, LrsContainer, LrsAttributes):
def toElement(self, se):
element = self.lrsObjectElement("Image", **self.attrs)
element.set("refstream", unicode_type(self.refstream.objId))
element.set("refstream", str(self.refstream.objId))
for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]:
element.set(name, unicode_type(getattr(self, name)))
element.set(name, str(getattr(self, name)))
return element
def toLrf(self, lrfWriter):
@@ -2396,9 +2396,9 @@ class ImageBlock(LrsObject, LrsContainer, LrsAttributes):
def toElement(self, se):
element = self.lrsObjectElement("ImageBlock", **self.attrs)
element.set("refstream", unicode_type(self.refstream.objId))
element.set("refstream", str(self.refstream.objId))
for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]:
element.set(name, unicode_type(getattr(self, name)))
element.set(name, str(getattr(self, name)))
element.text = self.alttext
return element