1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-17 11:43:30 +02:00

Removed polyglots unicode_type usage

This commit is contained in:
2020-04-20 19:25:28 +02:00
parent ef7e2b10be
commit 128705f258
130 changed files with 657 additions and 716 deletions

View File

@@ -9,7 +9,7 @@ import urllib.parse
from ebook_converter import relpath, guess_type, prints, force_unicode
from ebook_converter.utils.config_base import tweaks
from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, getcwd, iteritems, itervalues, as_unicode
from ebook_converter.polyglot.builtins import codepoint_to_chr, getcwd, iteritems, itervalues, as_unicode
from ebook_converter.polyglot.urllib import unquote
@@ -190,7 +190,7 @@ coding = list(zip(
def roman(num):
if num <= 0 or num >= 4000 or int(num) != num:
return unicode_type(num)
return str(num)
result = []
for d, r in coding:
while num >= d:
@@ -205,7 +205,7 @@ def fmt_sidx(i, fmt='%.2f', use_roman=False):
try:
i = float(i)
except TypeError:
return unicode_type(i)
return str(i)
if int(i) == float(i):
return roman(int(i)) if use_roman else '%d'%int(i)
return fmt%i
@@ -249,7 +249,7 @@ class Resource(object):
self._href = href_or_path
else:
pc = url[2]
if isinstance(pc, unicode_type):
if isinstance(pc, str):
pc = pc.encode('utf-8')
pc = unquote(pc).decode('utf-8')
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
@@ -270,7 +270,7 @@ class Resource(object):
basedir = getcwd()
if self.path is None:
return self._href
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
f = self.fragment.encode('utf-8') if isinstance(self.fragment, str) else self.fragment
frag = '#'+as_unicode(urllib.parse.quote(f)) if self.fragment else ''
if self.path == basedir:
return ''+frag
@@ -278,7 +278,7 @@ class Resource(object):
rpath = relpath(self.path, basedir)
except OSError: # On windows path and basedir could be on different drives
rpath = self.path
if isinstance(rpath, unicode_type):
if isinstance(rpath, str):
rpath = rpath.encode('utf-8')
return as_unicode(urllib.parse.quote(rpath.replace(os.sep, '/')))+frag
@@ -315,7 +315,7 @@ class ResourceCollection(object):
return '[%s]'%', '.join(resources)
def __repr__(self):
return unicode_type(self)
return str(self)
def append(self, resource):
if not isinstance(resource, Resource):
@@ -377,7 +377,7 @@ def check_isbn13(isbn):
check = 10 - (sum(products)%10)
if check == 10:
check = 0
if unicode_type(check) == isbn[12]:
if str(check) == isbn[12]:
return isbn
except Exception:
pass

View File

@@ -3,7 +3,6 @@ from contextlib import closing
from ebook_converter.customize import FileTypePlugin
from ebook_converter.utils.localization import canonicalize_lang
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3'
@@ -167,7 +166,7 @@ def get_comic_book_info(d, mi, series_index='volume'):
from datetime import date
try:
dt = date(puby, 6 if pubm is None else pubm, 15)
dt = parse_only_date(unicode_type(dt))
dt = parse_only_date(str(dt))
mi.pubdate = dt
except Exception:
pass

View File

@@ -7,7 +7,7 @@ from ebook_converter.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
from ebook_converter.library.field_metadata import FieldMetadata
from ebook_converter.utils.icu import sort_key
from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3'
@@ -22,7 +22,7 @@ SIMPLE_SET = frozenset(SIMPLE_GET - {'identifiers'})
def human_readable(size, precision=2):
""" Convert a size in bytes into megabytes """
return ('%.'+unicode_type(precision)+'f'+ 'MB') % (size/(1024*1024),)
return ('%.'+str(precision)+'f'+ 'MB') % (size/(1024*1024),)
NULL_VALUES = {
@@ -606,14 +606,14 @@ class Metadata(object):
return authors_to_string(self.authors)
def format_tags(self):
return ', '.join([unicode_type(t) for t in sorted(self.tags, key=sort_key)])
return ', '.join([str(t) for t in sorted(self.tags, key=sort_key)])
def format_rating(self, v=None, divide_by=1):
if v is None:
if self.rating is not None:
return unicode_type(self.rating/divide_by)
return str(self.rating/divide_by)
return 'None'
return unicode_type(v/divide_by)
return str(v/divide_by)
def format_field(self, key, series_with_index=True):
'''
@@ -637,15 +637,15 @@ class Metadata(object):
if cmeta and cmeta['datatype'] == 'series':
if self.get(tkey):
res = self.get_extra(tkey)
return (unicode_type(cmeta['name']+'_index'),
return (str(cmeta['name']+'_index'),
self.format_series_index(res), res, cmeta)
else:
return (unicode_type(cmeta['name']+'_index'), '', '', cmeta)
return (str(cmeta['name']+'_index'), '', '', cmeta)
if key in self.custom_field_keys():
res = self.get(key, None) # get evaluates all necessary composites
cmeta = self.get_user_metadata(key, make_copy=False)
name = unicode_type(cmeta['name'])
name = str(cmeta['name'])
if res is None or res == '': # can't check "not res" because of numeric fields
return (name, res, None, None)
orig_res = res
@@ -668,7 +668,7 @@ class Metadata(object):
res = fmt.format(res)
except:
pass
return (name, unicode_type(res), orig_res, cmeta)
return (name, str(res), orig_res, cmeta)
# convert top-level ids into their value
if key in TOP_LEVEL_IDENTIFIERS:
@@ -682,11 +682,11 @@ class Metadata(object):
if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field':
res = self.get(key, None)
fmeta = field_metadata[fmkey]
name = unicode_type(fmeta['name'])
name = str(fmeta['name'])
if res is None or res == '':
return (name, res, None, None)
orig_res = res
name = unicode_type(fmeta['name'])
name = str(fmeta['name'])
datatype = fmeta['datatype']
if key == 'authors':
res = authors_to_string(res)
@@ -704,7 +704,7 @@ class Metadata(object):
res = '%.2g'%(res/2)
elif key == 'size':
res = human_readable(res)
return (name, unicode_type(res), orig_res, fmeta)
return (name, str(res), orig_res, fmeta)
return (None, None, None, None)
@@ -718,7 +718,7 @@ class Metadata(object):
ans = []
def fmt(x, y):
ans.append('%-20s: %s'%(unicode_type(x), unicode_type(y)))
ans.append('%-20s: %s'%(str(x), str(y)))
fmt('Title', self.title)
if self.title_sort:
@@ -732,7 +732,7 @@ class Metadata(object):
if getattr(self, 'book_producer', False):
fmt('Book Producer', self.book_producer)
if self.tags:
fmt('Tags', ', '.join([unicode_type(t) for t in self.tags]))
fmt('Tags', ', '.join([str(t) for t in self.tags]))
if self.series:
fmt('Series', self.series + ' #%s'%self.format_series_index())
if not self.is_null('languages'):
@@ -745,7 +745,7 @@ class Metadata(object):
if self.pubdate is not None:
fmt('Published', isoformat(self.pubdate))
if self.rights is not None:
fmt('Rights', unicode_type(self.rights))
fmt('Rights', str(self.rights))
if self.identifiers:
fmt('Identifiers', ', '.join(['%s:%s'%(k, v) for k, v in
iteritems(self.identifiers)]))
@@ -756,7 +756,7 @@ class Metadata(object):
val = self.get(key, None)
if val:
(name, val) = self.format_field(key)
fmt(name, unicode_type(val))
fmt(name, str(val))
return '\n'.join(ans)
def to_html(self):
@@ -765,22 +765,22 @@ class Metadata(object):
'''
from ebook_converter.ebooks.metadata import authors_to_string
from ebook_converter.utils.date import isoformat
ans = [(_('Title'), unicode_type(self.title))]
ans = [(_('Title'), str(self.title))]
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
ans += [(_('Publisher'), unicode_type(self.publisher))]
ans += [(_('Producer'), unicode_type(self.book_producer))]
ans += [(_('Comments'), unicode_type(self.comments))]
ans += [('ISBN', unicode_type(self.isbn))]
ans += [(_('Tags'), ', '.join([unicode_type(t) for t in self.tags]))]
ans += [(_('Publisher'), str(self.publisher))]
ans += [(_('Producer'), str(self.book_producer))]
ans += [(_('Comments'), str(self.comments))]
ans += [('ISBN', str(self.isbn))]
ans += [(_('Tags'), ', '.join([str(t) for t in self.tags]))]
if self.series:
ans += [(_('Series'), unicode_type(self.series) + ' #%s'%self.format_series_index())]
ans += [(_('Series'), str(self.series) + ' #%s'%self.format_series_index())]
ans += [(_('Languages'), ', '.join(self.languages))]
if self.timestamp is not None:
ans += [(_('Timestamp'), unicode_type(isoformat(self.timestamp, as_utc=False, sep=' ')))]
ans += [(_('Timestamp'), str(isoformat(self.timestamp, as_utc=False, sep=' ')))]
if self.pubdate is not None:
ans += [(_('Published'), unicode_type(isoformat(self.pubdate, as_utc=False, sep=' ')))]
ans += [(_('Published'), str(isoformat(self.pubdate, as_utc=False, sep=' ')))]
if self.rights is not None:
ans += [(_('Rights'), unicode_type(self.rights))]
ans += [(_('Rights'), str(self.rights))]
for key in self.custom_field_keys():
val = self.get(key, None)
if val:

View File

@@ -14,7 +14,6 @@ from ebook_converter.utils.imghdr import identify
from ebook_converter import guess_type, guess_all_extensions, prints, force_unicode
from ebook_converter.ebooks.metadata import MetaInformation, check_isbn
from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.binary import as_base64_unicode
@@ -115,7 +114,7 @@ def get_metadata(stream):
# fallback for book_title
if book_title:
book_title = unicode_type(book_title)
book_title = str(book_title)
else:
book_title = force_unicode(os.path.splitext(
os.path.basename(getattr(stream, 'name',
@@ -252,7 +251,7 @@ def _parse_tags(root, mi, ctx):
# -- i18n Translations-- ?
tags = ctx.XPath('//fb:%s/fb:genre/text()' % genre_sec)(root)
if tags:
mi.tags = list(map(unicode_type, tags))
mi.tags = list(map(str, tags))
break
@@ -304,7 +303,7 @@ def _parse_pubdate(root, mi, ctx):
year = ctx.XPath('number(//fb:publish-info/fb:year/text())')(root)
if float.is_integer(year):
# only year is available, so use 2nd of June
mi.pubdate = parse_only_date(unicode_type(int(year)))
mi.pubdate = parse_only_date(str(int(year)))
def _parse_language(root, mi, ctx):

View File

@@ -6,7 +6,7 @@ from ebook_converter.ebooks.metadata.opf2 import OPF
from ebook_converter import isbytestring
from ebook_converter.customize.ui import get_file_type_metadata, set_file_type_metadata
from ebook_converter.ebooks.metadata import MetaInformation, string_to_authors
from ebook_converter.polyglot.builtins import getcwd, unicode_type
from ebook_converter.polyglot.builtins import getcwd
__license__ = 'GPL v3'
@@ -229,7 +229,7 @@ def forked_read_metadata(path, tdir):
f.seek(0, 2)
sz = f.tell()
with lopen(os.path.join(tdir, 'size.txt'), 'wb') as s:
s.write(unicode_type(sz).encode('ascii'))
s.write(str(sz).encode('ascii'))
f.seek(0)
mi = get_metadata(f, fmt)
if mi.cover_data and mi.cover_data[1]:

View File

@@ -27,7 +27,7 @@ from ebook_converter import prints, guess_type
from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
from ebook_converter.utils.config import tweaks
from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.polyglot.builtins import iteritems, unicode_type, getcwd
from ebook_converter.polyglot.builtins import iteritems, getcwd
from ebook_converter.polyglot.urllib import unquote
@@ -91,7 +91,7 @@ class Resource(object): # {{{
self._href = href_or_path
else:
pc = url[2]
if isinstance(pc, unicode_type):
if isinstance(pc, str):
pc = pc.encode('utf-8')
pc = pc.decode('utf-8')
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
@@ -159,7 +159,7 @@ class ResourceCollection(object): # {{{
__unicode__ = __str__
def __repr__(self):
return unicode_type(self)
return str(self)
def append(self, resource):
if not isinstance(resource, Resource):
@@ -219,10 +219,10 @@ class ManifestItem(Resource): # {{{
__unicode__ = __unicode__representation__
def __str__(self):
return unicode_type(self).encode('utf-8')
return str(self).encode('utf-8')
def __repr__(self):
return unicode_type(self)
return str(self)
def __getitem__(self, index):
if index == 0:
@@ -425,7 +425,7 @@ class Guide(ResourceCollection): # {{{
class MetadataField(object):
def __init__(self, name, is_dc=True, formatter=None, none_is=None,
renderer=lambda x: unicode_type(x)):
renderer=lambda x: str(x)):
self.name = name
self.is_dc = is_dc
self.formatter = formatter
@@ -806,7 +806,7 @@ class OPF(object): # {{{
def unquote_urls(self):
def get_href(item):
raw = unquote(item.get('href', ''))
if not isinstance(raw, unicode_type):
if not isinstance(raw, str):
raw = raw.decode('utf-8')
return raw
for item in self.itermanifest():
@@ -835,7 +835,7 @@ class OPF(object): # {{{
titles = ()
if val:
title = titles[0] if titles else self.create_metadata_element('title')
title.text = re.sub(r'\s+', ' ', unicode_type(val))
title.text = re.sub(r'\s+', ' ', str(val))
@property
def authors(self):
@@ -878,7 +878,7 @@ class OPF(object): # {{{
for key in matches[0].attrib:
if key.endswith('file-as'):
matches[0].attrib.pop(key)
matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], unicode_type(val))
matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], str(val))
@property
def tags(self):
@@ -895,7 +895,7 @@ class OPF(object): # {{{
tag.getparent().remove(tag)
for tag in val:
elem = self.create_metadata_element('subject')
self.set_text(elem, unicode_type(tag))
self.set_text(elem, str(tag))
@property
def pubdate(self):
@@ -951,7 +951,7 @@ class OPF(object): # {{{
xid = x.get('id', None)
is_package_identifier = uuid_id is not None and uuid_id == xid
if is_package_identifier:
self.set_text(x, unicode_type(uuid.uuid4()))
self.set_text(x, str(uuid.uuid4()))
for attr in x.attrib:
if attr.endswith('scheme'):
x.attrib[attr] = 'uuid'
@@ -962,7 +962,7 @@ class OPF(object): # {{{
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'}
matches = [self.create_metadata_element('identifier',
attrib=attrib)]
self.set_text(matches[0], unicode_type(val))
self.set_text(matches[0], str(val))
def get_identifiers(self):
identifiers = {}
@@ -1015,7 +1015,7 @@ class OPF(object): # {{{
for typ, val in iteritems(identifiers):
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: typ.upper()}
self.set_text(self.create_metadata_element(
'identifier', attrib=attrib), unicode_type(val))
'identifier', attrib=attrib), str(val))
@property
def application_id(self):
@@ -1038,7 +1038,7 @@ class OPF(object): # {{{
if uuid_id and uuid_id in removed_ids:
attrib['id'] = uuid_id
self.set_text(self.create_metadata_element(
'identifier', attrib=attrib), unicode_type(val))
'identifier', attrib=attrib), str(val))
@property
def uuid(self):
@@ -1052,7 +1052,7 @@ class OPF(object): # {{{
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'uuid'}
matches = [self.create_metadata_element('identifier',
attrib=attrib)]
self.set_text(matches[0], unicode_type(val))
self.set_text(matches[0], str(val))
@property
def language(self):
@@ -1083,7 +1083,7 @@ class OPF(object): # {{{
for lang in val:
l = self.create_metadata_element('language')
self.set_text(l, unicode_type(lang))
self.set_text(l, str(lang))
@property
def raw_languages(self):
@@ -1103,7 +1103,7 @@ class OPF(object): # {{{
if not matches:
matches = [self.create_metadata_element('contributor')]
matches[0].set('{%s}role'%self.NAMESPACES['opf'], 'bkp')
self.set_text(matches[0], unicode_type(val))
self.set_text(matches[0], str(val))
def identifier_iter(self):
for item in self.identifier_path(self.metadata):
@@ -1363,7 +1363,7 @@ class OPFCreator(Metadata):
self.page_progression_direction = None
self.primary_writing_mode = None
if self.application_id is None:
self.application_id = unicode_type(uuid.uuid4())
self.application_id = str(uuid.uuid4())
if not isinstance(self.toc, TOC):
self.toc = None
if not self.authors:
@@ -1487,7 +1487,7 @@ class OPFCreator(Metadata):
a(DC_ELEM('contributor', '%s (%s) [%s]'%(__appname__, __version__,
'https://calibre-ebook.com'), opf_attrs={'role':'bkp',
'file-as':__appname__}))
a(DC_ELEM('identifier', unicode_type(self.application_id),
a(DC_ELEM('identifier', str(self.application_id),
opf_attrs={'scheme':__appname__},
dc_attrs={'id':__appname__+'_id'}))
if getattr(self, 'pubdate', None) is not None:
@@ -1515,7 +1515,7 @@ class OPFCreator(Metadata):
if self.title_sort:
a(CAL_ELEM('calibre:title_sort', self.title_sort))
if self.rating is not None:
a(CAL_ELEM('calibre:rating', unicode_type(self.rating)))
a(CAL_ELEM('calibre:rating', str(self.rating)))
if self.timestamp is not None:
a(CAL_ELEM('calibre:timestamp', self.timestamp.isoformat()))
if self.publication_type is not None:
@@ -1532,7 +1532,7 @@ class OPFCreator(Metadata):
href = ref.href()
if isinstance(href, bytes):
href = href.decode('utf-8')
item = E.item(id=unicode_type(ref.id), href=href)
item = E.item(id=str(ref.id), href=href)
item.set('media-type', ref.mime_type)
manifest.append(item)
spine = E.spine()
@@ -1583,10 +1583,10 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
from ebook_converter.ebooks.oeb.base import OPF, DC
if not mi.application_id:
mi.application_id = unicode_type(uuid.uuid4())
mi.application_id = str(uuid.uuid4())
if not mi.uuid:
mi.uuid = unicode_type(uuid.uuid4())
mi.uuid = str(uuid.uuid4())
if not mi.book_producer:
mi.book_producer = __appname__ + ' (%s) '%__version__ + \
@@ -1667,7 +1667,7 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
if mi.series_index is not None:
meta('series_index', mi.format_series_index())
if mi.rating is not None:
meta('rating', unicode_type(mi.rating))
meta('rating', str(mi.rating))
if hasattr(mi.timestamp, 'isoformat'):
meta('timestamp', isoformat(mi.timestamp))
if mi.publication_type:
@@ -1682,7 +1682,7 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
metadata[-1].tail = '\n' +(' '*4)
if mi.cover:
if not isinstance(mi.cover, unicode_type):
if not isinstance(mi.cover, str):
mi.cover = mi.cover.decode(filesystem_encoding)
guide.text = '\n'+(' '*8)
r = guide.makeelement(OPF('reference'),

View File

@@ -10,7 +10,7 @@ from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.ebooks.metadata import (
MetaInformation, string_to_authors, check_isbn, check_doi)
from ebook_converter.utils.ipc.simple_worker import fork_job, WorkerError
from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3'
@@ -94,8 +94,8 @@ def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', pr
args['creationflags'] = w.HIGH_PRIORITY_CLASS | w.CREATE_NO_WINDOW
try:
subprocess.check_call([
pdftoppm, '-cropbox', '-' + image_format, '-f', unicode_type(first),
'-l', unicode_type(last), pdfpath, os.path.join(outputdir, prefix)
pdftoppm, '-cropbox', '-' + image_format, '-f', str(first),
'-l', str(last), pdfpath, os.path.join(outputdir, prefix)
], **args)
except subprocess.CalledProcessError as e:
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)

View File

@@ -6,7 +6,7 @@ import re
from ebook_converter import force_unicode
from ebook_converter.ebooks.metadata import MetaInformation
from ebook_converter.polyglot.builtins import codepoint_to_chr, string_or_bytes, unicode_type, int_to_byte
from ebook_converter.polyglot.builtins import codepoint_to_chr, string_or_bytes, int_to_byte
title_pat = re.compile(br'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
author_pat = re.compile(br'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
@@ -74,7 +74,7 @@ def detect_codepage(stream):
def encode(unistr):
if not isinstance(unistr, unicode_type):
if not isinstance(unistr, str):
unistr = force_unicode(unistr)
return ''.join(c if ord(c) < 128 else '\\u{}?'.format(ord(c)) for c in unistr)

View File

@@ -12,7 +12,7 @@ from ebook_converter.constants import __appname__, __version__
from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.polyglot.builtins import unicode_type, getcwd
from ebook_converter.polyglot.builtins import getcwd
from ebook_converter.polyglot.urllib import unquote
@@ -67,7 +67,7 @@ class TOC(list):
def __str__(self):
lines = ['TOC: %s#%s %s'%(self.href, self.fragment, self.text)]
for child in self:
c = unicode_type(child).splitlines()
c = str(child).splitlines()
for l in c:
lines.append('\t'+l)
return '\n'.join(lines)
@@ -245,8 +245,8 @@ class TOC(list):
def render(self, stream, uid):
root = E.ncx(
E.head(
E.meta(name='dtb:uid', content=unicode_type(uid)),
E.meta(name='dtb:depth', content=unicode_type(self.depth())),
E.meta(name='dtb:uid', content=str(uid)),
E.meta(name='dtb:depth', content=str(self.depth())),
E.meta(name='dtb:generator', content='%s (%s)'%(__appname__,
__version__)),
E.meta(name='dtb:totalPageCount', content='0'),
@@ -268,10 +268,10 @@ class TOC(list):
text = clean_xml_chars(text)
elem = E.navPoint(
E.navLabel(E.text(re.sub(r'\s+', ' ', text))),
E.content(src=unicode_type(np.href)+(('#' + unicode_type(np.fragment))
E.content(src=str(np.href)+(('#' + str(np.fragment))
if np.fragment else '')),
id=item_id,
playOrder=unicode_type(np.play_order)
playOrder=str(np.play_order)
)
au = getattr(np, 'author', None)
if au: