1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-25 07:51:28 +02:00

Removed polyglots unicode_type usage

This commit is contained in:
2020-04-20 19:25:28 +02:00
parent ef7e2b10be
commit 128705f258
130 changed files with 657 additions and 716 deletions
+9 -9
View File
@@ -14,7 +14,7 @@ import urllib.parse
import urllib.request import urllib.request
import warnings import warnings
from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, hasenv, native_string_type from ebook_converter.polyglot.builtins import codepoint_to_chr, hasenv, native_string_type
from functools import partial from functools import partial
if not hasenv('CALIBRE_SHOW_DEPRECATION_WARNINGS'): if not hasenv('CALIBRE_SHOW_DEPRECATION_WARNINGS'):
@@ -80,7 +80,7 @@ def get_types_map():
def to_unicode(raw, encoding='utf-8', errors='strict'): def to_unicode(raw, encoding='utf-8', errors='strict'):
if isinstance(raw, unicode_type): if isinstance(raw, str):
return raw return raw
return raw.decode(encoding, errors) return raw.decode(encoding, errors)
@@ -170,7 +170,7 @@ def prints(*args, **kwargs):
safe_encode = kwargs.get('safe_encode', False) safe_encode = kwargs.get('safe_encode', False)
count = 0 count = 0
for i, arg in enumerate(args): for i, arg in enumerate(args):
if isinstance(arg, unicode_type): if isinstance(arg, str):
if iswindows: if iswindows:
from ebook_converter.utils.terminal import Detect from ebook_converter.utils.terminal import Detect
cs = Detect(file) cs = Detect(file)
@@ -194,8 +194,8 @@ def prints(*args, **kwargs):
try: try:
arg = native_string_type(arg) arg = native_string_type(arg)
except ValueError: except ValueError:
arg = unicode_type(arg) arg = str(arg)
if isinstance(arg, unicode_type): if isinstance(arg, str):
try: try:
arg = arg.encode(enc) arg = arg.encode(enc)
except UnicodeEncodeError: except UnicodeEncodeError:
@@ -334,7 +334,7 @@ def get_parsed_proxy(typ='http', debug=True):
traceback.print_exc() traceback.print_exc()
else: else:
if debug: if debug:
prints('Using http proxy', unicode_type(ans)) prints('Using http proxy', str(ans))
return ans return ans
@@ -517,7 +517,7 @@ def strftime(fmt, t=None):
if isinstance(ans, bytes): if isinstance(ans, bytes):
ans = ans.decode(preferred_encoding, 'replace') ans = ans.decode(preferred_encoding, 'replace')
if early_year: if early_year:
ans = ans.replace('_early year hack##', unicode_type(orig_year)) ans = ans.replace('_early year hack##', str(orig_year))
return ans return ans
@@ -629,7 +629,7 @@ def force_unicode(obj, enc=preferred_encoding):
def as_unicode(obj, enc=preferred_encoding): def as_unicode(obj, enc=preferred_encoding):
if not isbytestring(obj): if not isbytestring(obj):
try: try:
obj = unicode_type(obj) obj = str(obj)
except Exception: except Exception:
try: try:
obj = native_string_type(obj) obj = native_string_type(obj)
@@ -652,7 +652,7 @@ def human_readable(size, sep=' '):
if size < (1 << ((i + 1) * 10)): if size < (1 << ((i + 1) * 10)):
divisor, suffix = (1 << (i * 10)), candidate divisor, suffix = (1 << (i * 10)), candidate
break break
size = unicode_type(float(size)/divisor) size = str(float(size)/divisor)
if size.find(".") > -1: if size.find(".") > -1:
size = size[:size.find(".")+2] size = size[:size.find(".")+2]
if size.endswith('.0'): if size.endswith('.0'):
+10 -7
View File
@@ -1,12 +1,15 @@
#!/usr/bin/env python2 import codecs
# vim:fileencoding=utf-8 import collections
# License: GPLv3 Copyright: 2015, Kovid Goyal <kovid at kovidgoyal.net> import importlib
from ebook_converter.polyglot.builtins import unicode_type, environ_item, hasenv, getenv, as_unicode, native_string_type import locale
import sys, locale, codecs, os, importlib, collections import os
import sys
from ebook_converter.polyglot.builtins import environ_item, hasenv, getenv, as_unicode, native_string_type
__appname__ = 'calibre' __appname__ = 'calibre'
numeric_version = (4, 12, 0) numeric_version = (4, 12, 0)
__version__ = '.'.join(map(unicode_type, numeric_version)) __version__ = '.'.join(map(str, numeric_version))
git_version = None git_version = None
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>" __author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
@@ -216,7 +219,7 @@ class Plugins(collections.Mapping):
except Exception as err: except Exception as err:
p = None p = None
try: try:
plugin_err = unicode_type(err) plugin_err = str(err)
except Exception: except Exception:
plugin_err = as_unicode(native_string_type(err), encoding=preferred_encoding, errors='replace') plugin_err = as_unicode(native_string_type(err), encoding=preferred_encoding, errors='replace')
self._plugins[name] = p, plugin_err self._plugins[name] = p, plugin_err
+3 -3
View File
@@ -15,7 +15,7 @@ import operator
import string import string
from ebook_converter.css_selectors.errors import SelectorSyntaxError, ExpressionError from ebook_converter.css_selectors.errors import SelectorSyntaxError, ExpressionError
from ebook_converter.polyglot.builtins import unicode_type, codepoint_to_chr from ebook_converter.polyglot.builtins import codepoint_to_chr
utab = {c:c+32 for c in range(ord(u'A'), ord(u'Z')+1)} utab = {c:c+32 for c in range(ord(u'A'), ord(u'Z')+1)}
@@ -25,7 +25,7 @@ if sys.version_info.major < 3:
def ascii_lower(string): def ascii_lower(string):
"""Lower-case, but only in the ASCII range.""" """Lower-case, but only in the ASCII range."""
return string.translate(utab if isinstance(string, unicode_type) else tab) return string.translate(utab if isinstance(string, str) else tab)
def urepr(x): def urepr(x):
if isinstance(x, list): if isinstance(x, list):
@@ -469,7 +469,7 @@ def parse_simple_selector(stream, inside_negation=False):
if ident.lower() in special_pseudo_elements: if ident.lower() in special_pseudo_elements:
# Special case: CSS 2.1 pseudo-elements can have a single ':' # Special case: CSS 2.1 pseudo-elements can have a single ':'
# Any new pseudo-element must have two. # Any new pseudo-element must have two.
pseudo_element = unicode_type(ident) pseudo_element = str(ident)
continue continue
if stream.peek() != ('DELIM', '('): if stream.peek() != ('DELIM', '('):
result = Pseudo(result, ident) result = Pseudo(result, ident)
+1 -2
View File
@@ -2,7 +2,6 @@ import os, sys, zipfile, importlib
from ebook_converter.constants import numeric_version, iswindows, isosx from ebook_converter.constants import numeric_version, iswindows, isosx
from ebook_converter.ptempfile import PersistentTemporaryFile from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -196,7 +195,7 @@ class Plugin(object): # {{{
config_dialog.exec_() config_dialog.exec_()
if config_dialog.result() == QDialog.Accepted: if config_dialog.result() == QDialog.Accepted:
sc = unicode_type(sc.text()).strip() sc = str(sc.text()).strip()
customize_plugin(self, sc) customize_plugin(self, sc)
geom = bytearray(config_dialog.saveGeometry()) geom = bytearray(config_dialog.saveGeometry())
+2 -3
View File
@@ -5,7 +5,6 @@ import re, os, shutil, numbers
from ebook_converter import CurrentDir from ebook_converter import CurrentDir
from ebook_converter.customize import Plugin from ebook_converter.customize import Plugin
from ebook_converter.polyglot.builtins import unicode_type
class ConversionOption(object): class ConversionOption(object):
@@ -79,7 +78,7 @@ class OptionRecommendation(object):
self.option.choices: self.option.choices:
raise ValueError('OpRec: %s: Recommended value not in choices'% raise ValueError('OpRec: %s: Recommended value not in choices'%
self.option.name) self.option.name)
if not (isinstance(self.recommended_value, (numbers.Number, bytes, unicode_type)) or self.recommended_value is None): if not (isinstance(self.recommended_value, (numbers.Number, bytes, str)) or self.recommended_value is None):
raise ValueError('OpRec: %s:'%self.option.name + repr( raise ValueError('OpRec: %s:'%self.option.name + repr(
self.recommended_value) + ' is not a string or a number') self.recommended_value) + ' is not a string or a number')
@@ -298,7 +297,7 @@ class OutputFormatPlugin(Plugin):
@property @property
def is_periodical(self): def is_periodical(self):
return self.oeb.metadata.publication_type and \ return self.oeb.metadata.publication_type and \
unicode_type(self.oeb.metadata.publication_type[0]).startswith('periodical:') str(self.oeb.metadata.publication_type[0]).startswith('periodical:')
def specialize_options(self, log, opts, input_fmt): def specialize_options(self, log, opts, input_fmt):
''' '''
+2 -2
View File
@@ -18,7 +18,7 @@ from ebook_converter.utils.config import (make_config_dir, Config, ConfigProxy,
plugin_dir, OptionParser) plugin_dir, OptionParser)
# from ebook_converter.ebooks.metadata.sources.base import Source # from ebook_converter.ebooks.metadata.sources.base import Source
from ebook_converter.constants import DEBUG, numeric_version from ebook_converter.constants import DEBUG, numeric_version
from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type from ebook_converter.polyglot.builtins import iteritems, itervalues
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -740,7 +740,7 @@ def build_plugin(path):
from ebook_converter import prints from ebook_converter import prints
from ebook_converter.ptempfile import PersistentTemporaryFile from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.utils.zipfile import ZipFile, ZIP_STORED from ebook_converter.utils.zipfile import ZipFile, ZIP_STORED
path = unicode_type(path) path = str(path)
names = frozenset(os.listdir(path)) names = frozenset(os.listdir(path))
if '__init__.py' not in names: if '__init__.py' not in names:
prints(path, ' is not a valid plugin') prints(path, ' is not a valid plugin')
+3 -3
View File
@@ -12,7 +12,7 @@ from ebook_converter.constants import ispy3
from ebook_converter.customize import (Plugin, numeric_version, platform, from ebook_converter.customize import (Plugin, numeric_version, platform,
InvalidPlugin, PluginNotFound) InvalidPlugin, PluginNotFound)
from ebook_converter.polyglot.builtins import (itervalues, string_or_bytes, from ebook_converter.polyglot.builtins import (itervalues, string_or_bytes,
unicode_type, reload) reload)
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -215,7 +215,7 @@ class PluginLoader(object):
if ans.minimum_calibre_version > numeric_version: if ans.minimum_calibre_version > numeric_version:
raise InvalidPlugin( raise InvalidPlugin(
'The plugin at %s needs a version of calibre >= %s' % 'The plugin at %s needs a version of calibre >= %s' %
(as_unicode(path_to_zip_file), '.'.join(map(unicode_type, (as_unicode(path_to_zip_file), '.'.join(map(str,
ans.minimum_calibre_version)))) ans.minimum_calibre_version))))
if platform not in ans.supported_platforms: if platform not in ans.supported_platforms:
@@ -230,7 +230,7 @@ class PluginLoader(object):
raise raise
def _locate_code(self, zf, path_to_zip_file): def _locate_code(self, zf, path_to_zip_file):
names = [x if isinstance(x, unicode_type) else x.decode('utf-8') for x in names = [x if isinstance(x, str) else x.decode('utf-8') for x in
zf.namelist()] zf.namelist()]
names = [x[1:] if x[0] == '/' else x for x in names] names = [x[1:] if x[0] == '/' else x for x in names]
+2 -3
View File
@@ -9,7 +9,6 @@ from various formats.
import os, re, numbers, sys import os, re, numbers, sys
from ebook_converter import prints from ebook_converter import prints
from ebook_converter.ebooks.chardet import xml_to_unicode from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.polyglot.builtins import unicode_type
class ConversionError(Exception): class ConversionError(Exception):
@@ -80,7 +79,7 @@ def extract_calibre_cover(raw, base, log):
if matches is None: if matches is None:
body = soup.find('body') body = soup.find('body')
if body is not None: if body is not None:
text = u''.join(map(unicode_type, body.findAll(text=True))) text = u''.join(map(str, body.findAll(text=True)))
if text.strip(): if text.strip():
# Body has text, abort # Body has text, abort
return return
@@ -150,7 +149,7 @@ def check_ebook_format(stream, current_guess):
def normalize(x): def normalize(x):
if isinstance(x, unicode_type): if isinstance(x, str):
import unicodedata import unicodedata
x = unicodedata.normalize('NFC', x) x = unicodedata.normalize('NFC', x)
return x return x
+2 -3
View File
@@ -1,5 +1,4 @@
import re, codecs import re, codecs
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -131,7 +130,7 @@ def force_encoding(raw, verbose, assume_utf8=False):
def detect_xml_encoding(raw, verbose=False, assume_utf8=False): def detect_xml_encoding(raw, verbose=False, assume_utf8=False):
if not raw or isinstance(raw, unicode_type): if not raw or isinstance(raw, str):
return raw, None return raw, None
for x in ('utf8', 'utf-16-le', 'utf-16-be'): for x in ('utf8', 'utf-16-le', 'utf-16-be'):
bom = getattr(codecs, 'BOM_'+x.upper().replace('-16', '16').replace( bom = getattr(codecs, 'BOM_'+x.upper().replace('-16', '16').replace(
@@ -175,7 +174,7 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False,
return '', None return '', None
raw, encoding = detect_xml_encoding(raw, verbose=verbose, raw, encoding = detect_xml_encoding(raw, verbose=verbose,
assume_utf8=assume_utf8) assume_utf8=assume_utf8)
if not isinstance(raw, unicode_type): if not isinstance(raw, str):
raw = raw.decode(encoding, 'replace') raw = raw.decode(encoding, 'replace')
if strip_encoding_pats: if strip_encoding_pats:
@@ -10,7 +10,7 @@ from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.customize.conversion import InputFormatPlugin from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.constants import filesystem_encoding from ebook_converter.constants import filesystem_encoding
from ebook_converter.polyglot.builtins import unicode_type, as_bytes from ebook_converter.polyglot.builtins import as_bytes
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = ('2008, Kovid Goyal <kovid at kovidgoyal.net>, ' __copyright__ = ('2008, Kovid Goyal <kovid at kovidgoyal.net>, '
@@ -41,7 +41,7 @@ class CHMInput(InputFormatPlugin):
log.debug('Processing CHM...') log.debug('Processing CHM...')
with TemporaryDirectory('_chm2oeb') as tdir: with TemporaryDirectory('_chm2oeb') as tdir:
if not isinstance(tdir, unicode_type): if not isinstance(tdir, str):
tdir = tdir.decode(filesystem_encoding) tdir = tdir.decode(filesystem_encoding)
html_input = plugin_for_input_format('html') html_input = plugin_for_input_format('html')
for opt in html_input.options: for opt in html_input.options:
@@ -129,7 +129,7 @@ class CHMInput(InputFormatPlugin):
base = os.path.dirname(os.path.abspath(htmlpath)) base = os.path.dirname(os.path.abspath(htmlpath))
def unquote(x): def unquote(x):
if isinstance(x, unicode_type): if isinstance(x, str):
x = x.encode('utf-8') x = x.encode('utf-8')
return _unquote(x).decode('utf-8') return _unquote(x).decode('utf-8')
@@ -7,7 +7,7 @@ from ebook_converter.customize.conversion import (OutputFormatPlugin,
OptionRecommendation) OptionRecommendation)
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter import CurrentDir from ebook_converter import CurrentDir
from ebook_converter.polyglot.builtins import unicode_type, as_bytes from ebook_converter.polyglot.builtins import as_bytes
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -225,15 +225,15 @@ class EPUBOutput(OutputFormatPlugin):
identifiers = oeb.metadata['identifier'] identifiers = oeb.metadata['identifier']
uuid = None uuid = None
for x in identifiers: for x in identifiers:
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'): if x.get(OPF('scheme'), None).lower() == 'uuid' or str(x).startswith('urn:uuid:'):
uuid = unicode_type(x).split(':')[-1] uuid = str(x).split(':')[-1]
break break
encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', []) encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', [])
if uuid is None: if uuid is None:
self.log.warn('No UUID identifier found') self.log.warn('No UUID identifier found')
from uuid import uuid4 from uuid import uuid4
uuid = unicode_type(uuid4()) uuid = str(uuid4())
oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid) oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid)
if encrypted_fonts and not uuid.startswith('urn:uuid:'): if encrypted_fonts and not uuid.startswith('urn:uuid:'):
@@ -241,7 +241,7 @@ class EPUBOutput(OutputFormatPlugin):
# for some absurd reason, or it will throw a hissy fit and refuse # for some absurd reason, or it will throw a hissy fit and refuse
# to use the obfuscated fonts. # to use the obfuscated fonts.
for x in identifiers: for x in identifiers:
if unicode_type(x) == uuid: if str(x) == uuid:
x.content = 'urn:uuid:'+uuid x.content = 'urn:uuid:'+uuid
with TemporaryDirectory('_epub_output') as tdir: with TemporaryDirectory('_epub_output') as tdir:
@@ -336,7 +336,7 @@ class EPUBOutput(OutputFormatPlugin):
f.write(bytes(bytearray(data[i] ^ key[i%16] for i in range(1024)))) f.write(bytes(bytearray(data[i] ^ key[i%16] for i in range(1024))))
else: else:
self.log.warn('Font', path, 'is invalid, ignoring') self.log.warn('Font', path, 'is invalid, ignoring')
if not isinstance(uri, unicode_type): if not isinstance(uri, str):
uri = uri.decode('utf-8') uri = uri.decode('utf-8')
fonts.append(''' fonts.append('''
<enc:EncryptedData> <enc:EncryptedData>
@@ -10,7 +10,7 @@ from ebook_converter.customize.conversion import (InputFormatPlugin,
from ebook_converter.utils.localization import get_lang from ebook_converter.utils.localization import get_lang
from ebook_converter.utils.filenames import ascii_filename from ebook_converter.utils.filenames import ascii_filename
from ebook_converter.utils.imghdr import what from ebook_converter.utils.imghdr import what
from ebook_converter.polyglot.builtins import unicode_type, getcwd, as_unicode from ebook_converter.polyglot.builtins import getcwd, as_unicode
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -135,7 +135,7 @@ class HTMLInput(InputFormatPlugin):
if not metadata.title: if not metadata.title:
oeb.logger.warn('Title not specified') oeb.logger.warn('Title not specified')
metadata.add('title', self.oeb.translate(__('Unknown'))) metadata.add('title', self.oeb.translate(__('Unknown')))
bookid = unicode_type(uuid.uuid4()) bookid = str(uuid.uuid4())
metadata.add('identifier', bookid, id='uuid_id', scheme='uuid') metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
for ident in metadata.identifier: for ident in metadata.identifier:
if 'id' in ident.attrib: if 'id' in ident.attrib:
@@ -225,7 +225,7 @@ class HTMLInput(InputFormatPlugin):
def link_to_local_path(self, link_, base=None): def link_to_local_path(self, link_, base=None):
from ebook_converter.ebooks.html.input import Link from ebook_converter.ebooks.html.input import Link
if not isinstance(link_, unicode_type): if not isinstance(link_, str):
try: try:
link_ = link_.decode('utf-8', 'error') link_ = link_.decode('utf-8', 'error')
except: except:
@@ -288,7 +288,7 @@ class HTMLInput(InputFormatPlugin):
# bhref refers to an already existing file. The read() method of # bhref refers to an already existing file. The read() method of
# DirContainer will call unquote on it before trying to read the # DirContainer will call unquote on it before trying to read the
# file, therefore we quote it here. # file, therefore we quote it here.
if isinstance(bhref, unicode_type): if isinstance(bhref, str):
bhref = bhref.encode('utf-8') bhref = bhref.encode('utf-8')
item.html_input_href = as_unicode(urllib.parse.quote(bhref)) item.html_input_href = as_unicode(urllib.parse.quote(bhref))
if guessed in self.OEB_STYLES: if guessed in self.OEB_STYLES:
@@ -8,7 +8,6 @@ from lxml import etree
from ebook_converter import CurrentDir from ebook_converter import CurrentDir
from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
from ebook_converter.ebooks.oeb.base import element from ebook_converter.ebooks.oeb.base import element
from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import unquote from ebook_converter.polyglot.urllib import unquote
from ebook_converter.ptempfile import PersistentTemporaryDirectory from ebook_converter.ptempfile import PersistentTemporaryDirectory
from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils.cleantext import clean_xml_chars
@@ -155,7 +154,7 @@ class HTMLOutput(OutputFormatPlugin):
toc=html_toc, meta=meta, nextLink=nextLink, toc=html_toc, meta=meta, nextLink=nextLink,
tocUrl=tocUrl, cssLink=cssLink, tocUrl=tocUrl, cssLink=cssLink,
firstContentPageLink=nextLink) firstContentPageLink=nextLink)
if isinstance(t, unicode_type): if isinstance(t, str):
t = t.encode('utf-8') t = t.encode('utf-8')
f.write(t) f.write(t)
@@ -4,7 +4,6 @@ import os
from ebook_converter.customize.conversion import OutputFormatPlugin, \ from ebook_converter.customize.conversion import OutputFormatPlugin, \
OptionRecommendation OptionRecommendation
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -78,9 +77,9 @@ class HTMLZOutput(OutputFormatPlugin):
fname = u'index' fname = u'index'
if opts.htmlz_title_filename: if opts.htmlz_title_filename:
from ebook_converter.utils.filenames import shorten_components_to from ebook_converter.utils.filenames import shorten_components_to
fname = shorten_components_to(100, (ascii_filename(unicode_type(oeb_book.metadata.title[0])),))[0] fname = shorten_components_to(100, (ascii_filename(str(oeb_book.metadata.title[0])),))[0]
with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf: with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf:
if isinstance(html, unicode_type): if isinstance(html, str):
html = html.encode('utf-8') html = html.encode('utf-8')
tf.write(html) tf.write(html)
@@ -2,7 +2,6 @@ import sys, os
from ebook_converter.customize.conversion import OutputFormatPlugin from ebook_converter.customize.conversion import OutputFormatPlugin
from ebook_converter.customize.conversion import OptionRecommendation from ebook_converter.customize.conversion import OptionRecommendation
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -15,7 +14,7 @@ class LRFOptions(object):
def __init__(self, output, opts, oeb): def __init__(self, output, opts, oeb):
def f2s(f): def f2s(f):
try: try:
return unicode_type(f[0]) return str(f[0])
except: except:
return '' return ''
m = oeb.metadata m = oeb.metadata
@@ -29,13 +28,13 @@ class LRFOptions(object):
self.title_sort = self.author_sort = '' self.title_sort = self.author_sort = ''
for x in m.creator: for x in m.creator:
if x.role == 'aut': if x.role == 'aut':
self.author = unicode_type(x) self.author = str(x)
fa = unicode_type(getattr(x, 'file_as', '')) fa = str(getattr(x, 'file_as', ''))
if fa: if fa:
self.author_sort = fa self.author_sort = fa
for x in m.title: for x in m.title:
if unicode_type(x.file_as): if str(x.file_as):
self.title_sort = unicode_type(x.file_as) self.title_sort = str(x.file_as)
self.freetext = f2s(m.description) self.freetext = f2s(m.description)
self.category = f2s(m.subject) self.category = f2s(m.subject)
self.cover = None self.cover = None
@@ -1,7 +1,6 @@
import os import os
from ebook_converter.customize.conversion import InputFormatPlugin from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -50,7 +49,7 @@ class MOBIInput(InputFormatPlugin):
raw = parse_cache.pop('calibre_raw_mobi_markup', False) raw = parse_cache.pop('calibre_raw_mobi_markup', False)
if raw: if raw:
if isinstance(raw, unicode_type): if isinstance(raw, str):
raw = raw.encode('utf-8') raw = raw.encode('utf-8')
with lopen('debug-raw.html', 'wb') as f: with lopen('debug-raw.html', 'wb') as f:
f.write(raw) f.write(raw)
@@ -1,6 +1,5 @@
from ebook_converter.customize.conversion import (OutputFormatPlugin, from ebook_converter.customize.conversion import (OutputFormatPlugin,
OptionRecommendation) OptionRecommendation)
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -119,7 +118,7 @@ class MOBIOutput(OutputFormatPlugin):
if not found: if not found:
from ebook_converter.ebooks import generate_masthead from ebook_converter.ebooks import generate_masthead
self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...') self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...')
raw = generate_masthead(unicode_type(self.oeb.metadata['title'][0])) raw = generate_masthead(str(self.oeb.metadata['title'][0]))
id, href = self.oeb.manifest.generate('masthead', 'masthead') id, href = self.oeb.manifest.generate('masthead', 'masthead')
self.oeb.manifest.add(id, href, 'image/gif', data=raw) self.oeb.manifest.add(id, href, 'image/gif', data=raw)
self.oeb.guide.add('masthead', 'Masthead Image', href) self.oeb.guide.add('masthead', 'Masthead Image', href)
@@ -163,7 +162,7 @@ class MOBIOutput(OutputFormatPlugin):
sec.nodes.remove(a) sec.nodes.remove(a)
root = TOC(klass='periodical', href=self.oeb.spine[0].href, root = TOC(klass='periodical', href=self.oeb.spine[0].href,
title=unicode_type(self.oeb.metadata.title[0])) title=str(self.oeb.metadata.title[0]))
for s in sections: for s in sections:
if articles[id(s)]: if articles[id(s)]:
@@ -6,7 +6,7 @@ import glob, os
from ebook_converter.customize.conversion import (OutputFormatPlugin, from ebook_converter.customize.conversion import (OutputFormatPlugin,
OptionRecommendation) OptionRecommendation)
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -190,8 +190,8 @@ class PDFOutput(OutputFormatPlugin):
def get_cover_data(self): def get_cover_data(self):
oeb = self.oeb oeb = self.oeb
if (oeb.metadata.cover and unicode_type(oeb.metadata.cover[0]) in oeb.manifest.ids): if (oeb.metadata.cover and str(oeb.metadata.cover[0]) in oeb.manifest.ids):
cover_id = unicode_type(oeb.metadata.cover[0]) cover_id = str(oeb.metadata.cover[0])
item = oeb.manifest.ids[cover_id] item = oeb.manifest.ids[cover_id]
self.cover_data = item.data self.cover_data = item.data
@@ -3,7 +3,6 @@ import os, io
from ebook_converter.customize.conversion import (OutputFormatPlugin, from ebook_converter.customize.conversion import (OutputFormatPlugin,
OptionRecommendation) OptionRecommendation)
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -40,7 +39,7 @@ class PMLOutput(OutputFormatPlugin):
with TemporaryDirectory('_pmlz_output') as tdir: with TemporaryDirectory('_pmlz_output') as tdir:
pmlmlizer = PMLMLizer(log) pmlmlizer = PMLMLizer(log)
pml = unicode_type(pmlmlizer.extract_content(oeb_book, opts)) pml = str(pmlmlizer.extract_content(oeb_book, opts))
with lopen(os.path.join(tdir, 'index.pml'), 'wb') as out: with lopen(os.path.join(tdir, 'index.pml'), 'wb') as out:
out.write(pml.encode(opts.pml_output_encoding, 'replace')) out.write(pml.encode(opts.pml_output_encoding, 'replace'))
@@ -3,7 +3,6 @@ import os
from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation
from ebook_converter.constants import numeric_version from ebook_converter.constants import numeric_version
from ebook_converter import walk from ebook_converter import walk
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -161,6 +160,6 @@ class RecipeInput(InputFormatPlugin):
def save_download(self, zf): def save_download(self, zf):
raw = self.recipe_source raw = self.recipe_source
if isinstance(raw, unicode_type): if isinstance(raw, str):
raw = raw.encode('utf-8') raw = raw.encode('utf-8')
zf.writestr('download.recipe', raw) zf.writestr('download.recipe', raw)
@@ -3,7 +3,6 @@ import os
from ebook_converter.customize.conversion import InputFormatPlugin from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.utils.filenames import ascii_filename from ebook_converter.utils.filenames import ascii_filename
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -73,7 +72,7 @@ class SNBInput(InputFormatPlugin):
if d['cover'] != '': if d['cover'] != '':
oeb.guide.add('cover', 'Cover', d['cover']) oeb.guide.add('cover', 'Cover', d['cover'])
bookid = unicode_type(uuid.uuid4()) bookid = str(uuid.uuid4())
oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid') oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid')
for ident in oeb.metadata.identifier: for ident in oeb.metadata.identifier:
if 'id' in ident.attrib: if 'id' in ident.attrib:
@@ -3,7 +3,6 @@ import os
from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.constants import __appname__, __version__ from ebook_converter.constants import __appname__, __version__
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -73,20 +72,20 @@ class SNBOutput(OutputFormatPlugin):
# Process Meta data # Process Meta data
meta = oeb_book.metadata meta = oeb_book.metadata
if meta.title: if meta.title:
title = unicode_type(meta.title[0]) title = str(meta.title[0])
else: else:
title = '' title = ''
authors = [unicode_type(x) for x in meta.creator if x.role == 'aut'] authors = [str(x) for x in meta.creator if x.role == 'aut']
if meta.publisher: if meta.publisher:
publishers = unicode_type(meta.publisher[0]) publishers = str(meta.publisher[0])
else: else:
publishers = '' publishers = ''
if meta.language: if meta.language:
lang = unicode_type(meta.language[0]).upper() lang = str(meta.language[0]).upper()
else: else:
lang = '' lang = ''
if meta.description: if meta.description:
abstract = unicode_type(meta.description[0]) abstract = str(meta.description[0])
else: else:
abstract = '' abstract = ''
+4 -4
View File
@@ -13,7 +13,7 @@ from ebook_converter.utils.zipfile import ZipFile
from ebook_converter import (extract, walk, isbytestring, filesystem_encoding, from ebook_converter import (extract, walk, isbytestring, filesystem_encoding,
get_types_map) get_types_map)
from ebook_converter.constants import __version__ from ebook_converter.constants import __version__
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes from ebook_converter.polyglot.builtins import string_or_bytes
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -795,7 +795,7 @@ OptionRecommendation(name='search_replace',
def unarchive(self, path, tdir): def unarchive(self, path, tdir):
extract(path, tdir) extract(path, tdir)
files = list(walk(tdir)) files = list(walk(tdir))
files = [f if isinstance(f, unicode_type) else f.decode(filesystem_encoding) files = [f if isinstance(f, str) else f.decode(filesystem_encoding)
for f in files] for f in files]
from ebook_converter.customize.ui import available_input_formats from ebook_converter.customize.ui import available_input_formats
fmts = set(available_input_formats()) fmts = set(available_input_formats())
@@ -848,7 +848,7 @@ OptionRecommendation(name='search_replace',
rec = self.get_option_by_name(name) rec = self.get_option_by_name(name)
help = getattr(rec, 'help', None) help = getattr(rec, 'help', None)
if help is not None: if help is not None:
return help.replace('%default', unicode_type(rec.recommended_value)) return help.replace('%default', str(rec.recommended_value))
def get_all_help(self): def get_all_help(self):
ans = {} ans = {}
@@ -916,7 +916,7 @@ OptionRecommendation(name='search_replace',
try: try:
val = parse_date(val, assume_utc=x=='timestamp') val = parse_date(val, assume_utc=x=='timestamp')
except: except:
self.log.exception(_('Failed to parse date/time') + ' ' + unicode_type(val)) self.log.exception(_('Failed to parse date/time') + ' ' + str(val))
continue continue
setattr(mi, x, val) setattr(mi, x, val)
+12 -13
View File
@@ -2,7 +2,6 @@ import functools, re, json
from math import ceil from math import ceil
from ebook_converter import entity_to_unicode, as_unicode from ebook_converter import entity_to_unicode, as_unicode
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -72,8 +71,8 @@ def smarten_punctuation(html, log=None):
from ebook_converter.ebooks.conversion.utils import HeuristicProcessor from ebook_converter.ebooks.conversion.utils import HeuristicProcessor
preprocessor = HeuristicProcessor(log=log) preprocessor = HeuristicProcessor(log=log)
from uuid import uuid4 from uuid import uuid4
start = 'calibre-smartypants-'+unicode_type(uuid4()) start = 'calibre-smartypants-'+str(uuid4())
stop = 'calibre-smartypants-'+unicode_type(uuid4()) stop = 'calibre-smartypants-'+str(uuid4())
html = html.replace('<!--', start) html = html.replace('<!--', start)
html = html.replace('-->', stop) html = html.replace('-->', stop)
html = preprocessor.fix_nbsp_indents(html) html = preprocessor.fix_nbsp_indents(html)
@@ -149,20 +148,20 @@ class DocAnalysis(object):
maxLineLength=1900 # Discard larger than this to stay in range maxLineLength=1900 # Discard larger than this to stay in range
buckets=20 # Each line is divided into a bucket based on length buckets=20 # Each line is divided into a bucket based on length
# print("there are "+unicode_type(len(lines))+" lines") # print("there are "+str(len(lines))+" lines")
# max = 0 # max = 0
# for line in self.lines: # for line in self.lines:
# l = len(line) # l = len(line)
# if l > max: # if l > max:
# max = l # max = l
# print("max line found is "+unicode_type(max)) # print("max line found is "+str(max))
# Build the line length histogram # Build the line length histogram
hRaw = [0 for i in range(0,buckets)] hRaw = [0 for i in range(0,buckets)]
for line in self.lines: for line in self.lines:
l = len(line) l = len(line)
if l > minLineLength and l < maxLineLength: if l > minLineLength and l < maxLineLength:
l = int(l // 100) l = int(l // 100)
# print("adding "+unicode_type(l)) # print("adding "+str(l))
hRaw[l]+=1 hRaw[l]+=1
# Normalize the histogram into percents # Normalize the histogram into percents
@@ -171,8 +170,8 @@ class DocAnalysis(object):
h = [float(count)/totalLines for count in hRaw] h = [float(count)/totalLines for count in hRaw]
else: else:
h = [] h = []
# print("\nhRaw histogram lengths are: "+unicode_type(hRaw)) # print("\nhRaw histogram lengths are: "+str(hRaw))
# print(" percents are: "+unicode_type(h)+"\n") # print(" percents are: "+str(h)+"\n")
# Find the biggest bucket # Find the biggest bucket
maxValue = 0 maxValue = 0
@@ -184,7 +183,7 @@ class DocAnalysis(object):
# print("Line lengths are too variable. Not unwrapping.") # print("Line lengths are too variable. Not unwrapping.")
return False return False
else: else:
# print(unicode_type(maxValue)+" of the lines were in one bucket") # print(str(maxValue)+" of the lines were in one bucket")
return True return True
@@ -220,8 +219,8 @@ class Dehyphenator(object):
wraptags = match.group('wraptags') wraptags = match.group('wraptags')
except: except:
wraptags = '' wraptags = ''
hyphenated = unicode_type(firsthalf) + "-" + unicode_type(secondhalf) hyphenated = str(firsthalf) + "-" + str(secondhalf)
dehyphenated = unicode_type(firsthalf) + unicode_type(secondhalf) dehyphenated = str(firsthalf) + str(secondhalf)
if self.suffixes.match(secondhalf) is None: if self.suffixes.match(secondhalf) is None:
lookupword = self.removesuffixes.sub('', dehyphenated) lookupword = self.removesuffixes.sub('', dehyphenated)
else: else:
@@ -327,7 +326,7 @@ class CSSPreProcessor(object):
# are commented lines before the first @import or @charset rule. Since # are commented lines before the first @import or @charset rule. Since
# the conversion will remove all stylesheets anyway, we don't lose # the conversion will remove all stylesheets anyway, we don't lose
# anything # anything
data = re.sub(unicode_type(r'/\*.*?\*/'), '', data, flags=re.DOTALL) data = re.sub(str(r'/\*.*?\*/'), '', data, flags=re.DOTALL)
ans, namespaced = [], False ans, namespaced = [], False
for line in data.splitlines(): for line in data.splitlines():
@@ -535,7 +534,7 @@ class HTMLPreProcessor(object):
docanalysis = DocAnalysis('pdf', html) docanalysis = DocAnalysis('pdf', html)
length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor')) length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
if length: if length:
# print("The pdf line length returned is " + unicode_type(length)) # print("The pdf line length returned is " + str(length))
# unwrap em/en dashes # unwrap em/en dashes
end_rules.append((re.compile( end_rules.append((re.compile(
r'(?<=.{%i}[–—])\s*<p>\s*(?=[\[a-z\d])' % length), lambda match: '')) r'(?<=.{%i}[–—])\s*<p>\s*(?=[\[a-z\d])' % length), lambda match: ''))
+63 -64
View File
@@ -3,7 +3,6 @@ from math import ceil
from ebook_converter.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from ebook_converter.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator
from ebook_converter.utils.logging import default_log from ebook_converter.utils.logging import default_log
from ebook_converter.utils.wordcount import get_wordcount_obj from ebook_converter.utils.wordcount import get_wordcount_obj
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -51,8 +50,8 @@ class HeuristicProcessor(object):
title = match.group('title') title = match.group('title')
if not title: if not title:
self.html_preprocess_sections = self.html_preprocess_sections + 1 self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log.debug("marked " + unicode_type(self.html_preprocess_sections) + self.log.debug("marked " + str(self.html_preprocess_sections) +
" chapters. - " + unicode_type(chap)) " chapters. - " + str(chap))
return '<h2>'+chap+'</h2>\n' return '<h2>'+chap+'</h2>\n'
else: else:
delete_whitespace = re.compile('^\\s*(?P<c>.*?)\\s*$') delete_whitespace = re.compile('^\\s*(?P<c>.*?)\\s*$')
@@ -60,16 +59,16 @@ class HeuristicProcessor(object):
txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(chap))) txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(chap)))
txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(title))) txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g<c>', html2text(title)))
self.html_preprocess_sections = self.html_preprocess_sections + 1 self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log.debug("marked " + unicode_type(self.html_preprocess_sections) + self.log.debug("marked " + str(self.html_preprocess_sections) +
" chapters & titles. - " + unicode_type(chap) + ", " + unicode_type(title)) " chapters & titles. - " + str(chap) + ", " + str(title))
return '<h2 title="'+txt_chap+', '+txt_title+'">'+chap+'</h2>\n<h3 class="sigilNotInTOC">'+title+'</h3>\n' return '<h2 title="'+txt_chap+', '+txt_title+'">'+chap+'</h2>\n<h3 class="sigilNotInTOC">'+title+'</h3>\n'
def chapter_break(self, match): def chapter_break(self, match):
chap = match.group('section') chap = match.group('section')
styles = match.group('styles') styles = match.group('styles')
self.html_preprocess_sections = self.html_preprocess_sections + 1 self.html_preprocess_sections = self.html_preprocess_sections + 1
self.log.debug("marked " + unicode_type(self.html_preprocess_sections) + self.log.debug("marked " + str(self.html_preprocess_sections) +
" section markers based on punctuation. - " + unicode_type(chap)) " section markers based on punctuation. - " + str(chap))
return '<'+styles+' style="page-break-before:always">'+chap return '<'+styles+' style="page-break-before:always">'+chap
def analyze_title_matches(self, match): def analyze_title_matches(self, match):
@@ -112,8 +111,8 @@ class HeuristicProcessor(object):
line_end = line_end_ere.findall(raw) line_end = line_end_ere.findall(raw)
tot_htm_ends = len(htm_end) tot_htm_ends = len(htm_end)
tot_ln_fds = len(line_end) tot_ln_fds = len(line_end)
# self.log.debug("There are " + unicode_type(tot_ln_fds) + " total Line feeds, and " + # self.log.debug("There are " + str(tot_ln_fds) + " total Line feeds, and " +
# unicode_type(tot_htm_ends) + " marked up endings") # str(tot_htm_ends) + " marked up endings")
if percent > 1: if percent > 1:
percent = 1 percent = 1
@@ -121,7 +120,7 @@ class HeuristicProcessor(object):
percent = 0 percent = 0
min_lns = tot_ln_fds * percent min_lns = tot_ln_fds * percent
# self.log.debug("There must be fewer than " + unicode_type(min_lns) + " unmarked lines to add markup") # self.log.debug("There must be fewer than " + str(min_lns) + " unmarked lines to add markup")
return min_lns > tot_htm_ends return min_lns > tot_htm_ends
def dump(self, raw, where): def dump(self, raw, where):
@@ -158,17 +157,17 @@ class HeuristicProcessor(object):
] ]
ITALICIZE_STYLE_PATS = [ ITALICIZE_STYLE_PATS = [
unicode_type(r'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_'), str(r'(?msu)(?<=[\s>"\'])_\*/(?P<words>[^\*_]+)/\*_'),
unicode_type(r'(?msu)(?<=[\s>"\'])~~(?P<words>[^~]+)~~'), str(r'(?msu)(?<=[\s>"\'])~~(?P<words>[^~]+)~~'),
unicode_type(r'(?msu)(?<=[\s>"\'])_/(?P<words>[^/_]+)/_'), str(r'(?msu)(?<=[\s>"\'])_/(?P<words>[^/_]+)/_'),
unicode_type(r'(?msu)(?<=[\s>"\'])_\*(?P<words>[^\*_]+)\*_'), str(r'(?msu)(?<=[\s>"\'])_\*(?P<words>[^\*_]+)\*_'),
unicode_type(r'(?msu)(?<=[\s>"\'])\*/(?P<words>[^/\*]+)/\*'), str(r'(?msu)(?<=[\s>"\'])\*/(?P<words>[^/\*]+)/\*'),
unicode_type(r'(?msu)(?<=[\s>"\'])/:(?P<words>[^:/]+):/'), str(r'(?msu)(?<=[\s>"\'])/:(?P<words>[^:/]+):/'),
unicode_type(r'(?msu)(?<=[\s>"\'])\|:(?P<words>[^:\|]+):\|'), str(r'(?msu)(?<=[\s>"\'])\|:(?P<words>[^:\|]+):\|'),
unicode_type(r'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*'), str(r'(?msu)(?<=[\s>"\'])\*(?P<words>[^\*]+)\*'),
unicode_type(r'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~'), str(r'(?msu)(?<=[\s>"\'])~(?P<words>[^~]+)~'),
unicode_type(r'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*><]+)/'), str(r'(?msu)(?<=[\s>"\'])/(?P<words>[^/\*><]+)/'),
unicode_type(r'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'), str(r'(?msu)(?<=[\s>"\'])_(?P<words>[^_]+)_'),
] ]
for word in ITALICIZE_WORDS: for word in ITALICIZE_WORDS:
@@ -178,10 +177,10 @@ class HeuristicProcessor(object):
search_text = re.sub(r'<[^>]*>', '', search_text) search_text = re.sub(r'<[^>]*>', '', search_text)
for pat in ITALICIZE_STYLE_PATS: for pat in ITALICIZE_STYLE_PATS:
for match in re.finditer(pat, search_text): for match in re.finditer(pat, search_text):
ital_string = unicode_type(match.group('words')) ital_string = str(match.group('words'))
# self.log.debug("italicising "+unicode_type(match.group(0))+" with <i>"+ital_string+"</i>") # self.log.debug("italicising "+str(match.group(0))+" with <i>"+ital_string+"</i>")
try: try:
html = re.sub(re.escape(unicode_type(match.group(0))), '<i>%s</i>' % ital_string, html) html = re.sub(re.escape(str(match.group(0))), '<i>%s</i>' % ital_string, html)
except OverflowError: except OverflowError:
# match.group(0) was too large to be compiled into a regex # match.group(0) was too large to be compiled into a regex
continue continue
@@ -206,10 +205,10 @@ class HeuristicProcessor(object):
if wordcount > 200000: if wordcount > 200000:
typical_chapters = 15000. typical_chapters = 15000.
self.min_chapters = int(ceil(wordcount / typical_chapters)) self.min_chapters = int(ceil(wordcount / typical_chapters))
self.log.debug("minimum chapters required are: "+unicode_type(self.min_chapters)) self.log.debug("minimum chapters required are: "+str(self.min_chapters))
heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE) heading = re.compile('<h[1-3][^>]*>', re.IGNORECASE)
self.html_preprocess_sections = len(heading.findall(html)) self.html_preprocess_sections = len(heading.findall(html))
self.log.debug("found " + unicode_type(self.html_preprocess_sections) + " pre-existing headings") self.log.debug("found " + str(self.html_preprocess_sections) + " pre-existing headings")
# Build the Regular Expressions in pieces # Build the Regular Expressions in pieces
init_lookahead = "(?=<(p|div))" init_lookahead = "(?=<(p|div))"
@@ -299,7 +298,7 @@ class HeuristicProcessor(object):
if n_lookahead_req: if n_lookahead_req:
n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line) n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line)
if not analyze: if not analyze:
self.log.debug("Marked " + unicode_type(self.html_preprocess_sections) + " headings, " + log_message) self.log.debug("Marked " + str(self.html_preprocess_sections) + " headings, " + log_message)
chapter_marker = arg_ignorecase+init_lookahead+full_chapter_line+blank_lines+lp_n_lookahead_open+n_lookahead+lp_n_lookahead_close+ \ chapter_marker = arg_ignorecase+init_lookahead+full_chapter_line+blank_lines+lp_n_lookahead_open+n_lookahead+lp_n_lookahead_close+ \
lp_opt_title_open+title_line_open+title_header_open+lp_title+title_header_close+title_line_close+lp_opt_title_close lp_opt_title_open+title_line_open+title_header_open+lp_title+title_header_close+title_line_close+lp_opt_title_close
@@ -313,10 +312,10 @@ class HeuristicProcessor(object):
title_req = True title_req = True
strict_title = False strict_title = False
self.log.debug( self.log.debug(
unicode_type(type_name)+" had "+unicode_type(hits)+ str(type_name)+" had "+str(hits)+
" hits - "+unicode_type(self.chapters_no_title)+" chapters with no title, "+ " hits - "+str(self.chapters_no_title)+" chapters with no title, "+
unicode_type(self.chapters_with_title)+" chapters with titles, "+ str(self.chapters_with_title)+" chapters with titles, "+
unicode_type(float(self.chapters_with_title) / float(hits))+" percent. ") str(float(self.chapters_with_title) / float(hits))+" percent. ")
if type_name == 'common': if type_name == 'common':
analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name]) analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name])
elif self.min_chapters <= hits < max_chapters or self.min_chapters < 3 > hits: elif self.min_chapters <= hits < max_chapters or self.min_chapters < 3 > hits:
@@ -333,8 +332,8 @@ class HeuristicProcessor(object):
words_per_chptr = wordcount words_per_chptr = wordcount
if words_per_chptr > 0 and self.html_preprocess_sections > 0: if words_per_chptr > 0 and self.html_preprocess_sections > 0:
words_per_chptr = wordcount // self.html_preprocess_sections words_per_chptr = wordcount // self.html_preprocess_sections
self.log.debug("Total wordcount is: "+ unicode_type(wordcount)+", Average words per section is: "+ self.log.debug("Total wordcount is: "+ str(wordcount)+", Average words per section is: "+
unicode_type(words_per_chptr)+", Marked up "+unicode_type(self.html_preprocess_sections)+" chapters") str(words_per_chptr)+", Marked up "+str(self.html_preprocess_sections)+" chapters")
return html return html
def punctuation_unwrap(self, length, content, format): def punctuation_unwrap(self, length, content, format):
@@ -364,8 +363,8 @@ class HeuristicProcessor(object):
# define the pieces of the regex # define the pieces of the regex
# (?<!\&\w{4});) is a semicolon not part of an entity # (?<!\&\w{4});) is a semicolon not part of an entity
lookahead = "(?<=.{"+unicode_type(length)+r"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IAß]|(?<!\&\w{4});))" lookahead = "(?<=.{"+str(length)+r"}([a-zა-ჰäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IAß]|(?<!\&\w{4});))"
em_en_lookahead = "(?<=.{"+unicode_type(length)+"}[\u2013\u2014])" em_en_lookahead = "(?<=.{"+str(length)+"}[\u2013\u2014])"
soft_hyphen = "\xad" soft_hyphen = "\xad"
line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?" line_ending = "\\s*(?P<style_close></(span|[iub])>)?\\s*(</(p|div)>)?"
blanklines = "\\s*(?P<up2threeblanks><(p|span|div)[^>]*>\\s*(<(p|span|div)[^>]*>\\s*</(span|p|div)>\\s*)</(span|p|div)>\\s*){0,3}\\s*" blanklines = "\\s*(?P<up2threeblanks><(p|span|div)[^>]*>\\s*(<(p|span|div)[^>]*>\\s*</(span|p|div)>\\s*)</(span|p|div)>\\s*){0,3}\\s*"
@@ -425,18 +424,18 @@ class HeuristicProcessor(object):
return html return html
def fix_nbsp_indents(self, html): def fix_nbsp_indents(self, html):
txtindent = re.compile(unicode_type(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE) txtindent = re.compile(str(r'<(?P<tagtype>p|div)(?P<formatting>[^>]*)>\s*(?P<span>(<span[^>]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE)
html = txtindent.sub(self.insert_indent, html) html = txtindent.sub(self.insert_indent, html)
if self.found_indents > 1: if self.found_indents > 1:
self.log.debug("replaced "+unicode_type(self.found_indents)+ " nbsp indents with inline styles") self.log.debug("replaced "+str(self.found_indents)+ " nbsp indents with inline styles")
return html return html
def cleanup_markup(self, html): def cleanup_markup(self, html):
# remove remaining non-breaking spaces # remove remaining non-breaking spaces
html = re.sub(unicode_type(r'\u00a0'), ' ', html) html = re.sub(str(r'\u00a0'), ' ', html)
# Get rid of various common microsoft specific tags which can cause issues later # Get rid of various common microsoft specific tags which can cause issues later
# Get rid of empty <o:p> tags to simplify other processing # Get rid of empty <o:p> tags to simplify other processing
html = re.sub(unicode_type(r'\s*<o:p>\s*</o:p>'), ' ', html) html = re.sub(str(r'\s*<o:p>\s*</o:p>'), ' ', html)
# Delete microsoft 'smart' tags # Delete microsoft 'smart' tags
html = re.sub('(?i)</?st1:\\w+>', '', html) html = re.sub('(?i)</?st1:\\w+>', '', html)
# Re-open self closing paragraph tags # Re-open self closing paragraph tags
@@ -476,8 +475,8 @@ class HeuristicProcessor(object):
blanklines = self.blankreg.findall(html) blanklines = self.blankreg.findall(html)
lines = self.linereg.findall(html) lines = self.linereg.findall(html)
if len(lines) > 1: if len(lines) > 1:
self.log.debug("There are " + unicode_type(len(blanklines)) + " blank lines. " + self.log.debug("There are " + str(len(blanklines)) + " blank lines. " +
unicode_type(float(len(blanklines)) / float(len(lines))) + " percent blank") str(float(len(blanklines)) / float(len(lines))) + " percent blank")
if float(len(blanklines)) / float(len(lines)) > 0.40: if float(len(blanklines)) / float(len(lines)) > 0.40:
return True return True
@@ -499,11 +498,11 @@ class HeuristicProcessor(object):
lines = float(len(self.single_blank.findall(to_merge))) - 1. lines = float(len(self.single_blank.findall(to_merge))) - 1.
em = base_em + (em_per_line * lines) em = base_em + (em_per_line * lines)
if to_merge.find('whitespace'): if to_merge.find('whitespace'):
newline = self.any_multi_blank.sub('\n<p class="whitespace'+unicode_type(int(em * 10))+ newline = self.any_multi_blank.sub('\n<p class="whitespace'+str(int(em * 10))+
'" style="text-align:center; margin-top:'+unicode_type(em)+'em"> </p>', match.group(0)) '" style="text-align:center; margin-top:'+str(em)+'em"> </p>', match.group(0))
else: else:
newline = self.any_multi_blank.sub('\n<p class="softbreak'+unicode_type(int(em * 10))+ newline = self.any_multi_blank.sub('\n<p class="softbreak'+str(int(em * 10))+
'" style="text-align:center; margin-top:'+unicode_type(em)+'em"> </p>', match.group(0)) '" style="text-align:center; margin-top:'+str(em)+'em"> </p>', match.group(0))
return newline return newline
html = self.any_multi_blank.sub(merge_matches, html) html = self.any_multi_blank.sub(merge_matches, html)
@@ -527,9 +526,9 @@ class HeuristicProcessor(object):
top_margin = '' top_margin = ''
bottom_margin = '' bottom_margin = ''
if initblanks is not None: if initblanks is not None:
top_margin = 'margin-top:'+unicode_type(len(self.single_blank.findall(initblanks)))+'em;' top_margin = 'margin-top:'+str(len(self.single_blank.findall(initblanks)))+'em;'
if endblanks is not None: if endblanks is not None:
bottom_margin = 'margin-bottom:'+unicode_type(len(self.single_blank.findall(endblanks)))+'em;' bottom_margin = 'margin-bottom:'+str(len(self.single_blank.findall(endblanks)))+'em;'
if initblanks is None and endblanks is None: if initblanks is None and endblanks is None:
return content return content
@@ -606,7 +605,7 @@ class HeuristicProcessor(object):
else: else:
replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break) replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break)
divpercent = (100 - width) // 2 divpercent = (100 - width) // 2
hr_open = re.sub('45', unicode_type(divpercent), hr_open) hr_open = re.sub('45', str(divpercent), hr_open)
scene_break = hr_open+replacement_break+'</div>' scene_break = hr_open+replacement_break+'</div>'
else: else:
scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>' scene_break = hr_open+'<hr style="height: 3px; background:#505050" /></div>'
@@ -666,12 +665,12 @@ class HeuristicProcessor(object):
else: else:
styles = match.group('styles').split(';') styles = match.group('styles').split(';')
is_paragraph = self.check_paragraph(content) is_paragraph = self.check_paragraph(content)
# print "styles for this line are: "+unicode_type(styles) # print "styles for this line are: "+str(styles)
split_styles = [] split_styles = []
for style in styles: for style in styles:
# print "style is: "+unicode_type(style) # print "style is: "+str(style)
newstyle = style.split(':') newstyle = style.split(':')
# print "newstyle is: "+unicode_type(newstyle) # print "newstyle is: "+str(newstyle)
split_styles.append(newstyle) split_styles.append(newstyle)
styles = split_styles styles = split_styles
for style, setting in styles: for style, setting in styles:
@@ -682,7 +681,7 @@ class HeuristicProcessor(object):
if 9 < setting < 14: if 9 < setting < 14:
text_indent = indented_text text_indent = indented_text
else: else:
text_indent = style+':'+unicode_type(setting)+'pt;' text_indent = style+':'+str(setting)+'pt;'
if style == 'padding': if style == 'padding':
setting = re.sub('pt', '', setting).split(' ') setting = re.sub('pt', '', setting).split(' ')
if int(setting[1]) < 16 and int(setting[3]) < 16: if int(setting[1]) < 16 and int(setting[3]) < 16:
@@ -703,23 +702,23 @@ class HeuristicProcessor(object):
blockquote_open_loop = blockquote_open blockquote_open_loop = blockquote_open
if debugabby: if debugabby:
self.log.debug('\n\n******\n') self.log.debug('\n\n******\n')
self.log.debug('padding top is: '+unicode_type(setting[0])) self.log.debug('padding top is: '+str(setting[0]))
self.log.debug('padding right is:' +unicode_type(setting[1])) self.log.debug('padding right is:' +str(setting[1]))
self.log.debug('padding bottom is: ' + unicode_type(setting[2])) self.log.debug('padding bottom is: ' + str(setting[2]))
self.log.debug('padding left is: ' +unicode_type(setting[3])) self.log.debug('padding left is: ' +str(setting[3]))
# print "text-align is: "+unicode_type(text_align) # print "text-align is: "+str(text_align)
# print "\n***\nline is:\n "+unicode_type(match.group(0))+'\n' # print "\n***\nline is:\n "+str(match.group(0))+'\n'
if debugabby: if debugabby:
# print "this line is a paragraph = "+unicode_type(is_paragraph)+", previous line was "+unicode_type(self.previous_was_paragraph) # print "this line is a paragraph = "+str(is_paragraph)+", previous line was "+str(self.previous_was_paragraph)
self.log.debug("styles for this line were:", styles) self.log.debug("styles for this line were:", styles)
self.log.debug('newline is:') self.log.debug('newline is:')
self.log.debug(blockquote_open_loop+blockquote_close_loop+ self.log.debug(blockquote_open_loop+blockquote_close_loop+
paragraph_before+'<p style="'+text_indent+text_align+ paragraph_before+'<p style="'+text_indent+text_align+
'">'+content+'</p>'+paragraph_after+'\n\n\n\n\n') '">'+content+'</p>'+paragraph_after+'\n\n\n\n\n')
# print "is_paragraph is "+unicode_type(is_paragraph)+", previous_was_paragraph is "+unicode_type(self.previous_was_paragraph) # print "is_paragraph is "+str(is_paragraph)+", previous_was_paragraph is "+str(self.previous_was_paragraph)
self.previous_was_paragraph = is_paragraph self.previous_was_paragraph = is_paragraph
# print "previous_was_paragraph is now set to "+unicode_type(self.previous_was_paragraph)+"\n\n\n" # print "previous_was_paragraph is now set to "+str(self.previous_was_paragraph)+"\n\n\n"
return blockquote_open_loop+blockquote_close_loop+paragraph_before+'<p style="'+text_indent+text_align+'">'+content+'</p>'+paragraph_after return blockquote_open_loop+blockquote_close_loop+paragraph_before+'<p style="'+text_indent+text_align+'">'+content+'</p>'+paragraph_after
html = abbyy_line.sub(convert_styles, html) html = abbyy_line.sub(convert_styles, html)
@@ -802,12 +801,12 @@ class HeuristicProcessor(object):
# more of the lines break in the same region of the document then unwrapping is required # more of the lines break in the same region of the document then unwrapping is required
docanalysis = DocAnalysis(format, html) docanalysis = DocAnalysis(format, html)
hardbreaks = docanalysis.line_histogram(.50) hardbreaks = docanalysis.line_histogram(.50)
self.log.debug("Hard line breaks check returned "+unicode_type(hardbreaks)) self.log.debug("Hard line breaks check returned "+str(hardbreaks))
# Calculate Length # Calculate Length
unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4) unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4)
length = docanalysis.line_length(unwrap_factor) length = docanalysis.line_length(unwrap_factor)
self.log.debug("Median line length is " + unicode_type(length) + ", calculated with " + format + " format") self.log.debug("Median line length is " + str(length) + ", calculated with " + format + " format")
# ##### Unwrap lines ###### # ##### Unwrap lines ######
if getattr(self.extra_opts, 'unwrap_lines', False): if getattr(self.extra_opts, 'unwrap_lines', False):
@@ -829,7 +828,7 @@ class HeuristicProcessor(object):
# If still no sections after unwrapping mark split points on lines with no punctuation # If still no sections after unwrapping mark split points on lines with no punctuation
if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False): if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False):
self.log.debug("Looking for more split points based on punctuation," self.log.debug("Looking for more split points based on punctuation,"
" currently have " + unicode_type(self.html_preprocess_sections)) " currently have " + str(self.html_preprocess_sections))
chapdetect3 = re.compile( chapdetect3 = re.compile(
r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([\W]+\s*)+)' r'<(?P<styles>(p|div)[^>]*)>\s*(?P<section>(<span[^>]*>)?\s*(?!([\W]+\s*)+)'
r'(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*' r'(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*(<[ibu][^>]*>){0,2}\s*(<span[^>]*>)?\s*'
+3 -3
View File
@@ -1,5 +1,5 @@
from collections import OrderedDict from collections import OrderedDict
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -49,8 +49,8 @@ class Footnotes(object):
if note is not None and note.type == 'normal': if note is not None and note.type == 'normal':
self.counter += 1 self.counter += 1
anchor = 'note_%d' % self.counter anchor = 'note_%d' % self.counter
self.notes[anchor] = (unicode_type(self.counter), note) self.notes[anchor] = (str(self.counter), note)
return anchor, unicode_type(self.counter) return anchor, str(self.counter)
return None, None return None, None
def __iter__(self): def __iter__(self):
+2 -2
View File
@@ -7,7 +7,7 @@ from lxml.html.builder import OL, UL, SPAN
from ebook_converter.ebooks.docx.block_styles import ParagraphStyle from ebook_converter.ebooks.docx.block_styles import ParagraphStyle
from ebook_converter.ebooks.docx.char_styles import RunStyle, inherit from ebook_converter.ebooks.docx.char_styles import RunStyle, inherit
from ebook_converter.ebooks.metadata import roman from ebook_converter.ebooks.metadata import roman
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -288,7 +288,7 @@ class Numbering(object):
seen_instances.add(num_id) seen_instances.add(num_id)
p.tag = 'li' p.tag = 'li'
p.set('value', '%s' % counter[ilvl]) p.set('value', '%s' % counter[ilvl])
p.set('list-lvl', unicode_type(ilvl)) p.set('list-lvl', str(ilvl))
p.set('list-id', num_id) p.set('list-id', num_id)
if lvl.num_template is not None: if lvl.num_template is not None:
val = lvl.format_template(counter, ilvl, lvl.num_template) val = lvl.format_template(counter, ilvl, lvl.num_template)
+3 -3
View File
@@ -2,7 +2,7 @@ from lxml.html.builder import TABLE, TR, TD
from ebook_converter.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle, border_to_css from ebook_converter.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle, border_to_css
from ebook_converter.ebooks.docx.char_styles import RunStyle from ebook_converter.ebooks.docx.char_styles import RunStyle
from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type from ebook_converter.polyglot.builtins import iteritems, itervalues
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -643,9 +643,9 @@ class Table(object):
td = TD() td = TD()
style_map[td] = s = self.style_map[tc] style_map[td] = s = self.style_map[tc]
if s.col_span is not inherit: if s.col_span is not inherit:
td.set('colspan', unicode_type(s.col_span)) td.set('colspan', str(s.col_span))
if s.row_span is not inherit: if s.row_span is not inherit:
td.set('rowspan', unicode_type(s.row_span)) td.set('rowspan', str(s.row_span))
td.tail = '\n\t\t\t' td.tail = '\n\t\t\t'
tr.append(td) tr.append(td)
for x in self.namespace.XPath('./w:p|./w:tbl')(tc): for x in self.namespace.XPath('./w:p|./w:tbl')(tc):
+3 -3
View File
@@ -21,7 +21,7 @@ from ebook_converter.ebooks.docx.fields import Fields
from ebook_converter.ebooks.docx.settings import Settings from ebook_converter.ebooks.docx.settings import Settings
from ebook_converter.ebooks.metadata.opf2 import OPFCreator from ebook_converter.ebooks.metadata.opf2 import OPFCreator
from ebook_converter.utils.localization import canonicalize_lang, lang_as_iso639_1 from ebook_converter.utils.localization import canonicalize_lang, lang_as_iso639_1
from ebook_converter.polyglot.builtins import iteritems, itervalues, getcwd, unicode_type from ebook_converter.polyglot.builtins import iteritems, itervalues, getcwd
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -476,7 +476,7 @@ class Convert(object):
current_hyperlink = x current_hyperlink = x
elif x.tag.endswith('}instrText') and x.text and x.text.strip().startswith('TOC '): elif x.tag.endswith('}instrText') and x.text and x.text.strip().startswith('TOC '):
old_anchor = current_anchor old_anchor = current_anchor
anchor = unicode_type(uuid.uuid4()) anchor = str(uuid.uuid4())
self.anchor_map[anchor] = current_anchor = generate_anchor('toc', frozenset(itervalues(self.anchor_map))) self.anchor_map[anchor] = current_anchor = generate_anchor('toc', frozenset(itervalues(self.anchor_map)))
self.toc_anchor = current_anchor self.toc_anchor = current_anchor
if old_anchor is not None: if old_anchor is not None:
@@ -493,7 +493,7 @@ class Convert(object):
if m is not None: if m is not None:
n = min(6, max(1, int(m.group(1)))) n = min(6, max(1, int(m.group(1))))
dest.tag = 'h%d' % n dest.tag = 'h%d' % n
dest.set('data-heading-level', unicode_type(n)) dest.set('data-heading-level', str(n))
if style.bidi is True: if style.bidi is True:
dest.set('dir', 'rtl') dest.set('dir', 'rtl')
@@ -11,7 +11,7 @@ from ebook_converter.ebooks.pdf.render.common import PAPER_SIZES
from ebook_converter.utils.date import utcnow from ebook_converter.utils.date import utcnow
from ebook_converter.utils.localization import canonicalize_lang, lang_as_iso639_1 from ebook_converter.utils.localization import canonicalize_lang, lang_as_iso639_1
from ebook_converter.utils.zipfile import ZipFile from ebook_converter.utils.zipfile import ZipFile
from ebook_converter.polyglot.builtins import iteritems, unicode_type, native_string_type from ebook_converter.polyglot.builtins import iteritems, native_string_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -62,9 +62,9 @@ def create_skeleton(opts, namespaces=None):
def margin(which): def margin(which):
val = page_margin(opts, which) val = page_margin(opts, which)
return w(which), unicode_type(int(val * 20)) return w(which), str(int(val * 20))
body.append(E.sectPr( body.append(E.sectPr(
E.pgSz(**{w('w'):unicode_type(width), w('h'):unicode_type(height)}), E.pgSz(**{w('w'):str(width), w('h'):str(height)}),
E.pgMar(**dict(map(margin, 'left top right bottom'.split()))), E.pgMar(**dict(map(margin, 'left top right bottom'.split()))),
E.cols(**{w('space'):'720'}), E.cols(**{w('space'):'720'}),
E.docGrid(**{w('linePitch'):"360"}), E.docGrid(**{w('linePitch'):"360"}),
@@ -11,7 +11,7 @@ from ebook_converter.ebooks.docx.writer.lists import ListsManager
from ebook_converter.ebooks.oeb.stylizer import Stylizer as Sz, Style as St from ebook_converter.ebooks.oeb.stylizer import Stylizer as Sz, Style as St
from ebook_converter.ebooks.oeb.base import XPath, barename from ebook_converter.ebooks.oeb.base import XPath, barename
from ebook_converter.utils.localization import lang_as_iso639_1 from ebook_converter.utils.localization import lang_as_iso639_1
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes from ebook_converter.polyglot.builtins import string_or_bytes
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -98,7 +98,7 @@ class TextRun(object):
for text, preserve_whitespace, bookmark in self.texts: for text, preserve_whitespace, bookmark in self.texts:
if bookmark is not None: if bookmark is not None:
bid = links_manager.bookmark_id bid = links_manager.bookmark_id
makeelement(r, 'w:bookmarkStart', w_id=unicode_type(bid), w_name=bookmark) makeelement(r, 'w:bookmarkStart', w_id=str(bid), w_name=bookmark)
if text is None: if text is None:
makeelement(r, 'w:br', w_clear=preserve_whitespace) makeelement(r, 'w:br', w_clear=preserve_whitespace)
elif hasattr(text, 'xpath'): elif hasattr(text, 'xpath'):
@@ -109,7 +109,7 @@ class TextRun(object):
if preserve_whitespace: if preserve_whitespace:
t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve') t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve')
if bookmark is not None: if bookmark is not None:
makeelement(r, 'w:bookmarkEnd', w_id=unicode_type(bid)) makeelement(r, 'w:bookmarkEnd', w_id=str(bid))
def __repr__(self): def __repr__(self):
return repr(self.texts) return repr(self.texts)
@@ -125,7 +125,7 @@ class TextRun(object):
def style_weight(self): def style_weight(self):
ans = 0 ans = 0
for text, preserve_whitespace, bookmark in self.texts: for text, preserve_whitespace, bookmark in self.texts:
if isinstance(text, unicode_type): if isinstance(text, str):
ans += len(text) ans += len(text)
return ans return ans
@@ -205,7 +205,7 @@ class Block(object):
p = makeelement(body, 'w:p') p = makeelement(body, 'w:p')
end_bookmarks = [] end_bookmarks = []
for bmark in self.bookmarks: for bmark in self.bookmarks:
end_bookmarks.append(unicode_type(self.links_manager.bookmark_id)) end_bookmarks.append(str(self.links_manager.bookmark_id))
makeelement(p, 'w:bookmarkStart', w_id=end_bookmarks[-1], w_name=bmark) makeelement(p, 'w:bookmarkStart', w_id=end_bookmarks[-1], w_name=bmark)
if self.block_lang: if self.block_lang:
rpr = makeelement(p, 'w:rPr') rpr = makeelement(p, 'w:rPr')
@@ -218,8 +218,8 @@ class Block(object):
self.float_spec.serialize(self, ppr) self.float_spec.serialize(self, ppr)
if self.numbering_id is not None: if self.numbering_id is not None:
numpr = makeelement(ppr, 'w:numPr') numpr = makeelement(ppr, 'w:numPr')
makeelement(numpr, 'w:ilvl', w_val=unicode_type(self.numbering_id[1])) makeelement(numpr, 'w:ilvl', w_val=str(self.numbering_id[1]))
makeelement(numpr, 'w:numId', w_val=unicode_type(self.numbering_id[0])) makeelement(numpr, 'w:numId', w_val=str(self.numbering_id[0]))
if self.linked_style is not None: if self.linked_style is not None:
makeelement(ppr, 'w:pStyle', w_val=self.linked_style.id) makeelement(ppr, 'w:pStyle', w_val=self.linked_style.id)
elif self.style.id: elif self.style.id:
@@ -439,8 +439,8 @@ class Convert(object):
if self.add_toc: if self.add_toc:
self.links_manager.process_toc_links(self.oeb) self.links_manager.process_toc_links(self.oeb)
if self.add_cover and self.oeb.metadata.cover and unicode_type(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids: if self.add_cover and self.oeb.metadata.cover and str(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids:
cover_id = unicode_type(self.oeb.metadata.cover[0]) cover_id = str(self.oeb.metadata.cover[0])
item = self.oeb.manifest.ids[cover_id] item = self.oeb.manifest.ids[cover_id]
self.cover_img = self.images_manager.read_image(item.href) self.cover_img = self.images_manager.read_image(item.href)
+7 -7
View File
@@ -2,7 +2,7 @@ import os
import posixpath import posixpath
from collections import namedtuple from collections import namedtuple
from functools import partial from functools import partial
from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type from ebook_converter.polyglot.builtins import iteritems, itervalues
from lxml import etree from lxml import etree
@@ -31,7 +31,7 @@ def get_image_margins(style):
ans = {} ans = {}
for edge in 'Left Right Top Bottom'.split(): for edge in 'Left Right Top Bottom'.split():
val = as_num(getattr(style, 'padding' + edge)) + as_num(getattr(style, 'margin' + edge)) val = as_num(getattr(style, 'padding' + edge)) + as_num(getattr(style, 'margin' + edge))
ans['dist' + edge[0]] = unicode_type(pt_to_emu(val)) ans['dist' + edge[0]] = str(pt_to_emu(val))
return ans return ans
@@ -123,7 +123,7 @@ class ImagesManager(object):
makeelement(parent, 'wp:simplePos', x='0', y='0') makeelement(parent, 'wp:simplePos', x='0', y='0')
makeelement(makeelement(parent, 'wp:positionH', relativeFrom='margin'), 'wp:align').text = floating makeelement(makeelement(parent, 'wp:positionH', relativeFrom='margin'), 'wp:align').text = floating
makeelement(makeelement(parent, 'wp:positionV', relativeFrom='line'), 'wp:align').text = 'top' makeelement(makeelement(parent, 'wp:positionV', relativeFrom='line'), 'wp:align').text = 'top'
makeelement(parent, 'wp:extent', cx=unicode_type(width), cy=unicode_type(height)) makeelement(parent, 'wp:extent', cx=str(width), cy=str(height))
if fake_margins: if fake_margins:
# DOCX does not support setting margins for inline images, so we # DOCX does not support setting margins for inline images, so we
# fake it by using effect extents to simulate margins # fake it by using effect extents to simulate margins
@@ -141,7 +141,7 @@ class ImagesManager(object):
def create_docx_image_markup(self, parent, name, alt, img_rid, width, height): def create_docx_image_markup(self, parent, name, alt, img_rid, width, height):
makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces
makeelement(parent, 'wp:docPr', id=unicode_type(self.count), name=name, descr=alt) makeelement(parent, 'wp:docPr', id=str(self.count), name=name, descr=alt)
makeelement(makeelement(parent, 'wp:cNvGraphicFramePr'), 'a:graphicFrameLocks', noChangeAspect="1") makeelement(makeelement(parent, 'wp:cNvGraphicFramePr'), 'a:graphicFrameLocks', noChangeAspect="1")
g = makeelement(parent, 'a:graphic') g = makeelement(parent, 'a:graphic')
gd = makeelement(g, 'a:graphicData', uri=namespaces['pic']) gd = makeelement(g, 'a:graphicData', uri=namespaces['pic'])
@@ -154,7 +154,7 @@ class ImagesManager(object):
makeelement(makeelement(bf, 'a:stretch'), 'a:fillRect') makeelement(makeelement(bf, 'a:stretch'), 'a:fillRect')
spPr = makeelement(pic, 'pic:spPr') spPr = makeelement(pic, 'pic:spPr')
xfrm = makeelement(spPr, 'a:xfrm') xfrm = makeelement(spPr, 'a:xfrm')
makeelement(xfrm, 'a:off', x='0', y='0'), makeelement(xfrm, 'a:ext', cx=unicode_type(width), cy=unicode_type(height)) makeelement(xfrm, 'a:off', x='0', y='0'), makeelement(xfrm, 'a:ext', cx=str(width), cy=str(height))
makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst') makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst')
def create_filename(self, href, fmt): def create_filename(self, href, fmt):
@@ -165,7 +165,7 @@ class ImagesManager(object):
base = fname base = fname
while fname.lower() in self.seen_filenames: while fname.lower() in self.seen_filenames:
num += 1 num += 1
fname = base + unicode_type(num) fname = base + str(num)
self.seen_filenames.add(fname.lower()) self.seen_filenames.add(fname.lower())
fname += os.extsep + fmt.lower() fname += os.extsep + fmt.lower()
return fname return fname
@@ -200,7 +200,7 @@ class ImagesManager(object):
makeelement(makeelement(parent, 'wp:positionH', relativeFrom='page'), 'wp:align').text = 'center' makeelement(makeelement(parent, 'wp:positionH', relativeFrom='page'), 'wp:align').text = 'center'
makeelement(makeelement(parent, 'wp:positionV', relativeFrom='page'), 'wp:align').text = 'center' makeelement(makeelement(parent, 'wp:positionV', relativeFrom='page'), 'wp:align').text = 'center'
width, height = map(pt_to_emu, (width, height)) width, height = map(pt_to_emu, (width, height))
makeelement(parent, 'wp:extent', cx=unicode_type(width), cy=unicode_type(height)) makeelement(parent, 'wp:extent', cx=str(width), cy=str(height))
makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0') makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0')
makeelement(parent, 'wp:wrapTopAndBottom') makeelement(parent, 'wp:wrapTopAndBottom')
self.create_docx_image_markup(parent, 'cover.jpg', _('Cover'), img.rid, width, height) self.create_docx_image_markup(parent, 'cover.jpg', _('Cover'), img.rid, width, height)
+2 -3
View File
@@ -4,7 +4,6 @@ import urllib.parse
import uuid import uuid
from ebook_converter.utils.filenames import ascii_text from ebook_converter.utils.filenames import ascii_text
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -34,7 +33,7 @@ class TOCItem(object):
p = makeelement(body, 'w:p', append=False) p = makeelement(body, 'w:p', append=False)
ppr = makeelement(p, 'w:pPr') ppr = makeelement(p, 'w:pPr')
makeelement(ppr, 'w:pStyle', w_val="Normal") makeelement(ppr, 'w:pStyle', w_val="Normal")
makeelement(ppr, 'w:ind', w_left='0', w_firstLineChars='0', w_firstLine='0', w_leftChars=unicode_type(200 * self.level)) makeelement(ppr, 'w:ind', w_left='0', w_firstLineChars='0', w_firstLine='0', w_leftChars=str(200 * self.level))
if self.is_first: if self.is_first:
makeelement(ppr, 'w:pageBreakBefore', w_val='off') makeelement(ppr, 'w:pageBreakBefore', w_val='off')
r = makeelement(p, 'w:r') r = makeelement(p, 'w:r')
@@ -68,7 +67,7 @@ class LinksManager(object):
self.namespace = namespace self.namespace = namespace
self.log = log self.log = log
self.document_relationships = document_relationships self.document_relationships = document_relationships
self.top_anchor = unicode_type(uuid.uuid4().hex) self.top_anchor = str(uuid.uuid4().hex)
self.anchor_map = {} self.anchor_map = {}
self.used_bookmark_names = set() self.used_bookmark_names = set()
self.bmark_id = 0 self.bmark_id = 0
+7 -7
View File
@@ -1,7 +1,7 @@
from collections import defaultdict from collections import defaultdict
from operator import attrgetter from operator import attrgetter
from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type from ebook_converter.polyglot.builtins import iteritems, itervalues
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -80,7 +80,7 @@ class NumberingDefinition(object):
def serialize(self, parent): def serialize(self, parent):
makeelement = self.namespace.makeelement makeelement = self.namespace.makeelement
an = makeelement(parent, 'w:abstractNum', w_abstractNumId=unicode_type(self.num_id)) an = makeelement(parent, 'w:abstractNum', w_abstractNumId=str(self.num_id))
makeelement(an, 'w:multiLevelType', w_val='hybridMultilevel') makeelement(an, 'w:multiLevelType', w_val='hybridMultilevel')
makeelement(an, 'w:name', w_val='List %d' % (self.num_id + 1)) makeelement(an, 'w:name', w_val='List %d' % (self.num_id + 1))
for level in self.levels: for level in self.levels:
@@ -111,12 +111,12 @@ class Level(object):
return hash((self.start, self.num_fmt, self.lvl_text)) return hash((self.start, self.num_fmt, self.lvl_text))
def serialize(self, parent, makeelement): def serialize(self, parent, makeelement):
lvl = makeelement(parent, 'w:lvl', w_ilvl=unicode_type(self.ilvl)) lvl = makeelement(parent, 'w:lvl', w_ilvl=str(self.ilvl))
makeelement(lvl, 'w:start', w_val=unicode_type(self.start)) makeelement(lvl, 'w:start', w_val=str(self.start))
makeelement(lvl, 'w:numFmt', w_val=self.num_fmt) makeelement(lvl, 'w:numFmt', w_val=self.num_fmt)
makeelement(lvl, 'w:lvlText', w_val=self.lvl_text) makeelement(lvl, 'w:lvlText', w_val=self.lvl_text)
makeelement(lvl, 'w:lvlJc', w_val='left') makeelement(lvl, 'w:lvlJc', w_val='left')
makeelement(makeelement(lvl, 'w:pPr'), 'w:ind', w_hanging='360', w_left=unicode_type(1152 + self.ilvl * 360)) makeelement(makeelement(lvl, 'w:pPr'), 'w:ind', w_hanging='360', w_left=str(1152 + self.ilvl * 360))
if self.num_fmt == 'bullet': if self.num_fmt == 'bullet':
ff = {'\uf0b7':'Symbol', '\uf0a7':'Wingdings'}.get(self.lvl_text, 'Courier New') ff = {'\uf0b7':'Symbol', '\uf0a7':'Wingdings'}.get(self.lvl_text, 'Courier New')
makeelement(makeelement(lvl, 'w:rPr'), 'w:rFonts', w_ascii=ff, w_hAnsi=ff, w_hint="default") makeelement(makeelement(lvl, 'w:rPr'), 'w:rFonts', w_ascii=ff, w_hAnsi=ff, w_hint="default")
@@ -162,5 +162,5 @@ class ListsManager(object):
defn.serialize(parent) defn.serialize(parent)
makeelement = self.namespace.makeelement makeelement = self.namespace.makeelement
for defn in self.definitions: for defn in self.definitions:
n = makeelement(parent, 'w:num', w_numId=unicode_type(defn.num_id + 1)) n = makeelement(parent, 'w:num', w_numId=str(defn.num_id + 1))
makeelement(n, 'w:abstractNumId', w_val=unicode_type(defn.num_id)) makeelement(n, 'w:abstractNumId', w_val=str(defn.num_id))
+29 -29
View File
@@ -7,7 +7,7 @@ from lxml import etree
from ebook_converter.ebooks import parse_css_length from ebook_converter.ebooks import parse_css_length
from ebook_converter.ebooks.docx.writer.utils import convert_color, int_or_zero from ebook_converter.ebooks.docx.writer.utils import convert_color, int_or_zero
from ebook_converter.utils.localization import lang_as_iso639_1 from ebook_converter.utils.localization import lang_as_iso639_1
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
from ebook_converter.tinycss.css21 import CSS21Parser from ebook_converter.tinycss.css21 import CSS21Parser
@@ -73,7 +73,7 @@ class CombinedStyle(object):
pPr = makeelement(block, 'w:pPr') pPr = makeelement(block, 'w:pPr')
self.bs.serialize_properties(pPr, normal_style.bs) self.bs.serialize_properties(pPr, normal_style.bs)
if self.outline_level is not None: if self.outline_level is not None:
makeelement(pPr, 'w:outlineLvl', w_val=unicode_type(self.outline_level + 1)) makeelement(pPr, 'w:outlineLvl', w_val=str(self.outline_level + 1))
rPr = makeelement(block, 'w:rPr') rPr = makeelement(block, 'w:rPr')
self.rs.serialize_properties(rPr, normal_style.rs) self.rs.serialize_properties(rPr, normal_style.rs)
@@ -106,16 +106,16 @@ class FloatSpec(object):
def serialize(self, block, parent): def serialize(self, block, parent):
if self.is_dropcaps: if self.is_dropcaps:
attrs = dict(w_dropCap='drop', w_lines=unicode_type(self.dropcaps_lines), w_wrap='around', w_vAnchor='text', w_hAnchor='text') attrs = dict(w_dropCap='drop', w_lines=str(self.dropcaps_lines), w_wrap='around', w_vAnchor='text', w_hAnchor='text')
else: else:
attrs = dict( attrs = dict(
w_wrap='around', w_vAnchor='text', w_hAnchor='text', w_xAlign=self.x_align, w_y='1', w_wrap='around', w_vAnchor='text', w_hAnchor='text', w_xAlign=self.x_align, w_y='1',
w_hSpace=unicode_type(self.h_space), w_vSpace=unicode_type(self.v_space), w_hRule=self.h_rule w_hSpace=str(self.h_space), w_vSpace=str(self.v_space), w_hRule=self.h_rule
) )
if self.w is not None: if self.w is not None:
attrs['w_w'] = unicode_type(self.w) attrs['w_w'] = str(self.w)
if self.h is not None: if self.h is not None:
attrs['w_h'] = unicode_type(self.h) attrs['w_h'] = str(self.h)
self.makeelement(parent, 'w:framePr', **attrs) self.makeelement(parent, 'w:framePr', **attrs)
# Margins are already applied by the frame style, so override them to # Margins are already applied by the frame style, so override them to
# be zero on individual blocks # be zero on individual blocks
@@ -135,7 +135,7 @@ class FloatSpec(object):
width = getattr(self, 'border_%s_width' % edge) width = getattr(self, 'border_%s_width' % edge)
bstyle = getattr(self, 'border_%s_style' % edge) bstyle = getattr(self, 'border_%s_style' % edge)
self.makeelement( self.makeelement(
bdr, 'w:'+edge, w_space=unicode_type(padding), w_val=bstyle, w_sz=unicode_type(width), w_color=getattr(self, 'border_%s_color' % edge)) bdr, 'w:'+edge, w_space=str(padding), w_val=bstyle, w_sz=str(width), w_color=getattr(self, 'border_%s_color' % edge))
class DOCXStyle(object): class DOCXStyle(object):
@@ -231,7 +231,7 @@ class TextStyle(DOCXStyle):
self.spacing = None self.spacing = None
va = css.first_vertical_align va = css.first_vertical_align
if isinstance(va, numbers.Number): if isinstance(va, numbers.Number):
self.vertical_align = unicode_type(int(va * 2)) self.vertical_align = str(int(va * 2))
else: else:
val = { val = {
'top':'superscript', 'text-top':'superscript', 'sup':'superscript', 'super':'superscript', 'top':'superscript', 'text-top':'superscript', 'sup':'superscript', 'super':'superscript',
@@ -287,9 +287,9 @@ class TextStyle(DOCXStyle):
w = self.w w = self.w
is_normal_style = self is normal_style is_normal_style = self is normal_style
if is_normal_style or self.padding != normal_style.padding: if is_normal_style or self.padding != normal_style.padding:
bdr.set(w('space'), unicode_type(self.padding)) bdr.set(w('space'), str(self.padding))
if is_normal_style or self.border_width != normal_style.border_width: if is_normal_style or self.border_width != normal_style.border_width:
bdr.set(w('sz'), unicode_type(self.border_width)) bdr.set(w('sz'), str(self.border_width))
if is_normal_style or self.border_style != normal_style.border_style: if is_normal_style or self.border_style != normal_style.border_style:
bdr.set(w('val'), self.border_style) bdr.set(w('val'), self.border_style)
if is_normal_style or self.border_color != normal_style.border_color: if is_normal_style or self.border_color != normal_style.border_color:
@@ -339,7 +339,7 @@ class TextStyle(DOCXStyle):
if check_attr('shadow'): if check_attr('shadow'):
rPr.append(makeelement(rPr, 'shadow', val=bmap(self.shadow))) rPr.append(makeelement(rPr, 'shadow', val=bmap(self.shadow)))
if check_attr('spacing'): if check_attr('spacing'):
rPr.append(makeelement(rPr, 'spacing', val=unicode_type(self.spacing or 0))) rPr.append(makeelement(rPr, 'spacing', val=str(self.spacing or 0)))
if is_normal_style: if is_normal_style:
rPr.append(makeelement(rPr, 'vertAlign', val=self.vertical_align if self.vertical_align in {'superscript', 'subscript'} else 'baseline')) rPr.append(makeelement(rPr, 'vertAlign', val=self.vertical_align if self.vertical_align in {'superscript', 'subscript'} else 'baseline'))
elif self.vertical_align != normal_style.vertical_align: elif self.vertical_align != normal_style.vertical_align:
@@ -377,7 +377,7 @@ class DescendantTextStyle(object):
for name, attr in (('sz', 'font_size'), ('b', 'bold'), ('i', 'italic')): for name, attr in (('sz', 'font_size'), ('b', 'bold'), ('i', 'italic')):
pval, cval = vals(attr) pval, cval = vals(attr)
if pval != cval: if pval != cval:
val = 'on' if attr in {'bold', 'italic'} else unicode_type(cval) # bold, italic are toggle properties val = 'on' if attr in {'bold', 'italic'} else str(cval) # bold, italic are toggle properties
for suffix in ('', 'Cs'): for suffix in ('', 'Cs'):
add(name + suffix, val=val) add(name + suffix, val=val)
@@ -398,7 +398,7 @@ class DescendantTextStyle(object):
if check('shadow'): if check('shadow'):
add('shadow', val='on') # toggle property add('shadow', val='on') # toggle property
if check('spacing'): if check('spacing'):
add('spacing', val=unicode_type(child_style.spacing or 0)) add('spacing', val=str(child_style.spacing or 0))
if check('vertical_align'): if check('vertical_align'):
val = child_style.vertical_align val = child_style.vertical_align
if val in {'superscript', 'subscript', 'baseline'}: if val in {'superscript', 'subscript', 'baseline'}:
@@ -408,9 +408,9 @@ class DescendantTextStyle(object):
bdr = {} bdr = {}
if check('padding'): if check('padding'):
bdr['space'] = unicode_type(child_style.padding) bdr['space'] = str(child_style.padding)
if check('border_width'): if check('border_width'):
bdr['sz'] = unicode_type(child_style.border_width) bdr['sz'] = str(child_style.border_width)
if check('border_style'): if check('border_style'):
bdr['val'] = child_style.border_style bdr['val'] = child_style.border_style
if check('border_color'): if check('border_color'):
@@ -534,14 +534,14 @@ class BlockStyle(DOCXStyle):
e = bdr.makeelement(w(edge)) e = bdr.makeelement(w(edge))
padding = getattr(self, 'padding_' + edge) padding = getattr(self, 'padding_' + edge)
if (self is normal_style and padding > 0) or (padding != getattr(normal_style, 'padding_' + edge)): if (self is normal_style and padding > 0) or (padding != getattr(normal_style, 'padding_' + edge)):
e.set(w('space'), unicode_type(padding)) e.set(w('space'), str(padding))
width = getattr(self, 'border_%s_width' % edge) width = getattr(self, 'border_%s_width' % edge)
bstyle = getattr(self, 'border_%s_style' % edge) bstyle = getattr(self, 'border_%s_style' % edge)
if (self is normal_style and width > 0 and bstyle != 'none' if (self is normal_style and width > 0 and bstyle != 'none'
) or width != getattr(normal_style, 'border_%s_width' % edge ) or width != getattr(normal_style, 'border_%s_width' % edge
) or bstyle != getattr(normal_style, 'border_%s_style' % edge): ) or bstyle != getattr(normal_style, 'border_%s_style' % edge):
e.set(w('val'), bstyle) e.set(w('val'), bstyle)
e.set(w('sz'), unicode_type(width)) e.set(w('sz'), str(width))
e.set(w('color'), getattr(self, 'border_%s_color' % edge)) e.set(w('color'), getattr(self, 'border_%s_color' % edge))
if e.attrib: if e.attrib:
bdr.append(e) bdr.append(e)
@@ -565,15 +565,15 @@ class BlockStyle(DOCXStyle):
if css_unit in ('em', 'ex'): if css_unit in ('em', 'ex'):
lines = max(0, int(css_val * (50 if css_unit == 'ex' else 100))) lines = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
if (self is normal_style and lines > 0) or getter(self) != getter(normal_style): if (self is normal_style and lines > 0) or getter(self) != getter(normal_style):
spacing.set(w(attr + 'Lines'), unicode_type(lines)) spacing.set(w(attr + 'Lines'), str(lines))
else: else:
getter = attrgetter('margin_' + edge) getter = attrgetter('margin_' + edge)
val = getter(self) val = getter(self)
if (self is normal_style and val > 0) or val != getter(normal_style): if (self is normal_style and val > 0) or val != getter(normal_style):
spacing.set(w(attr), unicode_type(val)) spacing.set(w(attr), str(val))
if self is normal_style or self.line_height != normal_style.line_height: if self is normal_style or self.line_height != normal_style.line_height:
spacing.set(w('line'), unicode_type(self.line_height)) spacing.set(w('line'), str(self.line_height))
spacing.set(w('lineRule'), 'atLeast') spacing.set(w('lineRule'), 'atLeast')
if spacing.attrib: if spacing.attrib:
@@ -586,31 +586,31 @@ class BlockStyle(DOCXStyle):
if css_unit in ('em', 'ex'): if css_unit in ('em', 'ex'):
chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100))) chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100)))
if (self is normal_style and chars > 0) or getter(self) != getter(normal_style): if (self is normal_style and chars > 0) or getter(self) != getter(normal_style):
ind.set(w(edge + 'Chars'), unicode_type(chars)) ind.set(w(edge + 'Chars'), str(chars))
else: else:
getter = attrgetter('margin_' + edge) getter = attrgetter('margin_' + edge)
val = getter(self) val = getter(self)
if (self is normal_style and val > 0) or val != getter(normal_style): if (self is normal_style and val > 0) or val != getter(normal_style):
ind.set(w(edge), unicode_type(val)) ind.set(w(edge), str(val))
ind.set(w(edge + 'Chars'), '0') # This is needed to override any declaration in the parent style ind.set(w(edge + 'Chars'), '0') # This is needed to override any declaration in the parent style
css_val, css_unit = parse_css_length(self.css_text_indent) css_val, css_unit = parse_css_length(self.css_text_indent)
if css_unit in ('em', 'ex'): if css_unit in ('em', 'ex'):
chars = int(css_val * (50 if css_unit == 'ex' else 100)) chars = int(css_val * (50 if css_unit == 'ex' else 100))
if css_val >= 0: if css_val >= 0:
if (self is normal_style and chars > 0) or self.css_text_indent != normal_style.css_text_indent: if (self is normal_style and chars > 0) or self.css_text_indent != normal_style.css_text_indent:
ind.set(w('firstLineChars'), unicode_type(chars)) ind.set(w('firstLineChars'), str(chars))
else: else:
if (self is normal_style and chars < 0) or self.css_text_indent != normal_style.css_text_indent: if (self is normal_style and chars < 0) or self.css_text_indent != normal_style.css_text_indent:
ind.set(w('hangingChars'), unicode_type(abs(chars))) ind.set(w('hangingChars'), str(abs(chars)))
else: else:
val = self.text_indent val = self.text_indent
if val >= 0: if val >= 0:
if (self is normal_style and val > 0) or self.text_indent != normal_style.text_indent: if (self is normal_style and val > 0) or self.text_indent != normal_style.text_indent:
ind.set(w('firstLine'), unicode_type(val)) ind.set(w('firstLine'), str(val))
ind.set(w('firstLineChars'), '0') # This is needed to override any declaration in the parent style ind.set(w('firstLineChars'), '0') # This is needed to override any declaration in the parent style
else: else:
if (self is normal_style and val < 0) or self.text_indent != normal_style.text_indent: if (self is normal_style and val < 0) or self.text_indent != normal_style.text_indent:
ind.set(w('hanging'), unicode_type(abs(val))) ind.set(w('hanging'), str(abs(val)))
ind.set(w('hangingChars'), '0') ind.set(w('hangingChars'), '0')
if ind.attrib: if ind.attrib:
pPr.append(ind) pPr.append(ind)
@@ -684,7 +684,7 @@ class StylesManager(object):
pure_block_styles.add(bs) pure_block_styles.add(bs)
self.pure_block_styles = sorted(pure_block_styles, key=block_counts.__getitem__) self.pure_block_styles = sorted(pure_block_styles, key=block_counts.__getitem__)
bnum = len(unicode_type(max(1, len(pure_block_styles) - 1))) bnum = len(str(max(1, len(pure_block_styles) - 1)))
for i, bs in enumerate(self.pure_block_styles): for i, bs in enumerate(self.pure_block_styles):
bs.id = bs.name = '%0{}d Block'.format(bnum) % i bs.id = bs.name = '%0{}d Block'.format(bnum) % i
bs.seq = i bs.seq = i
@@ -704,7 +704,7 @@ class StylesManager(object):
heading_style = styles[-1] heading_style = styles[-1]
heading_style.outline_level = i heading_style.outline_level = i
snum = len(unicode_type(max(1, len(counts) - 1))) snum = len(str(max(1, len(counts) - 1)))
heading_styles = [] heading_styles = []
for i, (style, count) in enumerate(counts.most_common()): for i, (style, count) in enumerate(counts.most_common()):
if i == 0: if i == 0:
@@ -732,7 +732,7 @@ class StylesManager(object):
if run.descendant_style is None: if run.descendant_style is None:
run.descendant_style = descendant_style_map[ds] = ds run.descendant_style = descendant_style_map[ds] = ds
ds_counts[run.descendant_style] += run.style_weight ds_counts[run.descendant_style] += run.style_weight
rnum = len(unicode_type(max(1, len(ds_counts) - 1))) rnum = len(str(max(1, len(ds_counts) - 1)))
for i, (text_style, count) in enumerate(ds_counts.most_common()): for i, (text_style, count) in enumerate(ds_counts.most_common()):
text_style.id = 'Text%d' % i text_style.id = 'Text%d' % i
text_style.name = '%0{}d Text'.format(rnum) % i text_style.name = '%0{}d Text'.format(rnum) % i
+6 -6
View File
@@ -2,7 +2,7 @@ from collections import namedtuple
from ebook_converter.ebooks.docx.writer.utils import convert_color from ebook_converter.ebooks.docx.writer.utils import convert_color
from ebook_converter.ebooks.docx.writer.styles import read_css_block_borders as rcbb, border_edges from ebook_converter.ebooks.docx.writer.styles import read_css_block_borders as rcbb, border_edges
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -112,7 +112,7 @@ class Cell(object):
def serialize(self, parent, makeelement): def serialize(self, parent, makeelement):
tc = makeelement(parent, 'w:tc') tc = makeelement(parent, 'w:tc')
tcPr = makeelement(tc, 'w:tcPr') tcPr = makeelement(tc, 'w:tcPr')
makeelement(tcPr, 'w:tcW', w_type=self.width[0], w_w=unicode_type(self.width[1])) makeelement(tcPr, 'w:tcW', w_type=self.width[0], w_w=str(self.width[1]))
# For some reason, Word 2007 refuses to honor <w:shd> at the table or row # For some reason, Word 2007 refuses to honor <w:shd> at the table or row
# level, despite what the specs say, so we inherit and apply at the # level, despite what the specs say, so we inherit and apply at the
# cell level # cell level
@@ -123,7 +123,7 @@ class Cell(object):
b = makeelement(tcPr, 'w:tcBorders', append=False) b = makeelement(tcPr, 'w:tcBorders', append=False)
for edge, border in iteritems(self.borders): for edge, border in iteritems(self.borders):
if border is not None and border.width > 0 and border.style != 'none': if border is not None and border.width > 0 and border.style != 'none':
makeelement(b, 'w:' + edge, w_val=border.style, w_sz=unicode_type(border.width), w_color=border.color) makeelement(b, 'w:' + edge, w_val=border.style, w_sz=str(border.width), w_color=border.color)
if len(b) > 0: if len(b) > 0:
tcPr.append(b) tcPr.append(b)
@@ -133,7 +133,7 @@ class Cell(object):
if edge in {'top', 'bottom'} or (edge == 'left' and self is self.row.first_cell) or (edge == 'right' and self is self.row.last_cell): if edge in {'top', 'bottom'} or (edge == 'left' and self is self.row.first_cell) or (edge == 'right' and self is self.row.last_cell):
padding += getattr(self.row, 'padding_' + edge) padding += getattr(self.row, 'padding_' + edge)
if padding > 0: if padding > 0:
makeelement(m, 'w:' + edge, w_type='dxa', w_w=unicode_type(int(padding * 20))) makeelement(m, 'w:' + edge, w_type='dxa', w_w=str(int(padding * 20)))
if len(m) > 0: if len(m) > 0:
tcPr.append(m) tcPr.append(m)
@@ -353,14 +353,14 @@ class Table(object):
return return
tbl = makeelement(parent, 'w:tbl') tbl = makeelement(parent, 'w:tbl')
tblPr = makeelement(tbl, 'w:tblPr') tblPr = makeelement(tbl, 'w:tblPr')
makeelement(tblPr, 'w:tblW', w_type=self.width[0], w_w=unicode_type(self.width[1])) makeelement(tblPr, 'w:tblW', w_type=self.width[0], w_w=str(self.width[1]))
if self.float in {'left', 'right'}: if self.float in {'left', 'right'}:
kw = {'w_vertAnchor':'text', 'w_horzAnchor':'text', 'w_tblpXSpec':self.float} kw = {'w_vertAnchor':'text', 'w_horzAnchor':'text', 'w_tblpXSpec':self.float}
for edge in border_edges: for edge in border_edges:
val = getattr(self, 'margin_' + edge) or 0 val = getattr(self, 'margin_' + edge) or 0
if {self.float, edge} == {'left', 'right'}: if {self.float, edge} == {'left', 'right'}:
val = max(val, 2) val = max(val, 2)
kw['w_' + edge + 'FromText'] = unicode_type(max(0, int(val *20))) kw['w_' + edge + 'FromText'] = str(max(0, int(val *20)))
makeelement(tblPr, 'w:tblpPr', **kw) makeelement(tblPr, 'w:tblpPr', **kw)
if self.jc is not None: if self.jc is not None:
makeelement(tblPr, 'w:jc', w_val=self.jc) makeelement(tblPr, 'w:jc', w_val=self.jc)
+8 -8
View File
@@ -15,7 +15,7 @@ from ebook_converter.utils.localization import lang_as_iso639_1
from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.utils.img import save_cover_data_to from ebook_converter.utils.img import save_cover_data_to
from ebook_converter.ebooks.oeb.base import urlnormalize from ebook_converter.ebooks.oeb.base import urlnormalize
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes from ebook_converter.polyglot.builtins import string_or_bytes
from ebook_converter.polyglot.binary import as_base64_unicode from ebook_converter.polyglot.binary import as_base64_unicode
@@ -153,7 +153,7 @@ class FB2MLizer(object):
metadata['author'] = '<author><first-name></first-name><last-name></last-name></author>' metadata['author'] = '<author><first-name></first-name><last-name></last-name></author>'
metadata['keywords'] = '' metadata['keywords'] = ''
tags = list(map(unicode_type, self.oeb_book.metadata.subject)) tags = list(map(str, self.oeb_book.metadata.subject))
if tags: if tags:
tags = ', '.join(prepare_string_for_xml(x) for x in tags) tags = ', '.join(prepare_string_for_xml(x) for x in tags)
metadata['keywords'] = '<keywords>%s</keywords>'%tags metadata['keywords'] = '<keywords>%s</keywords>'%tags
@@ -168,12 +168,12 @@ class FB2MLizer(object):
year = publisher = isbn = '' year = publisher = isbn = ''
identifiers = self.oeb_book.metadata['identifier'] identifiers = self.oeb_book.metadata['identifier']
for x in identifiers: for x in identifiers:
if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'): if x.get(OPF('scheme'), None).lower() == 'uuid' or str(x).startswith('urn:uuid:'):
metadata['id'] = unicode_type(x).split(':')[-1] metadata['id'] = str(x).split(':')[-1]
break break
if metadata['id'] is None: if metadata['id'] is None:
self.log.warn('No UUID identifier found') self.log.warn('No UUID identifier found')
metadata['id'] = unicode_type(uuid.uuid4()) metadata['id'] = str(uuid.uuid4())
try: try:
date = self.oeb_book.metadata['date'][0] date = self.oeb_book.metadata['date'][0]
@@ -235,7 +235,7 @@ class FB2MLizer(object):
</description>''') % metadata </description>''') % metadata
# Remove empty lines. # Remove empty lines.
return '\n'.join(filter(unicode_type.strip, header.splitlines())) return '\n'.join(filter(str.strip, header.splitlines()))
def fb2_footer(self): def fb2_footer(self):
return '</FictionBook>' return '</FictionBook>'
@@ -246,8 +246,8 @@ class FB2MLizer(object):
cover_href = None cover_href = None
# Get the raster cover if it's available. # Get the raster cover if it's available.
if self.oeb_book.metadata.cover and unicode_type(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids: if self.oeb_book.metadata.cover and str(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids:
id = unicode_type(self.oeb_book.metadata.cover[0]) id = str(self.oeb_book.metadata.cover[0])
cover_item = self.oeb_book.manifest.ids[id] cover_item = self.oeb_book.manifest.ids[id]
if cover_item.media_type in OEB_RASTER_IMAGES: if cover_item.media_type in OEB_RASTER_IMAGES:
cover_href = cover_item.href cover_href = cover_item.href
+2 -3
View File
@@ -11,7 +11,6 @@ from ebook_converter.ebooks.oeb.base import urlunquote
from ebook_converter.ebooks.chardet import detect_xml_encoding from ebook_converter.ebooks.chardet import detect_xml_encoding
from ebook_converter.constants import iswindows from ebook_converter.constants import iswindows
from ebook_converter import unicode_path, as_unicode, replace_entities from ebook_converter import unicode_path, as_unicode, replace_entities
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -44,7 +43,7 @@ class Link(object):
:param base: The base directory that relative URLs are with respect to. :param base: The base directory that relative URLs are with respect to.
Must be a unicode string. Must be a unicode string.
''' '''
assert isinstance(url, unicode_type) and isinstance(base, unicode_type) assert isinstance(url, str) and isinstance(base, str)
self.url = url self.url = url
self.parsed_url = urllib.parse.urlparse(self.url) self.parsed_url = urllib.parse.urlparse(self.url)
self.is_local = self.parsed_url.scheme in ('', 'file') self.is_local = self.parsed_url.scheme in ('', 'file')
@@ -149,7 +148,7 @@ class HTMLFile(object):
return 'HTMLFile:%d:%s:%s'%(self.level, 'b' if self.is_binary else 'a', self.path) return 'HTMLFile:%d:%s:%s'%(self.level, 'b' if self.is_binary else 'a', self.path)
def __repr__(self): def __repr__(self):
return unicode_type(self) return str(self)
def find_links(self, src): def find_links(self, src):
for match in self.LINK_PAT.finditer(src): for match in self.LINK_PAT.finditer(src):
+1 -2
View File
@@ -2,7 +2,6 @@ import textwrap, os, glob
from ebook_converter.customize import FileTypePlugin from ebook_converter.customize import FileTypePlugin
from ebook_converter.constants import numeric_version from ebook_converter.constants import numeric_version
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -111,7 +110,7 @@ every time you add an HTML file to the library.\
config_dialog.exec_() config_dialog.exec_()
if config_dialog.result() == QDialog.Accepted: if config_dialog.result() == QDialog.Accepted:
sc = unicode_type(sc.text()).strip() sc = str(sc.text()).strip()
if bf.isChecked(): if bf.isChecked():
sc += '|bf' sc += '|bf'
customize_plugin(self, sc) customize_plugin(self, sc)
+2 -2
View File
@@ -13,7 +13,7 @@ from ebook_converter.ebooks.oeb.base import (
XHTML, XHTML_NS, SVG_NS, barename, namespace, OEB_IMAGES, XLINK, rewrite_links, urlnormalize) XHTML, XHTML_NS, SVG_NS, barename, namespace, OEB_IMAGES, XLINK, rewrite_links, urlnormalize)
from ebook_converter.ebooks.oeb.stylizer import Stylizer from ebook_converter.ebooks.oeb.stylizer import Stylizer
from ebook_converter.utils.logging import default_log from ebook_converter.utils.logging import default_log
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes, as_bytes from ebook_converter.polyglot.builtins import string_or_bytes, as_bytes
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -43,7 +43,7 @@ class OEB2HTML(object):
self.log.info('Converting OEB book to HTML...') self.log.info('Converting OEB book to HTML...')
self.opts = opts self.opts = opts
try: try:
self.book_title = unicode_type(oeb_book.metadata.title[0]) self.book_title = str(oeb_book.metadata.title[0])
except Exception: except Exception:
self.book_title = _('Unknown') self.book_title = _('Unknown')
self.links = {} self.links = {}
+20 -20
View File
@@ -37,7 +37,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import (
RuledLine, Span, Sub, Sup, TextBlock RuledLine, Span, Sub, Sup, TextBlock
) )
from ebook_converter.ptempfile import PersistentTemporaryFile from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes
from ebook_converter.polyglot.urllib import unquote from ebook_converter.polyglot.urllib import unquote
from PIL import Image as PILImage from PIL import Image as PILImage
@@ -276,7 +276,7 @@ class HTMLConverter(object):
update_css(npcss, self.override_pcss) update_css(npcss, self.override_pcss)
paths = [os.path.abspath(path) for path in paths] paths = [os.path.abspath(path) for path in paths]
paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode_type) else path for path in paths] paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, str) else path for path in paths]
while len(paths) > 0 and self.link_level <= self.link_levels: while len(paths) > 0 and self.link_level <= self.link_levels:
for path in paths: for path in paths:
@@ -356,7 +356,7 @@ class HTMLConverter(object):
os.makedirs(tdir) os.makedirs(tdir)
try: try:
with open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb') as f: with open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb') as f:
f.write(unicode_type(soup).encode('utf-8')) f.write(str(soup).encode('utf-8'))
self.log.info(_('Written preprocessed HTML to ')+f.name) self.log.info(_('Written preprocessed HTML to ')+f.name)
except: except:
pass pass
@@ -389,7 +389,7 @@ class HTMLConverter(object):
self.log.info(_('\tConverting to BBeB...')) self.log.info(_('\tConverting to BBeB...'))
self.current_style = {} self.current_style = {}
self.page_break_found = False self.page_break_found = False
if not isinstance(path, unicode_type): if not isinstance(path, str):
path = path.decode(sys.getfilesystemencoding()) path = path.decode(sys.getfilesystemencoding())
self.target_prefix = path self.target_prefix = path
self.previous_text = '\n' self.previous_text = '\n'
@@ -399,7 +399,7 @@ class HTMLConverter(object):
def parse_css(self, style): def parse_css(self, style):
""" """
Parse the contents of a <style> tag or .css file. Parse the contents of a <style> tag or .css file.
@param style: C{unicode_type(style)} should be the CSS to parse. @param style: C{str(style)} should be the CSS to parse.
@return: A dictionary with one entry per selector where the key is the @return: A dictionary with one entry per selector where the key is the
selector name and the value is a dictionary of properties selector name and the value is a dictionary of properties
""" """
@@ -587,7 +587,7 @@ class HTMLConverter(object):
if isinstance(c, HTMLConverter.IGNORED_TAGS): if isinstance(c, HTMLConverter.IGNORED_TAGS):
continue continue
if isinstance(c, bs4.NavigableString): if isinstance(c, bs4.NavigableString):
text += unicode_type(c) text += str(c)
elif isinstance(c, bs4.Tag): elif isinstance(c, bs4.Tag):
if c.name.lower() == 'img' and c.has_attr('alt'): if c.name.lower() == 'img' and c.has_attr('alt'):
alt_text += c['alt'] alt_text += c['alt']
@@ -642,7 +642,7 @@ class HTMLConverter(object):
para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment'] para, text, path, fragment = link['para'], link['text'], link['path'], link['fragment']
ascii_text = text ascii_text = text
if not isinstance(path, unicode_type): if not isinstance(path, str):
path = path.decode(sys.getfilesystemencoding()) path = path.decode(sys.getfilesystemencoding())
if path in self.processed_files: if path in self.processed_files:
if path+fragment in self.targets.keys(): if path+fragment in self.targets.keys():
@@ -1085,7 +1085,7 @@ class HTMLConverter(object):
s1, s2 = get('margin'), get('padding') s1, s2 = get('margin'), get('padding')
bl = unicode_type(self.current_block.blockStyle.attrs['blockwidth'])+'px' bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
def set(default, one, two): def set(default, one, two):
fval = None fval = None
@@ -1214,7 +1214,7 @@ class HTMLConverter(object):
ans = 120 ans = 120
if ans is not None: if ans is not None:
ans += int(self.font_delta * 20) ans += int(self.font_delta * 20)
ans = unicode_type(ans) ans = str(ans)
return ans return ans
family, weight, style, variant = 'serif', 'normal', 'normal', None family, weight, style, variant = 'serif', 'normal', 'normal', None
@@ -1320,10 +1320,10 @@ class HTMLConverter(object):
def text_properties(self, tag_css): def text_properties(self, tag_css):
indent = self.book.defaultTextStyle.attrs['parindent'] indent = self.book.defaultTextStyle.attrs['parindent']
if 'text-indent' in tag_css: if 'text-indent' in tag_css:
bl = unicode_type(self.current_block.blockStyle.attrs['blockwidth'])+'px' bl = str(self.current_block.blockStyle.attrs['blockwidth'])+'px'
if 'em' in tag_css['text-indent']: if 'em' in tag_css['text-indent']:
bl = '10pt' bl = '10pt'
indent = self.unit_convert(unicode_type(tag_css['text-indent']), pts=True, base_length=bl) indent = self.unit_convert(str(tag_css['text-indent']), pts=True, base_length=bl)
if not indent: if not indent:
indent = 0 indent = 0
if indent > 0 and indent < 10 * self.minimum_indent: if indent > 0 and indent < 10 * self.minimum_indent:
@@ -1518,11 +1518,11 @@ class HTMLConverter(object):
elif not urllib.parse.urlparse(tag['src'])[0]: elif not urllib.parse.urlparse(tag['src'])[0]:
self.log.warn('Could not find image: '+tag['src']) self.log.warn('Could not find image: '+tag['src'])
else: else:
self.log.debug("Failed to process: %s"%unicode_type(tag)) self.log.debug("Failed to process: %s"%str(tag))
elif tagname in ['style', 'link']: elif tagname in ['style', 'link']:
ncss, npcss = {}, {} ncss, npcss = {}, {}
if tagname == 'style': if tagname == 'style':
text = ''.join([unicode_type(i) for i in tag.findAll(text=True)]) text = ''.join([str(i) for i in tag.findAll(text=True)])
css, pcss = self.parse_css(text) css, pcss = self.parse_css(text)
ncss.update(css) ncss.update(css)
npcss.update(pcss) npcss.update(pcss)
@@ -1554,7 +1554,7 @@ class HTMLConverter(object):
if tag.contents: if tag.contents:
c = tag.contents[0] c = tag.contents[0]
if isinstance(c, bs4.NavigableString): if isinstance(c, bs4.NavigableString):
c = unicode_type(c).replace('\r\n', '\n').replace('\r', '\n') c = str(c).replace('\r\n', '\n').replace('\r', '\n')
if c.startswith('\n'): if c.startswith('\n'):
c = c[1:] c = c[1:]
tag.contents[0] = bs4.NavigableString(c) tag.contents[0] = bs4.NavigableString(c)
@@ -1612,7 +1612,7 @@ class HTMLConverter(object):
in_ol = parent.name.lower() == 'ol' in_ol = parent.name.lower() == 'ol'
break break
parent = parent.parent parent = parent.parent
prepend = unicode_type(self.list_counter)+'. ' if in_ol else '\u2022' + ' ' prepend = str(self.list_counter)+'. ' if in_ol else '\u2022' + ' '
self.current_para.append(Span(prepend)) self.current_para.append(Span(prepend))
self.process_children(tag, tag_css, tag_pseudo_css) self.process_children(tag, tag_css, tag_pseudo_css)
if in_ol: if in_ol:
@@ -1655,7 +1655,7 @@ class HTMLConverter(object):
if (self.anchor_ids and tag.has_attr('id')) or (self.book_designer and tag.get('class') in ('title', ['title'])): if (self.anchor_ids and tag.has_attr('id')) or (self.book_designer and tag.get('class') in ('title', ['title'])):
if not tag.has_attr('id'): if not tag.has_attr('id'):
tag['id'] = __appname__+'_id_'+unicode_type(self.id_counter) tag['id'] = __appname__+'_id_'+str(self.id_counter)
self.id_counter += 1 self.id_counter += 1
tkey = self.target_prefix+tag['id'] tkey = self.target_prefix+tag['id']
@@ -1728,7 +1728,7 @@ class HTMLConverter(object):
except Exception as err: except Exception as err:
self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err)) self.log.warning(_('An error occurred while processing a table: %s. Ignoring table markup.')%repr(err))
self.log.exception('') self.log.exception('')
self.log.debug(_('Bad table:\n%s')%unicode_type(tag)[:300]) self.log.debug(_('Bad table:\n%s')%str(tag)[:300])
self.in_table = False self.in_table = False
self.process_children(tag, tag_css, tag_pseudo_css) self.process_children(tag, tag_css, tag_pseudo_css)
finally: finally:
@@ -1824,9 +1824,9 @@ def process_file(path, options, logger):
for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'): for prop in ('author', 'author_sort', 'title', 'title_sort', 'publisher', 'freetext'):
val = getattr(options, prop, None) val = getattr(options, prop, None)
if val and not isinstance(val, unicode_type): if val and not isinstance(val, str):
soup = html5_parser(val) soup = html5_parser(val)
setattr(options, prop, unicode_type(soup)) setattr(options, prop, str(soup))
title = (options.title, options.title_sort) title = (options.title, options.title_sort)
author = (options.author, options.author_sort) author = (options.author, options.author_sort)
@@ -1870,7 +1870,7 @@ def process_file(path, options, logger):
options.force_page_break = fpb options.force_page_break = fpb
options.link_exclude = le options.link_exclude = le
options.page_break = pb options.page_break = pb
if not isinstance(options.chapter_regex, unicode_type): if not isinstance(options.chapter_regex, str):
options.chapter_regex = options.chapter_regex.decode(preferred_encoding) options.chapter_regex = options.chapter_regex.decode(preferred_encoding)
options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE) options.chapter_regex = re.compile(options.chapter_regex, re.IGNORECASE)
fpba = options.force_page_break_attr.split(',') fpba = options.force_page_break_attr.split(',')
+4 -4
View File
@@ -1,7 +1,7 @@
""" """
elements.py -- replacements and helpers for ElementTree elements.py -- replacements and helpers for ElementTree
""" """
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes from ebook_converter.polyglot.builtins import string_or_bytes
class ElementWriter(object): class ElementWriter(object):
@@ -24,9 +24,9 @@ class ElementWriter(object):
return text return text
def _writeAttribute(self, f, name, value): def _writeAttribute(self, f, name, value):
f.write(' %s="' % unicode_type(name)) f.write(' %s="' % str(name))
if not isinstance(value, string_or_bytes): if not isinstance(value, string_or_bytes):
value = unicode_type(value) value = str(value)
value = self._encodeCdata(value) value = self._encodeCdata(value)
value = value.replace('"', '&quot;') value = value.replace('"', '&quot;')
f.write(value) f.write(value)
@@ -37,7 +37,7 @@ class ElementWriter(object):
f.write(text) f.write(text)
def _write(self, f, e): def _write(self, f, e):
f.write('<' + unicode_type(e.tag)) f.write('<' + str(e.tag))
attributes = e.items() attributes = e.items()
attributes.sort() attributes.sort()
+3 -3
View File
@@ -9,7 +9,7 @@ import codecs
import os import os
from .pylrfopt import tagListOptimizer from .pylrfopt import tagListOptimizer
from ebook_converter.polyglot.builtins import iteritems, string_or_bytes, unicode_type from ebook_converter.polyglot.builtins import iteritems, string_or_bytes
PYLRF_VERSION = "1.0" PYLRF_VERSION = "1.0"
@@ -82,7 +82,7 @@ def writeWord(f, word):
if int(word) > 65535: if int(word) > 65535:
raise LrfError('Cannot encode a number greater than 65535 in a word.') raise LrfError('Cannot encode a number greater than 65535 in a word.')
if int(word) < 0: if int(word) < 0:
raise LrfError('Cannot encode a number < 0 in a word: '+unicode_type(word)) raise LrfError('Cannot encode a number < 0 in a word: '+str(word))
f.write(struct.pack("<H", int(word))) f.write(struct.pack("<H", int(word)))
@@ -508,7 +508,7 @@ class LrfObject(object):
raise LrfError("object name %s not recognized" % name) raise LrfError("object name %s not recognized" % name)
def __str__(self): def __str__(self):
return 'LRFObject: ' + self.name + ", " + unicode_type(self.objId) return 'LRFObject: ' + self.name + ", " + str(self.objId)
def appendLrfTag(self, tag): def appendLrfTag(self, tag):
self.tags.append(tag) self.tags.append(tag)
+39 -39
View File
@@ -51,7 +51,7 @@ DEFAULT_GENREADING = "fs" # default is yes to both lrf and lrs
from ebook_converter import __appname__, __version__ from ebook_converter import __appname__, __version__
from ebook_converter import entity_to_unicode from ebook_converter import entity_to_unicode
from ebook_converter.polyglot.builtins import string_or_bytes, unicode_type, iteritems, native_string_type from ebook_converter.polyglot.builtins import string_or_bytes, iteritems, native_string_type
class LrsError(Exception): class LrsError(Exception):
@@ -226,7 +226,7 @@ class LrsAttributes(object):
raise LrsError("%s does not support setting %s" % raise LrsError("%s does not support setting %s" %
(self.__class__.__name__, name)) (self.__class__.__name__, name))
if isinstance(value, int): if isinstance(value, int):
value = unicode_type(value) value = str(value)
self.attrs[name] = value self.attrs[name] = value
@@ -330,13 +330,13 @@ class LrsObject(object):
def lrsObjectElement(self, name, objlabel="objlabel", labelName=None, def lrsObjectElement(self, name, objlabel="objlabel", labelName=None,
labelDecorate=True, **settings): labelDecorate=True, **settings):
element = Element(name) element = Element(name)
element.attrib["objid"] = unicode_type(self.objId) element.attrib["objid"] = str(self.objId)
if labelName is None: if labelName is None:
labelName = name labelName = name
if labelDecorate: if labelDecorate:
label = "%s.%d" % (labelName, self.objId) label = "%s.%d" % (labelName, self.objId)
else: else:
label = unicode_type(self.objId) label = str(self.objId)
element.attrib[objlabel] = label element.attrib[objlabel] = label
element.attrib.update(settings) element.attrib.update(settings)
return element return element
@@ -562,7 +562,7 @@ class Book(Delegator):
factor = base_font_size / old_base_font_size factor = base_font_size / old_base_font_size
def rescale(old): def rescale(old):
return unicode_type(int(int(old) * factor)) return str(int(int(old) * factor))
text_blocks = list(main.get_all(lambda x: isinstance(x, TextBlock))) text_blocks = list(main.get_all(lambda x: isinstance(x, TextBlock)))
for tb in text_blocks: for tb in text_blocks:
@@ -693,7 +693,7 @@ class TableOfContents(object):
def addTocEntry(self, tocLabel, textBlock): def addTocEntry(self, tocLabel, textBlock):
if not isinstance(textBlock, (Canvas, TextBlock, ImageBlock, RuledLine)): if not isinstance(textBlock, (Canvas, TextBlock, ImageBlock, RuledLine)):
raise LrsError("TOC destination must be a Canvas, TextBlock, ImageBlock or RuledLine"+ raise LrsError("TOC destination must be a Canvas, TextBlock, ImageBlock or RuledLine"+
" not a " + unicode_type(type(textBlock))) " not a " + str(type(textBlock)))
if textBlock.parent is None: if textBlock.parent is None:
raise LrsError("TOC text block must be already appended to a page") raise LrsError("TOC text block must be already appended to a page")
@@ -743,8 +743,8 @@ class TocLabel(object):
def toElement(self, se): def toElement(self, se):
return ElementWithText("TocLabel", self.label, return ElementWithText("TocLabel", self.label,
refobj=unicode_type(self.textBlock.objId), refobj=str(self.textBlock.objId),
refpage=unicode_type(self.textBlock.parent.objId)) refpage=str(self.textBlock.parent.objId))
class BookInfo(object): class BookInfo(object):
@@ -805,7 +805,7 @@ class DocInfo(object):
self.thumbnail = None self.thumbnail = None
self.language = "en" self.language = "en"
self.creator = None self.creator = None
self.creationdate = unicode_type(isoformat(date.today())) self.creationdate = str(isoformat(date.today()))
self.producer = "%s v%s"%(__appname__, __version__) self.producer = "%s v%s"%(__appname__, __version__)
self.numberofpages = "0" self.numberofpages = "0"
@@ -829,7 +829,7 @@ class DocInfo(object):
docInfo.append(ElementWithText("Creator", self.creator)) docInfo.append(ElementWithText("Creator", self.creator))
docInfo.append(ElementWithText("CreationDate", self.creationdate)) docInfo.append(ElementWithText("CreationDate", self.creationdate))
docInfo.append(ElementWithText("Producer", self.producer)) docInfo.append(ElementWithText("Producer", self.producer))
docInfo.append(ElementWithText("SumPage", unicode_type(self.numberofpages))) docInfo.append(ElementWithText("SumPage", str(self.numberofpages)))
return docInfo return docInfo
@@ -1091,7 +1091,7 @@ class LrsStyle(LrsObject, LrsAttributes, LrsContainer):
self.elementName = elementName self.elementName = elementName
self.objectsAppended = False self.objectsAppended = False
# self.label = "%s.%d" % (elementName, self.objId) # self.label = "%s.%d" % (elementName, self.objId)
# self.label = unicode_type(self.objId) # self.label = str(self.objId)
# self.parent = None # self.parent = None
def update(self, settings): def update(self, settings):
@@ -1101,11 +1101,11 @@ class LrsStyle(LrsObject, LrsAttributes, LrsContainer):
self.attrs[name] = value self.attrs[name] = value
def getLabel(self): def getLabel(self):
return unicode_type(self.objId) return str(self.objId)
def toElement(self, se): def toElement(self, se):
element = Element(self.elementName, stylelabel=self.getLabel(), element = Element(self.elementName, stylelabel=self.getLabel(),
objid=unicode_type(self.objId)) objid=str(self.objId))
element.attrib.update(self.attrs) element.attrib.update(self.attrs)
return element return element
@@ -1236,14 +1236,14 @@ class PageStyle(LrsStyle):
del settings[evenbase] del settings[evenbase]
if evenObj.parent is None: if evenObj.parent is None:
parent.append(evenObj) parent.append(evenObj)
settings[evenbase + "id"] = unicode_type(evenObj.objId) settings[evenbase + "id"] = str(evenObj.objId)
if oddbase in settings: if oddbase in settings:
oddObj = settings[oddbase] oddObj = settings[oddbase]
del settings[oddbase] del settings[oddbase]
if oddObj.parent is None: if oddObj.parent is None:
parent.append(oddObj) parent.append(oddObj)
settings[oddbase + "id"] = unicode_type(oddObj.objId) settings[oddbase + "id"] = str(oddObj.objId)
def appendReferencedObjects(self, parent): def appendReferencedObjects(self, parent):
if self.objectsAppended: if self.objectsAppended:
@@ -1486,7 +1486,7 @@ class Paragraph(LrsContainer):
def __init__(self, text=None): def __init__(self, text=None):
LrsContainer.__init__(self, [Text, CR, DropCaps, CharButton, LrsContainer.__init__(self, [Text, CR, DropCaps, CharButton,
LrsSimpleChar1, bytes, unicode_type]) LrsSimpleChar1, bytes, str])
if text is not None: if text is not None:
if isinstance(text, string_or_bytes): if isinstance(text, string_or_bytes):
text = Text(text) text = Text(text)
@@ -1521,7 +1521,7 @@ class Paragraph(LrsContainer):
class LrsTextTag(LrsContainer): class LrsTextTag(LrsContainer):
def __init__(self, text, validContents): def __init__(self, text, validContents):
LrsContainer.__init__(self, [Text, bytes, unicode_type] + validContents) LrsContainer.__init__(self, [Text, bytes, str] + validContents)
if text is not None: if text is not None:
self.append(text) self.append(text)
@@ -1580,7 +1580,7 @@ class DropCaps(LrsTextTag):
return self.text is None or not self.text.strip() return self.text is None or not self.text.strip()
def toElement(self, se): def toElement(self, se):
elem = Element('DrawChar', line=unicode_type(self.line)) elem = Element('DrawChar', line=str(self.line))
appendTextElements(elem, self.contents, se) appendTextElements(elem, self.contents, se)
return elem return elem
@@ -1656,7 +1656,7 @@ class JumpTo(LrsContainer):
self.textBlock = textBlock self.textBlock = textBlock
def toElement(self, se): def toElement(self, se):
return Element("JumpTo", refpage=unicode_type(self.textBlock.parent.objId), refobj=unicode_type(self.textBlock.objId)) return Element("JumpTo", refpage=str(self.textBlock.parent.objId), refobj=str(self.textBlock.objId))
class Plot(LrsSimpleChar1, LrsContainer): class Plot(LrsSimpleChar1, LrsContainer):
@@ -1688,8 +1688,8 @@ class Plot(LrsSimpleChar1, LrsContainer):
parent.append(self.obj) parent.append(self.obj)
def toElement(self, se): def toElement(self, se):
elem = Element('Plot', xsize=unicode_type(self.xsize), ysize=unicode_type(self.ysize), elem = Element('Plot', xsize=str(self.xsize), ysize=str(self.ysize),
refobj=unicode_type(self.obj.objId)) refobj=str(self.obj.objId))
if self.adjustment: if self.adjustment:
elem.set('adjustment', self.adjustment) elem.set('adjustment', self.adjustment)
return elem return elem
@@ -1771,7 +1771,7 @@ class Space(LrsSimpleChar1, LrsContainer):
if self.xsize == 0: if self.xsize == 0:
return return
return Element("Space", xsize=unicode_type(self.xsize)) return Element("Space", xsize=str(self.xsize))
def toLrfContainer(self, lrfWriter, container): def toLrfContainer(self, lrfWriter, container):
if self.xsize != 0: if self.xsize != 0:
@@ -1785,7 +1785,7 @@ class Box(LrsSimpleChar1, LrsContainer):
""" """
def __init__(self, linetype="solid"): def __init__(self, linetype="solid"):
LrsContainer.__init__(self, [Text, bytes, unicode_type]) LrsContainer.__init__(self, [Text, bytes, str])
if linetype not in LINE_TYPE_ENCODING: if linetype not in LINE_TYPE_ENCODING:
raise LrsError(linetype + " is not a valid line type") raise LrsError(linetype + " is not a valid line type")
self.linetype = linetype self.linetype = linetype
@@ -1805,7 +1805,7 @@ class Box(LrsSimpleChar1, LrsContainer):
class Span(LrsSimpleChar1, LrsContainer): class Span(LrsSimpleChar1, LrsContainer):
def __init__(self, text=None, **attrs): def __init__(self, text=None, **attrs):
LrsContainer.__init__(self, [LrsSimpleChar1, Text, bytes, unicode_type]) LrsContainer.__init__(self, [LrsSimpleChar1, Text, bytes, str])
if text is not None: if text is not None:
if isinstance(text, string_or_bytes): if isinstance(text, string_or_bytes):
text = Text(text) text = Text(text)
@@ -1858,7 +1858,7 @@ class Span(LrsSimpleChar1, LrsContainer):
def toElement(self, se): def toElement(self, se):
element = Element('Span') element = Element('Span')
for (key, value) in self.attrs.items(): for (key, value) in self.attrs.items():
element.set(key, unicode_type(value)) element.set(key, str(value))
appendTextElements(element, self.contents, se) appendTextElements(element, self.contents, se)
return element return element
@@ -1871,9 +1871,9 @@ class EmpLine(LrsTextTag, LrsSimpleChar1):
def __init__(self, text=None, emplineposition='before', emplinetype='solid'): def __init__(self, text=None, emplineposition='before', emplinetype='solid'):
LrsTextTag.__init__(self, text, [LrsSimpleChar1]) LrsTextTag.__init__(self, text, [LrsSimpleChar1])
if emplineposition not in self.__class__.emplinepositions: if emplineposition not in self.__class__.emplinepositions:
raise LrsError('emplineposition for an EmpLine must be one of: '+unicode_type(self.__class__.emplinepositions)) raise LrsError('emplineposition for an EmpLine must be one of: '+str(self.__class__.emplinepositions))
if emplinetype not in self.__class__.emplinetypes: if emplinetype not in self.__class__.emplinetypes:
raise LrsError('emplinetype for an EmpLine must be one of: '+unicode_type(self.__class__.emplinetypes)) raise LrsError('emplinetype for an EmpLine must be one of: '+str(self.__class__.emplinetypes))
self.emplinetype = emplinetype self.emplinetype = emplinetype
self.emplineposition = emplineposition self.emplineposition = emplineposition
@@ -1933,9 +1933,9 @@ class BlockSpace(LrsContainer):
element = Element("BlockSpace") element = Element("BlockSpace")
if self.xspace != 0: if self.xspace != 0:
element.attrib["xspace"] = unicode_type(self.xspace) element.attrib["xspace"] = str(self.xspace)
if self.yspace != 0: if self.yspace != 0:
element.attrib["yspace"] = unicode_type(self.yspace) element.attrib["yspace"] = str(self.yspace)
return element return element
@@ -1949,7 +1949,7 @@ class CharButton(LrsSimpleChar1, LrsContainer):
""" """
def __init__(self, button, text=None): def __init__(self, button, text=None):
LrsContainer.__init__(self, [bytes, unicode_type, Text, LrsSimpleChar1]) LrsContainer.__init__(self, [bytes, str, Text, LrsSimpleChar1])
self.button = None self.button = None
if button is not None: if button is not None:
self.setButton(button) self.setButton(button)
@@ -1979,7 +1979,7 @@ class CharButton(LrsSimpleChar1, LrsContainer):
container.appendLrfTag(LrfTag("CharButtonEnd")) container.appendLrfTag(LrfTag("CharButtonEnd"))
def toElement(self, se): def toElement(self, se):
cb = Element("CharButton", refobj=unicode_type(self.button.objId)) cb = Element("CharButton", refobj=str(self.button.objId))
appendTextElements(cb, self.contents, se) appendTextElements(cb, self.contents, se)
return cb return cb
@@ -2081,8 +2081,8 @@ class JumpButton(LrsObject, LrsContainer):
b = self.lrsObjectElement("Button") b = self.lrsObjectElement("Button")
pb = SubElement(b, "PushButton") pb = SubElement(b, "PushButton")
SubElement(pb, "JumpTo", SubElement(pb, "JumpTo",
refpage=unicode_type(self.textBlock.parent.objId), refpage=str(self.textBlock.parent.objId),
refobj=unicode_type(self.textBlock.objId)) refobj=str(self.textBlock.objId))
return b return b
@@ -2230,8 +2230,8 @@ class PutObj(LrsContainer):
self.content.objId))) self.content.objId)))
def toElement(self, se): def toElement(self, se):
el = Element("PutObj", x1=unicode_type(self.x1), y1=unicode_type(self.y1), el = Element("PutObj", x1=str(self.x1), y1=str(self.y1),
refobj=unicode_type(self.content.objId)) refobj=str(self.content.objId))
return el return el
@@ -2313,9 +2313,9 @@ class Image(LrsObject, LrsContainer, LrsAttributes):
def toElement(self, se): def toElement(self, se):
element = self.lrsObjectElement("Image", **self.attrs) element = self.lrsObjectElement("Image", **self.attrs)
element.set("refstream", unicode_type(self.refstream.objId)) element.set("refstream", str(self.refstream.objId))
for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]: for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]:
element.set(name, unicode_type(getattr(self, name))) element.set(name, str(getattr(self, name)))
return element return element
def toLrf(self, lrfWriter): def toLrf(self, lrfWriter):
@@ -2396,9 +2396,9 @@ class ImageBlock(LrsObject, LrsContainer, LrsAttributes):
def toElement(self, se): def toElement(self, se):
element = self.lrsObjectElement("ImageBlock", **self.attrs) element = self.lrsObjectElement("ImageBlock", **self.attrs)
element.set("refstream", unicode_type(self.refstream.objId)) element.set("refstream", str(self.refstream.objId))
for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]: for name in ["x0", "y0", "x1", "y1", "xsize", "ysize"]:
element.set(name, unicode_type(getattr(self, name))) element.set(name, str(getattr(self, name)))
element.text = self.alttext element.text = self.alttext
return element return element
+8 -8
View File
@@ -9,7 +9,7 @@ import urllib.parse
from ebook_converter import relpath, guess_type, prints, force_unicode from ebook_converter import relpath, guess_type, prints, force_unicode
from ebook_converter.utils.config_base import tweaks from ebook_converter.utils.config_base import tweaks
from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, getcwd, iteritems, itervalues, as_unicode from ebook_converter.polyglot.builtins import codepoint_to_chr, getcwd, iteritems, itervalues, as_unicode
from ebook_converter.polyglot.urllib import unquote from ebook_converter.polyglot.urllib import unquote
@@ -190,7 +190,7 @@ coding = list(zip(
def roman(num): def roman(num):
if num <= 0 or num >= 4000 or int(num) != num: if num <= 0 or num >= 4000 or int(num) != num:
return unicode_type(num) return str(num)
result = [] result = []
for d, r in coding: for d, r in coding:
while num >= d: while num >= d:
@@ -205,7 +205,7 @@ def fmt_sidx(i, fmt='%.2f', use_roman=False):
try: try:
i = float(i) i = float(i)
except TypeError: except TypeError:
return unicode_type(i) return str(i)
if int(i) == float(i): if int(i) == float(i):
return roman(int(i)) if use_roman else '%d'%int(i) return roman(int(i)) if use_roman else '%d'%int(i)
return fmt%i return fmt%i
@@ -249,7 +249,7 @@ class Resource(object):
self._href = href_or_path self._href = href_or_path
else: else:
pc = url[2] pc = url[2]
if isinstance(pc, unicode_type): if isinstance(pc, str):
pc = pc.encode('utf-8') pc = pc.encode('utf-8')
pc = unquote(pc).decode('utf-8') pc = unquote(pc).decode('utf-8')
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep))) self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
@@ -270,7 +270,7 @@ class Resource(object):
basedir = getcwd() basedir = getcwd()
if self.path is None: if self.path is None:
return self._href return self._href
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment f = self.fragment.encode('utf-8') if isinstance(self.fragment, str) else self.fragment
frag = '#'+as_unicode(urllib.parse.quote(f)) if self.fragment else '' frag = '#'+as_unicode(urllib.parse.quote(f)) if self.fragment else ''
if self.path == basedir: if self.path == basedir:
return ''+frag return ''+frag
@@ -278,7 +278,7 @@ class Resource(object):
rpath = relpath(self.path, basedir) rpath = relpath(self.path, basedir)
except OSError: # On windows path and basedir could be on different drives except OSError: # On windows path and basedir could be on different drives
rpath = self.path rpath = self.path
if isinstance(rpath, unicode_type): if isinstance(rpath, str):
rpath = rpath.encode('utf-8') rpath = rpath.encode('utf-8')
return as_unicode(urllib.parse.quote(rpath.replace(os.sep, '/')))+frag return as_unicode(urllib.parse.quote(rpath.replace(os.sep, '/')))+frag
@@ -315,7 +315,7 @@ class ResourceCollection(object):
return '[%s]'%', '.join(resources) return '[%s]'%', '.join(resources)
def __repr__(self): def __repr__(self):
return unicode_type(self) return str(self)
def append(self, resource): def append(self, resource):
if not isinstance(resource, Resource): if not isinstance(resource, Resource):
@@ -377,7 +377,7 @@ def check_isbn13(isbn):
check = 10 - (sum(products)%10) check = 10 - (sum(products)%10)
if check == 10: if check == 10:
check = 0 check = 0
if unicode_type(check) == isbn[12]: if str(check) == isbn[12]:
return isbn return isbn
except Exception: except Exception:
pass pass
+1 -2
View File
@@ -3,7 +3,6 @@ from contextlib import closing
from ebook_converter.customize import FileTypePlugin from ebook_converter.customize import FileTypePlugin
from ebook_converter.utils.localization import canonicalize_lang from ebook_converter.utils.localization import canonicalize_lang
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -167,7 +166,7 @@ def get_comic_book_info(d, mi, series_index='volume'):
from datetime import date from datetime import date
try: try:
dt = date(puby, 6 if pubm is None else pubm, 15) dt = date(puby, 6 if pubm is None else pubm, 15)
dt = parse_only_date(unicode_type(dt)) dt = parse_only_date(str(dt))
mi.pubdate = dt mi.pubdate = dt
except Exception: except Exception:
pass pass
+26 -26
View File
@@ -7,7 +7,7 @@ from ebook_converter.ebooks.metadata.book import (SC_COPYABLE_FIELDS,
TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS) TOP_LEVEL_IDENTIFIERS, ALL_METADATA_FIELDS)
from ebook_converter.library.field_metadata import FieldMetadata from ebook_converter.library.field_metadata import FieldMetadata
from ebook_converter.utils.icu import sort_key from ebook_converter.utils.icu import sort_key
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -22,7 +22,7 @@ SIMPLE_SET = frozenset(SIMPLE_GET - {'identifiers'})
def human_readable(size, precision=2): def human_readable(size, precision=2):
""" Convert a size in bytes into megabytes """ """ Convert a size in bytes into megabytes """
return ('%.'+unicode_type(precision)+'f'+ 'MB') % (size/(1024*1024),) return ('%.'+str(precision)+'f'+ 'MB') % (size/(1024*1024),)
NULL_VALUES = { NULL_VALUES = {
@@ -606,14 +606,14 @@ class Metadata(object):
return authors_to_string(self.authors) return authors_to_string(self.authors)
def format_tags(self): def format_tags(self):
return ', '.join([unicode_type(t) for t in sorted(self.tags, key=sort_key)]) return ', '.join([str(t) for t in sorted(self.tags, key=sort_key)])
def format_rating(self, v=None, divide_by=1): def format_rating(self, v=None, divide_by=1):
if v is None: if v is None:
if self.rating is not None: if self.rating is not None:
return unicode_type(self.rating/divide_by) return str(self.rating/divide_by)
return 'None' return 'None'
return unicode_type(v/divide_by) return str(v/divide_by)
def format_field(self, key, series_with_index=True): def format_field(self, key, series_with_index=True):
''' '''
@@ -637,15 +637,15 @@ class Metadata(object):
if cmeta and cmeta['datatype'] == 'series': if cmeta and cmeta['datatype'] == 'series':
if self.get(tkey): if self.get(tkey):
res = self.get_extra(tkey) res = self.get_extra(tkey)
return (unicode_type(cmeta['name']+'_index'), return (str(cmeta['name']+'_index'),
self.format_series_index(res), res, cmeta) self.format_series_index(res), res, cmeta)
else: else:
return (unicode_type(cmeta['name']+'_index'), '', '', cmeta) return (str(cmeta['name']+'_index'), '', '', cmeta)
if key in self.custom_field_keys(): if key in self.custom_field_keys():
res = self.get(key, None) # get evaluates all necessary composites res = self.get(key, None) # get evaluates all necessary composites
cmeta = self.get_user_metadata(key, make_copy=False) cmeta = self.get_user_metadata(key, make_copy=False)
name = unicode_type(cmeta['name']) name = str(cmeta['name'])
if res is None or res == '': # can't check "not res" because of numeric fields if res is None or res == '': # can't check "not res" because of numeric fields
return (name, res, None, None) return (name, res, None, None)
orig_res = res orig_res = res
@@ -668,7 +668,7 @@ class Metadata(object):
res = fmt.format(res) res = fmt.format(res)
except: except:
pass pass
return (name, unicode_type(res), orig_res, cmeta) return (name, str(res), orig_res, cmeta)
# convert top-level ids into their value # convert top-level ids into their value
if key in TOP_LEVEL_IDENTIFIERS: if key in TOP_LEVEL_IDENTIFIERS:
@@ -682,11 +682,11 @@ class Metadata(object):
if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field': if fmkey in field_metadata and field_metadata[fmkey]['kind'] == 'field':
res = self.get(key, None) res = self.get(key, None)
fmeta = field_metadata[fmkey] fmeta = field_metadata[fmkey]
name = unicode_type(fmeta['name']) name = str(fmeta['name'])
if res is None or res == '': if res is None or res == '':
return (name, res, None, None) return (name, res, None, None)
orig_res = res orig_res = res
name = unicode_type(fmeta['name']) name = str(fmeta['name'])
datatype = fmeta['datatype'] datatype = fmeta['datatype']
if key == 'authors': if key == 'authors':
res = authors_to_string(res) res = authors_to_string(res)
@@ -704,7 +704,7 @@ class Metadata(object):
res = '%.2g'%(res/2) res = '%.2g'%(res/2)
elif key == 'size': elif key == 'size':
res = human_readable(res) res = human_readable(res)
return (name, unicode_type(res), orig_res, fmeta) return (name, str(res), orig_res, fmeta)
return (None, None, None, None) return (None, None, None, None)
@@ -718,7 +718,7 @@ class Metadata(object):
ans = [] ans = []
def fmt(x, y): def fmt(x, y):
ans.append('%-20s: %s'%(unicode_type(x), unicode_type(y))) ans.append('%-20s: %s'%(str(x), str(y)))
fmt('Title', self.title) fmt('Title', self.title)
if self.title_sort: if self.title_sort:
@@ -732,7 +732,7 @@ class Metadata(object):
if getattr(self, 'book_producer', False): if getattr(self, 'book_producer', False):
fmt('Book Producer', self.book_producer) fmt('Book Producer', self.book_producer)
if self.tags: if self.tags:
fmt('Tags', ', '.join([unicode_type(t) for t in self.tags])) fmt('Tags', ', '.join([str(t) for t in self.tags]))
if self.series: if self.series:
fmt('Series', self.series + ' #%s'%self.format_series_index()) fmt('Series', self.series + ' #%s'%self.format_series_index())
if not self.is_null('languages'): if not self.is_null('languages'):
@@ -745,7 +745,7 @@ class Metadata(object):
if self.pubdate is not None: if self.pubdate is not None:
fmt('Published', isoformat(self.pubdate)) fmt('Published', isoformat(self.pubdate))
if self.rights is not None: if self.rights is not None:
fmt('Rights', unicode_type(self.rights)) fmt('Rights', str(self.rights))
if self.identifiers: if self.identifiers:
fmt('Identifiers', ', '.join(['%s:%s'%(k, v) for k, v in fmt('Identifiers', ', '.join(['%s:%s'%(k, v) for k, v in
iteritems(self.identifiers)])) iteritems(self.identifiers)]))
@@ -756,7 +756,7 @@ class Metadata(object):
val = self.get(key, None) val = self.get(key, None)
if val: if val:
(name, val) = self.format_field(key) (name, val) = self.format_field(key)
fmt(name, unicode_type(val)) fmt(name, str(val))
return '\n'.join(ans) return '\n'.join(ans)
def to_html(self): def to_html(self):
@@ -765,22 +765,22 @@ class Metadata(object):
''' '''
from ebook_converter.ebooks.metadata import authors_to_string from ebook_converter.ebooks.metadata import authors_to_string
from ebook_converter.utils.date import isoformat from ebook_converter.utils.date import isoformat
ans = [(_('Title'), unicode_type(self.title))] ans = [(_('Title'), str(self.title))]
ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))] ans += [(_('Author(s)'), (authors_to_string(self.authors) if self.authors else _('Unknown')))]
ans += [(_('Publisher'), unicode_type(self.publisher))] ans += [(_('Publisher'), str(self.publisher))]
ans += [(_('Producer'), unicode_type(self.book_producer))] ans += [(_('Producer'), str(self.book_producer))]
ans += [(_('Comments'), unicode_type(self.comments))] ans += [(_('Comments'), str(self.comments))]
ans += [('ISBN', unicode_type(self.isbn))] ans += [('ISBN', str(self.isbn))]
ans += [(_('Tags'), ', '.join([unicode_type(t) for t in self.tags]))] ans += [(_('Tags'), ', '.join([str(t) for t in self.tags]))]
if self.series: if self.series:
ans += [(_('Series'), unicode_type(self.series) + ' #%s'%self.format_series_index())] ans += [(_('Series'), str(self.series) + ' #%s'%self.format_series_index())]
ans += [(_('Languages'), ', '.join(self.languages))] ans += [(_('Languages'), ', '.join(self.languages))]
if self.timestamp is not None: if self.timestamp is not None:
ans += [(_('Timestamp'), unicode_type(isoformat(self.timestamp, as_utc=False, sep=' ')))] ans += [(_('Timestamp'), str(isoformat(self.timestamp, as_utc=False, sep=' ')))]
if self.pubdate is not None: if self.pubdate is not None:
ans += [(_('Published'), unicode_type(isoformat(self.pubdate, as_utc=False, sep=' ')))] ans += [(_('Published'), str(isoformat(self.pubdate, as_utc=False, sep=' ')))]
if self.rights is not None: if self.rights is not None:
ans += [(_('Rights'), unicode_type(self.rights))] ans += [(_('Rights'), str(self.rights))]
for key in self.custom_field_keys(): for key in self.custom_field_keys():
val = self.get(key, None) val = self.get(key, None)
if val: if val:
+3 -4
View File
@@ -14,7 +14,6 @@ from ebook_converter.utils.imghdr import identify
from ebook_converter import guess_type, guess_all_extensions, prints, force_unicode from ebook_converter import guess_type, guess_all_extensions, prints, force_unicode
from ebook_converter.ebooks.metadata import MetaInformation, check_isbn from ebook_converter.ebooks.metadata import MetaInformation, check_isbn
from ebook_converter.ebooks.chardet import xml_to_unicode from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.binary import as_base64_unicode from ebook_converter.polyglot.binary import as_base64_unicode
@@ -115,7 +114,7 @@ def get_metadata(stream):
# fallback for book_title # fallback for book_title
if book_title: if book_title:
book_title = unicode_type(book_title) book_title = str(book_title)
else: else:
book_title = force_unicode(os.path.splitext( book_title = force_unicode(os.path.splitext(
os.path.basename(getattr(stream, 'name', os.path.basename(getattr(stream, 'name',
@@ -252,7 +251,7 @@ def _parse_tags(root, mi, ctx):
# -- i18n Translations-- ? # -- i18n Translations-- ?
tags = ctx.XPath('//fb:%s/fb:genre/text()' % genre_sec)(root) tags = ctx.XPath('//fb:%s/fb:genre/text()' % genre_sec)(root)
if tags: if tags:
mi.tags = list(map(unicode_type, tags)) mi.tags = list(map(str, tags))
break break
@@ -304,7 +303,7 @@ def _parse_pubdate(root, mi, ctx):
year = ctx.XPath('number(//fb:publish-info/fb:year/text())')(root) year = ctx.XPath('number(//fb:publish-info/fb:year/text())')(root)
if float.is_integer(year): if float.is_integer(year):
# only year is available, so use 2nd of June # only year is available, so use 2nd of June
mi.pubdate = parse_only_date(unicode_type(int(year))) mi.pubdate = parse_only_date(str(int(year)))
def _parse_language(root, mi, ctx): def _parse_language(root, mi, ctx):
+2 -2
View File
@@ -6,7 +6,7 @@ from ebook_converter.ebooks.metadata.opf2 import OPF
from ebook_converter import isbytestring from ebook_converter import isbytestring
from ebook_converter.customize.ui import get_file_type_metadata, set_file_type_metadata from ebook_converter.customize.ui import get_file_type_metadata, set_file_type_metadata
from ebook_converter.ebooks.metadata import MetaInformation, string_to_authors from ebook_converter.ebooks.metadata import MetaInformation, string_to_authors
from ebook_converter.polyglot.builtins import getcwd, unicode_type from ebook_converter.polyglot.builtins import getcwd
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -229,7 +229,7 @@ def forked_read_metadata(path, tdir):
f.seek(0, 2) f.seek(0, 2)
sz = f.tell() sz = f.tell()
with lopen(os.path.join(tdir, 'size.txt'), 'wb') as s: with lopen(os.path.join(tdir, 'size.txt'), 'wb') as s:
s.write(unicode_type(sz).encode('ascii')) s.write(str(sz).encode('ascii'))
f.seek(0) f.seek(0)
mi = get_metadata(f, fmt) mi = get_metadata(f, fmt)
if mi.cover_data and mi.cover_data[1]: if mi.cover_data and mi.cover_data[1]:
+25 -25
View File
@@ -27,7 +27,7 @@ from ebook_converter import prints, guess_type
from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
from ebook_converter.utils.config import tweaks from ebook_converter.utils.config import tweaks
from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.polyglot.builtins import iteritems, unicode_type, getcwd from ebook_converter.polyglot.builtins import iteritems, getcwd
from ebook_converter.polyglot.urllib import unquote from ebook_converter.polyglot.urllib import unquote
@@ -91,7 +91,7 @@ class Resource(object): # {{{
self._href = href_or_path self._href = href_or_path
else: else:
pc = url[2] pc = url[2]
if isinstance(pc, unicode_type): if isinstance(pc, str):
pc = pc.encode('utf-8') pc = pc.encode('utf-8')
pc = pc.decode('utf-8') pc = pc.decode('utf-8')
self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep))) self.path = os.path.abspath(os.path.join(basedir, pc.replace('/', os.sep)))
@@ -159,7 +159,7 @@ class ResourceCollection(object): # {{{
__unicode__ = __str__ __unicode__ = __str__
def __repr__(self): def __repr__(self):
return unicode_type(self) return str(self)
def append(self, resource): def append(self, resource):
if not isinstance(resource, Resource): if not isinstance(resource, Resource):
@@ -219,10 +219,10 @@ class ManifestItem(Resource): # {{{
__unicode__ = __unicode__representation__ __unicode__ = __unicode__representation__
def __str__(self): def __str__(self):
return unicode_type(self).encode('utf-8') return str(self).encode('utf-8')
def __repr__(self): def __repr__(self):
return unicode_type(self) return str(self)
def __getitem__(self, index): def __getitem__(self, index):
if index == 0: if index == 0:
@@ -425,7 +425,7 @@ class Guide(ResourceCollection): # {{{
class MetadataField(object): class MetadataField(object):
def __init__(self, name, is_dc=True, formatter=None, none_is=None, def __init__(self, name, is_dc=True, formatter=None, none_is=None,
renderer=lambda x: unicode_type(x)): renderer=lambda x: str(x)):
self.name = name self.name = name
self.is_dc = is_dc self.is_dc = is_dc
self.formatter = formatter self.formatter = formatter
@@ -806,7 +806,7 @@ class OPF(object): # {{{
def unquote_urls(self): def unquote_urls(self):
def get_href(item): def get_href(item):
raw = unquote(item.get('href', '')) raw = unquote(item.get('href', ''))
if not isinstance(raw, unicode_type): if not isinstance(raw, str):
raw = raw.decode('utf-8') raw = raw.decode('utf-8')
return raw return raw
for item in self.itermanifest(): for item in self.itermanifest():
@@ -835,7 +835,7 @@ class OPF(object): # {{{
titles = () titles = ()
if val: if val:
title = titles[0] if titles else self.create_metadata_element('title') title = titles[0] if titles else self.create_metadata_element('title')
title.text = re.sub(r'\s+', ' ', unicode_type(val)) title.text = re.sub(r'\s+', ' ', str(val))
@property @property
def authors(self): def authors(self):
@@ -878,7 +878,7 @@ class OPF(object): # {{{
for key in matches[0].attrib: for key in matches[0].attrib:
if key.endswith('file-as'): if key.endswith('file-as'):
matches[0].attrib.pop(key) matches[0].attrib.pop(key)
matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], unicode_type(val)) matches[0].set('{%s}file-as'%self.NAMESPACES['opf'], str(val))
@property @property
def tags(self): def tags(self):
@@ -895,7 +895,7 @@ class OPF(object): # {{{
tag.getparent().remove(tag) tag.getparent().remove(tag)
for tag in val: for tag in val:
elem = self.create_metadata_element('subject') elem = self.create_metadata_element('subject')
self.set_text(elem, unicode_type(tag)) self.set_text(elem, str(tag))
@property @property
def pubdate(self): def pubdate(self):
@@ -951,7 +951,7 @@ class OPF(object): # {{{
xid = x.get('id', None) xid = x.get('id', None)
is_package_identifier = uuid_id is not None and uuid_id == xid is_package_identifier = uuid_id is not None and uuid_id == xid
if is_package_identifier: if is_package_identifier:
self.set_text(x, unicode_type(uuid.uuid4())) self.set_text(x, str(uuid.uuid4()))
for attr in x.attrib: for attr in x.attrib:
if attr.endswith('scheme'): if attr.endswith('scheme'):
x.attrib[attr] = 'uuid' x.attrib[attr] = 'uuid'
@@ -962,7 +962,7 @@ class OPF(object): # {{{
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'} attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'ISBN'}
matches = [self.create_metadata_element('identifier', matches = [self.create_metadata_element('identifier',
attrib=attrib)] attrib=attrib)]
self.set_text(matches[0], unicode_type(val)) self.set_text(matches[0], str(val))
def get_identifiers(self): def get_identifiers(self):
identifiers = {} identifiers = {}
@@ -1015,7 +1015,7 @@ class OPF(object): # {{{
for typ, val in iteritems(identifiers): for typ, val in iteritems(identifiers):
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: typ.upper()} attrib = {'{%s}scheme'%self.NAMESPACES['opf']: typ.upper()}
self.set_text(self.create_metadata_element( self.set_text(self.create_metadata_element(
'identifier', attrib=attrib), unicode_type(val)) 'identifier', attrib=attrib), str(val))
@property @property
def application_id(self): def application_id(self):
@@ -1038,7 +1038,7 @@ class OPF(object): # {{{
if uuid_id and uuid_id in removed_ids: if uuid_id and uuid_id in removed_ids:
attrib['id'] = uuid_id attrib['id'] = uuid_id
self.set_text(self.create_metadata_element( self.set_text(self.create_metadata_element(
'identifier', attrib=attrib), unicode_type(val)) 'identifier', attrib=attrib), str(val))
@property @property
def uuid(self): def uuid(self):
@@ -1052,7 +1052,7 @@ class OPF(object): # {{{
attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'uuid'} attrib = {'{%s}scheme'%self.NAMESPACES['opf']: 'uuid'}
matches = [self.create_metadata_element('identifier', matches = [self.create_metadata_element('identifier',
attrib=attrib)] attrib=attrib)]
self.set_text(matches[0], unicode_type(val)) self.set_text(matches[0], str(val))
@property @property
def language(self): def language(self):
@@ -1083,7 +1083,7 @@ class OPF(object): # {{{
for lang in val: for lang in val:
l = self.create_metadata_element('language') l = self.create_metadata_element('language')
self.set_text(l, unicode_type(lang)) self.set_text(l, str(lang))
@property @property
def raw_languages(self): def raw_languages(self):
@@ -1103,7 +1103,7 @@ class OPF(object): # {{{
if not matches: if not matches:
matches = [self.create_metadata_element('contributor')] matches = [self.create_metadata_element('contributor')]
matches[0].set('{%s}role'%self.NAMESPACES['opf'], 'bkp') matches[0].set('{%s}role'%self.NAMESPACES['opf'], 'bkp')
self.set_text(matches[0], unicode_type(val)) self.set_text(matches[0], str(val))
def identifier_iter(self): def identifier_iter(self):
for item in self.identifier_path(self.metadata): for item in self.identifier_path(self.metadata):
@@ -1363,7 +1363,7 @@ class OPFCreator(Metadata):
self.page_progression_direction = None self.page_progression_direction = None
self.primary_writing_mode = None self.primary_writing_mode = None
if self.application_id is None: if self.application_id is None:
self.application_id = unicode_type(uuid.uuid4()) self.application_id = str(uuid.uuid4())
if not isinstance(self.toc, TOC): if not isinstance(self.toc, TOC):
self.toc = None self.toc = None
if not self.authors: if not self.authors:
@@ -1487,7 +1487,7 @@ class OPFCreator(Metadata):
a(DC_ELEM('contributor', '%s (%s) [%s]'%(__appname__, __version__, a(DC_ELEM('contributor', '%s (%s) [%s]'%(__appname__, __version__,
'https://calibre-ebook.com'), opf_attrs={'role':'bkp', 'https://calibre-ebook.com'), opf_attrs={'role':'bkp',
'file-as':__appname__})) 'file-as':__appname__}))
a(DC_ELEM('identifier', unicode_type(self.application_id), a(DC_ELEM('identifier', str(self.application_id),
opf_attrs={'scheme':__appname__}, opf_attrs={'scheme':__appname__},
dc_attrs={'id':__appname__+'_id'})) dc_attrs={'id':__appname__+'_id'}))
if getattr(self, 'pubdate', None) is not None: if getattr(self, 'pubdate', None) is not None:
@@ -1515,7 +1515,7 @@ class OPFCreator(Metadata):
if self.title_sort: if self.title_sort:
a(CAL_ELEM('calibre:title_sort', self.title_sort)) a(CAL_ELEM('calibre:title_sort', self.title_sort))
if self.rating is not None: if self.rating is not None:
a(CAL_ELEM('calibre:rating', unicode_type(self.rating))) a(CAL_ELEM('calibre:rating', str(self.rating)))
if self.timestamp is not None: if self.timestamp is not None:
a(CAL_ELEM('calibre:timestamp', self.timestamp.isoformat())) a(CAL_ELEM('calibre:timestamp', self.timestamp.isoformat()))
if self.publication_type is not None: if self.publication_type is not None:
@@ -1532,7 +1532,7 @@ class OPFCreator(Metadata):
href = ref.href() href = ref.href()
if isinstance(href, bytes): if isinstance(href, bytes):
href = href.decode('utf-8') href = href.decode('utf-8')
item = E.item(id=unicode_type(ref.id), href=href) item = E.item(id=str(ref.id), href=href)
item.set('media-type', ref.mime_type) item.set('media-type', ref.mime_type)
manifest.append(item) manifest.append(item)
spine = E.spine() spine = E.spine()
@@ -1583,10 +1583,10 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
from ebook_converter.ebooks.oeb.base import OPF, DC from ebook_converter.ebooks.oeb.base import OPF, DC
if not mi.application_id: if not mi.application_id:
mi.application_id = unicode_type(uuid.uuid4()) mi.application_id = str(uuid.uuid4())
if not mi.uuid: if not mi.uuid:
mi.uuid = unicode_type(uuid.uuid4()) mi.uuid = str(uuid.uuid4())
if not mi.book_producer: if not mi.book_producer:
mi.book_producer = __appname__ + ' (%s) '%__version__ + \ mi.book_producer = __appname__ + ' (%s) '%__version__ + \
@@ -1667,7 +1667,7 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
if mi.series_index is not None: if mi.series_index is not None:
meta('series_index', mi.format_series_index()) meta('series_index', mi.format_series_index())
if mi.rating is not None: if mi.rating is not None:
meta('rating', unicode_type(mi.rating)) meta('rating', str(mi.rating))
if hasattr(mi.timestamp, 'isoformat'): if hasattr(mi.timestamp, 'isoformat'):
meta('timestamp', isoformat(mi.timestamp)) meta('timestamp', isoformat(mi.timestamp))
if mi.publication_type: if mi.publication_type:
@@ -1682,7 +1682,7 @@ def metadata_to_opf(mi, as_string=True, default_lang=None):
metadata[-1].tail = '\n' +(' '*4) metadata[-1].tail = '\n' +(' '*4)
if mi.cover: if mi.cover:
if not isinstance(mi.cover, unicode_type): if not isinstance(mi.cover, str):
mi.cover = mi.cover.decode(filesystem_encoding) mi.cover = mi.cover.decode(filesystem_encoding)
guide.text = '\n'+(' '*8) guide.text = '\n'+(' '*8)
r = guide.makeelement(OPF('reference'), r = guide.makeelement(OPF('reference'),
+3 -3
View File
@@ -10,7 +10,7 @@ from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.ebooks.metadata import ( from ebook_converter.ebooks.metadata import (
MetaInformation, string_to_authors, check_isbn, check_doi) MetaInformation, string_to_authors, check_isbn, check_doi)
from ebook_converter.utils.ipc.simple_worker import fork_job, WorkerError from ebook_converter.utils.ipc.simple_worker import fork_job, WorkerError
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -94,8 +94,8 @@ def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', pr
args['creationflags'] = w.HIGH_PRIORITY_CLASS | w.CREATE_NO_WINDOW args['creationflags'] = w.HIGH_PRIORITY_CLASS | w.CREATE_NO_WINDOW
try: try:
subprocess.check_call([ subprocess.check_call([
pdftoppm, '-cropbox', '-' + image_format, '-f', unicode_type(first), pdftoppm, '-cropbox', '-' + image_format, '-f', str(first),
'-l', unicode_type(last), pdfpath, os.path.join(outputdir, prefix) '-l', str(last), pdfpath, os.path.join(outputdir, prefix)
], **args) ], **args)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode) raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)
+2 -2
View File
@@ -6,7 +6,7 @@ import re
from ebook_converter import force_unicode from ebook_converter import force_unicode
from ebook_converter.ebooks.metadata import MetaInformation from ebook_converter.ebooks.metadata import MetaInformation
from ebook_converter.polyglot.builtins import codepoint_to_chr, string_or_bytes, unicode_type, int_to_byte from ebook_converter.polyglot.builtins import codepoint_to_chr, string_or_bytes, int_to_byte
title_pat = re.compile(br'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL) title_pat = re.compile(br'\{\\info.*?\{\\title(.*?)(?<!\\)\}', re.DOTALL)
author_pat = re.compile(br'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL) author_pat = re.compile(br'\{\\info.*?\{\\author(.*?)(?<!\\)\}', re.DOTALL)
@@ -74,7 +74,7 @@ def detect_codepage(stream):
def encode(unistr): def encode(unistr):
if not isinstance(unistr, unicode_type): if not isinstance(unistr, str):
unistr = force_unicode(unistr) unistr = force_unicode(unistr)
return ''.join(c if ord(c) < 128 else '\\u{}?'.format(ord(c)) for c in unistr) return ''.join(c if ord(c) < 128 else '\\u{}?'.format(ord(c)) for c in unistr)
+6 -6
View File
@@ -12,7 +12,7 @@ from ebook_converter.constants import __appname__, __version__
from ebook_converter.ebooks.chardet import xml_to_unicode from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.polyglot.builtins import unicode_type, getcwd from ebook_converter.polyglot.builtins import getcwd
from ebook_converter.polyglot.urllib import unquote from ebook_converter.polyglot.urllib import unquote
@@ -67,7 +67,7 @@ class TOC(list):
def __str__(self): def __str__(self):
lines = ['TOC: %s#%s %s'%(self.href, self.fragment, self.text)] lines = ['TOC: %s#%s %s'%(self.href, self.fragment, self.text)]
for child in self: for child in self:
c = unicode_type(child).splitlines() c = str(child).splitlines()
for l in c: for l in c:
lines.append('\t'+l) lines.append('\t'+l)
return '\n'.join(lines) return '\n'.join(lines)
@@ -245,8 +245,8 @@ class TOC(list):
def render(self, stream, uid): def render(self, stream, uid):
root = E.ncx( root = E.ncx(
E.head( E.head(
E.meta(name='dtb:uid', content=unicode_type(uid)), E.meta(name='dtb:uid', content=str(uid)),
E.meta(name='dtb:depth', content=unicode_type(self.depth())), E.meta(name='dtb:depth', content=str(self.depth())),
E.meta(name='dtb:generator', content='%s (%s)'%(__appname__, E.meta(name='dtb:generator', content='%s (%s)'%(__appname__,
__version__)), __version__)),
E.meta(name='dtb:totalPageCount', content='0'), E.meta(name='dtb:totalPageCount', content='0'),
@@ -268,10 +268,10 @@ class TOC(list):
text = clean_xml_chars(text) text = clean_xml_chars(text)
elem = E.navPoint( elem = E.navPoint(
E.navLabel(E.text(re.sub(r'\s+', ' ', text))), E.navLabel(E.text(re.sub(r'\s+', ' ', text))),
E.content(src=unicode_type(np.href)+(('#' + unicode_type(np.fragment)) E.content(src=str(np.href)+(('#' + str(np.fragment))
if np.fragment else '')), if np.fragment else '')),
id=item_id, id=item_id,
playOrder=unicode_type(np.play_order) playOrder=str(np.play_order)
) )
au = getattr(np, 'author', None) au = getattr(np, 'author', None)
if au: if au:
+10 -10
View File
@@ -11,7 +11,7 @@ from ebook_converter.ebooks.oeb.stylizer import Stylizer
from ebook_converter.ebooks.oeb.transforms.flatcss import KeyMapper from ebook_converter.ebooks.oeb.transforms.flatcss import KeyMapper
from ebook_converter.ebooks.mobi.utils import convert_color_for_font_tag from ebook_converter.ebooks.mobi.utils import convert_color_for_font_tag
from ebook_converter.utils.imghdr import identify from ebook_converter.utils.imghdr import identify
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes from ebook_converter.polyglot.builtins import string_or_bytes
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -151,7 +151,7 @@ class MobiMLizer(object):
return "%dem" % int(round(ptsize / embase)) return "%dem" % int(round(ptsize / embase))
def preize_text(self, text, pre_wrap=False): def preize_text(self, text, pre_wrap=False):
text = unicode_type(text) text = str(text)
if pre_wrap: if pre_wrap:
# Replace n consecutive spaces with n-1 NBSP + space # Replace n consecutive spaces with n-1 NBSP + space
text = re.sub(r' {2,}', lambda m:('\xa0'*(len(m.group())-1) + ' '), text) text = re.sub(r' {2,}', lambda m:('\xa0'*(len(m.group())-1) + ' '), text)
@@ -199,7 +199,7 @@ class MobiMLizer(object):
bstate.nested.append(para) bstate.nested.append(para)
if tag == 'li' and len(istates) > 1: if tag == 'li' and len(istates) > 1:
istates[-2].list_num += 1 istates[-2].list_num += 1
para.attrib['value'] = unicode_type(istates[-2].list_num) para.attrib['value'] = str(istates[-2].list_num)
elif tag in NESTABLE_TAGS and istate.rendered: elif tag in NESTABLE_TAGS and istate.rendered:
para = wrapper = bstate.nested[-1] para = wrapper = bstate.nested[-1]
elif not self.opts.mobi_ignore_margins and left > 0 and indent >= 0: elif not self.opts.mobi_ignore_margins and left > 0 and indent >= 0:
@@ -228,7 +228,7 @@ class MobiMLizer(object):
while vspace > 0: while vspace > 0:
wrapper.addprevious(etree.Element(XHTML('br'))) wrapper.addprevious(etree.Element(XHTML('br')))
vspace -= 1 vspace -= 1
if istate.halign != 'auto' and isinstance(istate.halign, (bytes, unicode_type)): if istate.halign != 'auto' and isinstance(istate.halign, (bytes, str)):
if isinstance(istate.halign, bytes): if isinstance(istate.halign, bytes):
istate.halign = istate.halign.decode('utf-8') istate.halign = istate.halign.decode('utf-8')
para.attrib['align'] = istate.halign para.attrib['align'] = istate.halign
@@ -285,7 +285,7 @@ class MobiMLizer(object):
if fsize != 3: if fsize != 3:
inline = etree.SubElement(inline, XHTML('font'), inline = etree.SubElement(inline, XHTML('font'),
size=unicode_type(fsize)) size=str(fsize))
if istate.family == 'monospace': if istate.family == 'monospace':
inline = etree.SubElement(inline, XHTML('tt')) inline = etree.SubElement(inline, XHTML('tt'))
if istate.italic: if istate.italic:
@@ -447,7 +447,7 @@ class MobiMLizer(object):
(72/self.profile.dpi))) (72/self.profile.dpi)))
except: except:
continue continue
result = unicode_type(pixs) result = str(pixs)
istate.attrib[prop] = result istate.attrib[prop] = result
if 'width' not in istate.attrib or 'height' not in istate.attrib: if 'width' not in istate.attrib or 'height' not in istate.attrib:
href = self.current_spine_item.abshref(elem.attrib['src']) href = self.current_spine_item.abshref(elem.attrib['src'])
@@ -464,8 +464,8 @@ class MobiMLizer(object):
else: else:
if 'width' not in istate.attrib and 'height' not in \ if 'width' not in istate.attrib and 'height' not in \
istate.attrib: istate.attrib:
istate.attrib['width'] = unicode_type(width) istate.attrib['width'] = str(width)
istate.attrib['height'] = unicode_type(height) istate.attrib['height'] = str(height)
else: else:
ar = width / height ar = width / height
if 'width' not in istate.attrib: if 'width' not in istate.attrib:
@@ -473,13 +473,13 @@ class MobiMLizer(object):
width = int(istate.attrib['height'])*ar width = int(istate.attrib['height'])*ar
except: except:
pass pass
istate.attrib['width'] = unicode_type(int(width)) istate.attrib['width'] = str(int(width))
else: else:
try: try:
height = int(istate.attrib['width'])/ar height = int(istate.attrib['width'])/ar
except: except:
pass pass
istate.attrib['height'] = unicode_type(int(height)) istate.attrib['height'] = str(int(height))
item.unload_data_from_memory() item.unload_data_from_memory()
elif tag == 'hr' and asfloat(style['width']) > 0 and style._get('width') not in {'100%', 'auto'}: elif tag == 'hr' and asfloat(style['width']) > 0 and style._get('width') not in {'100%', 'auto'}:
raww = style._get('width') raww = style._get('width')
@@ -8,7 +8,6 @@ from ebook_converter.ebooks.mobi.langcodes import main_language, sub_language, m
from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
from ebook_converter.utils.localization import canonicalize_lang from ebook_converter.utils.localization import canonicalize_lang
from ebook_converter.utils.config_base import tweaks from ebook_converter.utils.config_base import tweaks
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -245,7 +244,7 @@ class BookHeader(object):
self.exth_flag, = struct.unpack('>L', raw[0x80:0x84]) self.exth_flag, = struct.unpack('>L', raw[0x80:0x84])
self.exth = None self.exth = None
if not isinstance(self.title, unicode_type): if not isinstance(self.title, str):
self.title = self.title.decode(self.codec, 'replace') self.title = self.title.decode(self.codec, 'replace')
if self.exth_flag & 0x40: if self.exth_flag & 0x40:
try: try:
+1 -2
View File
@@ -1,7 +1,6 @@
import re, os import re, os
from ebook_converter.ebooks.chardet import strip_encoding_declarations from ebook_converter.ebooks.chardet import strip_encoding_declarations
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -127,7 +126,7 @@ def update_flow_links(mobi8_reader, resource_map, log):
flows.append(flow) flows.append(flow)
continue continue
if not isinstance(flow, unicode_type): if not isinstance(flow, str):
try: try:
flow = flow.decode(mr.header.codec) flow = flow.decode(mr.header.codec)
except UnicodeDecodeError: except UnicodeDecodeError:
+2 -2
View File
@@ -16,7 +16,7 @@ from ebook_converter.ebooks.metadata.toc import TOC
from ebook_converter.ebooks.mobi.reader.headers import BookHeader from ebook_converter.ebooks.mobi.reader.headers import BookHeader
from ebook_converter.utils.img import save_cover_data_to, gif_data_to_png_data, AnimatedGIF from ebook_converter.utils.img import save_cover_data_to, gif_data_to_png_data, AnimatedGIF
from ebook_converter.utils.imghdr import what from ebook_converter.utils.imghdr import what
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -287,7 +287,7 @@ class MobiReader(object):
pass pass
def write_as_utf8(path, data): def write_as_utf8(path, data):
if isinstance(data, unicode_type): if isinstance(data, str):
data = data.encode('utf-8') data = data.encode('utf-8')
with lopen(path, 'wb') as f: with lopen(path, 'wb') as f:
f.write(data) f.write(data)
+2 -2
View File
@@ -18,7 +18,7 @@ from ebook_converter.ebooks.metadata.toc import TOC
from ebook_converter.ebooks.mobi.utils import read_font_record from ebook_converter.ebooks.mobi.utils import read_font_record
from ebook_converter.ebooks.oeb.parse_utils import parse_html from ebook_converter.ebooks.oeb.parse_utils import parse_html
from ebook_converter.ebooks.oeb.base import XPath, XHTML, xml2text from ebook_converter.ebooks.oeb.base import XPath, XHTML, xml2text
from ebook_converter.polyglot.builtins import unicode_type, getcwd, as_unicode from ebook_converter.polyglot.builtins import getcwd, as_unicode
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -224,7 +224,7 @@ class Mobi8Reader(object):
self.parts.append(skeleton) self.parts.append(skeleton)
if divcnt < 1: if divcnt < 1:
# Empty file # Empty file
aidtext = unicode_type(uuid.uuid4()) aidtext = str(uuid.uuid4())
filename = aidtext + '.html' filename = aidtext + '.html'
self.partinfo.append(Part(skelnum, 'text', filename, skelpos, self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
baseptr, aidtext)) baseptr, aidtext))
+6 -6
View File
@@ -5,7 +5,7 @@ from io import BytesIO
from ebook_converter.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image, png_data_to_gif_data from ebook_converter.utils.img import save_cover_data_to, scale_image, image_to_data, image_from_data, resize_image, png_data_to_gif_data
from ebook_converter.utils.imghdr import what from ebook_converter.utils.imghdr import what
from ebook_converter.ebooks import normalize from ebook_converter.ebooks import normalize
from ebook_converter.polyglot.builtins import unicode_type, as_bytes from ebook_converter.polyglot.builtins import as_bytes
from ebook_converter.tinycss.color3 import parse_color_string from ebook_converter.tinycss.color3 import parse_color_string
@@ -20,17 +20,17 @@ RECORD_SIZE = 0x1000 # 4096 (Text record size (uncompressed))
class PolyglotDict(dict): class PolyglotDict(dict):
def __setitem__(self, key, val): def __setitem__(self, key, val):
if isinstance(key, unicode_type): if isinstance(key, str):
key = key.encode('utf-8') key = key.encode('utf-8')
dict.__setitem__(self, key, val) dict.__setitem__(self, key, val)
def __getitem__(self, key): def __getitem__(self, key):
if isinstance(key, unicode_type): if isinstance(key, str):
key = key.encode('utf-8') key = key.encode('utf-8')
return dict.__getitem__(self, key) return dict.__getitem__(self, key)
def __contains__(self, key): def __contains__(self, key):
if isinstance(key, unicode_type): if isinstance(key, str):
key = key.encode('utf-8') key = key.encode('utf-8')
return dict.__contains__(self, key) return dict.__contains__(self, key)
@@ -332,7 +332,7 @@ def utf8_text(text):
''' '''
if text and text.strip(): if text and text.strip():
text = text.strip() text = text.strip()
if not isinstance(text, unicode_type): if not isinstance(text, str):
text = text.decode('utf-8', 'replace') text = text.decode('utf-8', 'replace')
text = normalize(text).encode('utf-8') text = normalize(text).encode('utf-8')
else: else:
@@ -635,7 +635,7 @@ def is_guide_ref_start(ref):
def convert_color_for_font_tag(val): def convert_color_for_font_tag(val):
rgba = parse_color_string(unicode_type(val or '')) rgba = parse_color_string(str(val or ''))
if rgba is None or rgba == 'currentColor': if rgba is None or rgba == 'currentColor':
return val return val
clamp = lambda x: min(x, max(0, x), 1) clamp = lambda x: min(x, max(0, x), 1)
+5 -5
View File
@@ -10,7 +10,7 @@ from ebook_converter.ebooks.mobi.writer2 import (PALMDOC, UNCOMPRESSED)
from ebook_converter.ebooks.mobi.utils import (encint, encode_trailing_data, from ebook_converter.ebooks.mobi.utils import (encint, encode_trailing_data,
align_block, detect_periodical, RECORD_SIZE, create_text_record) align_block, detect_periodical, RECORD_SIZE, create_text_record)
from ebook_converter.ebooks.mobi.writer2.indexer import Indexer from ebook_converter.ebooks.mobi.writer2.indexer import Indexer
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -48,7 +48,7 @@ class MobiWriter(object):
self.log = oeb.log self.log = oeb.log
pt = None pt = None
if oeb.metadata.publication_type: if oeb.metadata.publication_type:
x = unicode_type(oeb.metadata.publication_type[0]).split(':') x = str(oeb.metadata.publication_type[0]).split(':')
if len(x) > 1: if len(x) > 1:
pt = x[1].lower() pt = x[1].lower()
self.publication_type = pt self.publication_type = pt
@@ -235,7 +235,7 @@ class MobiWriter(object):
0 # Unused 0 # Unused
)) # 0 - 15 (0x0 - 0xf) )) # 0 - 15 (0x0 - 0xf)
uid = random.randint(0, 0xffffffff) uid = random.randint(0, 0xffffffff)
title = normalize(unicode_type(metadata.title[0])).encode('utf-8') title = normalize(str(metadata.title[0])).encode('utf-8')
# 0x0 - 0x3 # 0x0 - 0x3
record0.write(b'MOBI') record0.write(b'MOBI')
@@ -278,7 +278,7 @@ class MobiWriter(object):
# 0x4c - 0x4f : Language specifier # 0x4c - 0x4f : Language specifier
record0.write(iana2mobi( record0.write(iana2mobi(
unicode_type(metadata.language[0]))) str(metadata.language[0])))
# 0x50 - 0x57 : Input language and Output language # 0x50 - 0x57 : Input language and Output language
record0.write(b'\0' * 8) record0.write(b'\0' * 8)
@@ -455,7 +455,7 @@ class MobiWriter(object):
''' '''
Write the PalmDB header Write the PalmDB header
''' '''
title = ascii_filename(unicode_type(self.oeb.metadata.title[0])).replace( title = ascii_filename(str(self.oeb.metadata.title[0])).replace(
' ', '_') ' ', '_')
if not isinstance(title, bytes): if not isinstance(title, bytes):
title = title.encode('ascii') title = title.encode('ascii')
@@ -8,7 +8,7 @@ from ebook_converter.ebooks import generate_masthead
from ebook_converter.ebooks.oeb.base import OEB_RASTER_IMAGES from ebook_converter.ebooks.oeb.base import OEB_RASTER_IMAGES
from ebook_converter.ptempfile import PersistentTemporaryFile from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.utils.imghdr import what from ebook_converter.utils.imghdr import what
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -79,7 +79,7 @@ class Resources(object):
self.image_indices.add(0) self.image_indices.add(0)
elif self.is_periodical: elif self.is_periodical:
# Generate a default masthead # Generate a default masthead
data = generate_masthead(unicode_type(self.oeb.metadata['title'][0])) data = generate_masthead(str(self.oeb.metadata['title'][0]))
self.records.append(data) self.records.append(data)
self.used_image_indices.add(0) self.used_image_indices.add(0)
self.image_indices.add(0) self.image_indices.add(0)
@@ -87,8 +87,8 @@ class Resources(object):
cover_href = self.cover_offset = self.thumbnail_offset = None cover_href = self.cover_offset = self.thumbnail_offset = None
if (oeb.metadata.cover and if (oeb.metadata.cover and
unicode_type(oeb.metadata.cover[0]) in oeb.manifest.ids): str(oeb.metadata.cover[0]) in oeb.manifest.ids):
cover_id = unicode_type(oeb.metadata.cover[0]) cover_id = str(oeb.metadata.cover[0])
item = oeb.manifest.ids[cover_id] item = oeb.manifest.ids[cover_id]
cover_href = item.href cover_href = item.href
@@ -9,7 +9,7 @@ from ebook_converter.ebooks.mobi.utils import is_guide_ref_start
from ebook_converter.ebooks.oeb.base import ( from ebook_converter.ebooks.oeb.base import (
OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize
) )
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes from ebook_converter.polyglot.builtins import string_or_bytes
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -20,7 +20,7 @@ __docformat__ = 'restructuredtext en'
class Buf(io.BytesIO): class Buf(io.BytesIO):
def write(self, x): def write(self, x):
if isinstance(x, unicode_type): if isinstance(x, str):
x = x.encode('utf-8') x = x.encode('utf-8')
io.BytesIO.write(self, x) io.BytesIO.write(self, x)
@@ -226,7 +226,7 @@ class Serializer(object):
buf.write(b'<div> <div height="1em"></div>') buf.write(b'<div> <div height="1em"></div>')
else: else:
t = tocref.title t = tocref.title
if isinstance(t, unicode_type): if isinstance(t, str):
t = t.encode('utf-8') t = t.encode('utf-8')
buf.write(b'<div></div> <div> <h2 height="1em"><font size="+2"><b>' + t + buf.write(b'<div></div> <div> <h2 height="1em"><font size="+2"><b>' + t +
b'</b></font></h2> <div height="1em"></div>') b'</b></font></h2> <div height="1em"></div>')
@@ -246,7 +246,7 @@ class Serializer(object):
buf.write(b'0000000000') buf.write(b'0000000000')
buf.write(b' ><font size="+1"><b><u>') buf.write(b' ><font size="+1"><b><u>')
t = tocitem.title t = tocitem.title
if isinstance(t, unicode_type): if isinstance(t, str):
t = t.encode('utf-8') t = t.encode('utf-8')
buf.write(t) buf.write(t)
buf.write(b'</u></b></font></a></li>') buf.write(b'</u></b></font></a></li>')
@@ -364,7 +364,7 @@ class Serializer(object):
text = text.replace(u'\u00AD', '') # Soft-hyphen text = text.replace(u'\u00AD', '') # Soft-hyphen
if quot: if quot:
text = text.replace('"', '&quot;') text = text.replace('"', '&quot;')
if isinstance(text, unicode_type): if isinstance(text, str):
text = unicodedata.normalize('NFC', text) text = unicodedata.normalize('NFC', text)
self.buf.write(text.encode('utf-8')) self.buf.write(text.encode('utf-8'))
+11 -11
View File
@@ -6,7 +6,7 @@ from ebook_converter.constants import iswindows, isosx
from ebook_converter.ebooks.mobi.utils import (utf8_text, to_base) from ebook_converter.ebooks.mobi.utils import (utf8_text, to_base)
from ebook_converter.utils.localization import lang_as_iso639_1 from ebook_converter.utils.localization import lang_as_iso639_1
from ebook_converter.ebooks.metadata import authors_to_sort_string from ebook_converter.ebooks.metadata import authors_to_sort_string
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -59,14 +59,14 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
items = metadata[term] items = metadata[term]
if term == 'creator': if term == 'creator':
if prefer_author_sort: if prefer_author_sort:
creators = [authors_to_sort_string([unicode_type(c)]) for c in creators = [authors_to_sort_string([str(c)]) for c in
items] items]
else: else:
creators = [unicode_type(c) for c in items] creators = [str(c) for c in items]
items = creators items = creators
elif term == 'rights': elif term == 'rights':
try: try:
rights = utf8_text(unicode_type(metadata.rights[0])) rights = utf8_text(str(metadata.rights[0]))
except: except:
rights = b'Unknown' rights = b'Unknown'
exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8)) exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
@@ -75,7 +75,7 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
continue continue
for item in items: for item in items:
data = unicode_type(item) data = str(item)
if term != 'description': if term != 'description':
data = COLLAPSE_RE.sub(' ', data) data = COLLAPSE_RE.sub(' ', data)
if term == 'identifier': if term == 'identifier':
@@ -99,14 +99,14 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
from ebook_converter.ebooks.oeb.base import OPF from ebook_converter.ebooks.oeb.base import OPF
for x in metadata['identifier']: for x in metadata['identifier']:
if (x.get(OPF('scheme'), None).lower() == 'uuid' or if (x.get(OPF('scheme'), None).lower() == 'uuid' or
unicode_type(x).startswith('urn:uuid:')): str(x).startswith('urn:uuid:')):
uuid = unicode_type(x).split(':')[-1] uuid = str(x).split(':')[-1]
break break
if uuid is None: if uuid is None:
from uuid import uuid4 from uuid import uuid4
uuid = unicode_type(uuid4()) uuid = str(uuid4())
if isinstance(uuid, unicode_type): if isinstance(uuid, str):
uuid = uuid.encode('utf-8') uuid = uuid.encode('utf-8')
if not share_not_sync: if not share_not_sync:
exth.write(pack(b'>II', 113, len(uuid) + 8)) exth.write(pack(b'>II', 113, len(uuid) + 8))
@@ -134,9 +134,9 @@ def build_exth(metadata, prefer_author_sort=False, is_periodical=False,
# Add a publication date entry # Add a publication date entry
if metadata['date']: if metadata['date']:
datestr = unicode_type(metadata['date'][0]) datestr = str(metadata['date'][0])
elif metadata['timestamp']: elif metadata['timestamp']:
datestr = unicode_type(metadata['timestamp'][0]) datestr = str(metadata['timestamp'][0])
if datestr is None: if datestr is None:
raise ValueError("missing date or timestamp") raise ValueError("missing date or timestamp")
+2 -2
View File
@@ -15,7 +15,7 @@ from odf.namespaces import TEXTNS as odTEXTNS
from ebook_converter import CurrentDir, walk from ebook_converter import CurrentDir, walk
from ebook_converter.ebooks.oeb.base import _css_logger from ebook_converter.ebooks.oeb.base import _css_logger
from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes, getcwd, as_bytes from ebook_converter.polyglot.builtins import string_or_bytes, getcwd, as_bytes
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -174,7 +174,7 @@ class Extract(ODF2XHTML):
css = style.text css = style.text
if css: if css:
css, sel_map = self.do_filter_css(css) css, sel_map = self.do_filter_css(css)
if not isinstance(css, unicode_type): if not isinstance(css, str):
css = css.decode('utf-8', 'ignore') css = css.decode('utf-8', 'ignore')
style.text = css style.text = css
for x in root.xpath('//*[@class]'): for x in root.xpath('//*[@class]'):
+28 -28
View File
@@ -17,7 +17,7 @@ from ebook_converter import (isbytestring, as_unicode, get_types_map)
from ebook_converter.ebooks.oeb.parse_utils import barename, XHTML_NS, namespace, XHTML, parse_html, NotHTML from ebook_converter.ebooks.oeb.parse_utils import barename, XHTML_NS, namespace, XHTML, parse_html, NotHTML
from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.utils.short_uuid import uuid4 from ebook_converter.utils.short_uuid import uuid4
from ebook_converter.polyglot.builtins import iteritems, unicode_type, string_or_bytes, itervalues, codepoint_to_chr from ebook_converter.polyglot.builtins import iteritems, string_or_bytes, itervalues, codepoint_to_chr
from ebook_converter.polyglot.urllib import unquote as urlunquote from ebook_converter.polyglot.urllib import unquote as urlunquote
@@ -121,7 +121,7 @@ def as_string_type(pat, for_unicode):
if isinstance(pat, bytes): if isinstance(pat, bytes):
pat = pat.decode('utf-8') pat = pat.decode('utf-8')
else: else:
if isinstance(pat, unicode_type): if isinstance(pat, str):
pat = pat.encode('utf-8') pat = pat.encode('utf-8')
return pat return pat
@@ -140,7 +140,7 @@ def self_closing_pat(for_unicode):
def close_self_closing_tags(raw): def close_self_closing_tags(raw):
for_unicode = isinstance(raw, unicode_type) for_unicode = isinstance(raw, str)
repl = as_string_type(r'<\g<tag>\g<arg>></\g<tag>>', for_unicode) repl = as_string_type(r'<\g<tag>\g<arg>></\g<tag>>', for_unicode)
pat = self_closing_pat(for_unicode) pat = self_closing_pat(for_unicode)
return pat.sub(repl, raw) return pat.sub(repl, raw)
@@ -421,11 +421,11 @@ def serialize(data, media_type, pretty_print=False):
# incorrectly by some browser based renderers # incorrectly by some browser based renderers
ans = close_self_closing_tags(ans) ans = close_self_closing_tags(ans)
return ans return ans
if isinstance(data, unicode_type): if isinstance(data, str):
return data.encode('utf-8') return data.encode('utf-8')
if hasattr(data, 'cssText'): if hasattr(data, 'cssText'):
data = data.cssText data = data.cssText
if isinstance(data, unicode_type): if isinstance(data, str):
data = data.encode('utf-8') data = data.encode('utf-8')
return data + b'\n' return data + b'\n'
return bytes(data) return bytes(data)
@@ -567,7 +567,7 @@ class DirContainer(object):
# If it runs on a unicode object, it returns a double encoded unicode # If it runs on a unicode object, it returns a double encoded unicode
# string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8') # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
# and the latter is correct # and the latter is correct
if isinstance(path, unicode_type): if isinstance(path, str):
path = path.encode('utf-8') path = path.encode('utf-8')
return urlunquote(path).decode('utf-8') return urlunquote(path).decode('utf-8')
@@ -759,7 +759,7 @@ class Metadata(object):
return as_unicode(self.value) return as_unicode(self.value)
else: else:
def __str__(self): def __str__(self):
return unicode_type(self.value).encode('ascii', 'xmlcharrefreplace') return str(self.value).encode('ascii', 'xmlcharrefreplace')
def __unicode__(self): def __unicode__(self):
return as_unicode(self.value) return as_unicode(self.value)
@@ -918,9 +918,9 @@ class Manifest(object):
""" """
def __init__(self, oeb, id, href, media_type, def __init__(self, oeb, id, href, media_type,
fallback=None, loader=unicode_type, data=None): fallback=None, loader=str, data=None):
if href: if href:
href = unicode_type(href) href = str(href)
self.oeb = oeb self.oeb = oeb
self.id = id self.id = id
self.href = self.path = urlnormalize(href) self.href = self.path = urlnormalize(href)
@@ -973,7 +973,7 @@ class Manifest(object):
title = self.oeb.metadata.title title = self.oeb.metadata.title
if title: if title:
title = unicode_type(title[0]) title = str(title[0])
else: else:
title = _('Unknown') title = _('Unknown')
@@ -1006,7 +1006,7 @@ class Manifest(object):
self.oeb.logger.warn('CSS import of non-CSS file %r' % path) self.oeb.logger.warn('CSS import of non-CSS file %r' % path)
return (None, None) return (None, None)
data = item.data.cssText data = item.data.cssText
enc = None if isinstance(data, unicode_type) else 'utf-8' enc = None if isinstance(data, str) else 'utf-8'
return (enc, data) return (enc, data)
# }}} # }}}
@@ -1087,11 +1087,11 @@ class Manifest(object):
data = self.data data = self.data
if isinstance(data, etree._Element): if isinstance(data, etree._Element):
return xml2text(data, pretty_print=self.oeb.pretty_print) return xml2text(data, pretty_print=self.oeb.pretty_print)
if isinstance(data, unicode_type): if isinstance(data, str):
return data return data
if hasattr(data, 'cssText'): if hasattr(data, 'cssText'):
return css_text(data) return css_text(data)
return unicode_type(data) return str(data)
@property @property
def bytes_representation(self): def bytes_representation(self):
@@ -1211,7 +1211,7 @@ class Manifest(object):
base = id base = id
index = 1 index = 1
while id in self.ids: while id in self.ids:
id = base + unicode_type(index) id = base + str(index)
index += 1 index += 1
if href is not None: if href is not None:
href = urlnormalize(href) href = urlnormalize(href)
@@ -1219,9 +1219,9 @@ class Manifest(object):
index = 1 index = 1
lhrefs = {x.lower() for x in self.hrefs} lhrefs = {x.lower() for x in self.hrefs}
while href.lower() in lhrefs: while href.lower() in lhrefs:
href = base + unicode_type(index) + ext href = base + str(index) + ext
index += 1 index += 1
return id, unicode_type(href) return id, str(href)
def __iter__(self): def __iter__(self):
for item in self.items: for item in self.items:
@@ -1435,7 +1435,7 @@ class Guide(object):
def add(self, type, title, href): def add(self, type, title, href):
"""Add a new reference to the `Guide`.""" """Add a new reference to the `Guide`."""
if href: if href:
href = unicode_type(href) href = str(href)
ref = self.Reference(self.oeb, type, title, href) ref = self.Reference(self.oeb, type, title, href)
self.refs[type] = ref self.refs[type] = ref
return ref return ref
@@ -1641,7 +1641,7 @@ class TOC(object):
po = node.play_order po = node.play_order
if po == 0: if po == 0:
po = 1 po = 1
attrib = {'id': id, 'playOrder': unicode_type(po)} attrib = {'id': id, 'playOrder': str(po)}
if node.klass: if node.klass:
attrib['class'] = node.klass attrib['class'] = node.klass
point = element(parent, NCX('navPoint'), attrib=attrib) point = element(parent, NCX('navPoint'), attrib=attrib)
@@ -1712,7 +1712,7 @@ class PageList(object):
TYPES = {'front', 'normal', 'special'} TYPES = {'front', 'normal', 'special'}
def __init__(self, name, href, type='normal', klass=None, id=None): def __init__(self, name, href, type='normal', klass=None, id=None):
self.name = unicode_type(name) self.name = str(name)
self.href = urlnormalize(href) self.href = urlnormalize(href)
self.type = type if type in self.TYPES else 'normal' self.type = type if type in self.TYPES else 'normal'
self.id = id self.id = id
@@ -1749,7 +1749,7 @@ class PageList(object):
for page in self.pages: for page in self.pages:
id = page.id or uuid_id() id = page.id or uuid_id()
type = page.type type = page.type
value = unicode_type(next(values[type])) value = str(next(values[type]))
attrib = {'id': id, 'value': value, 'type': type, 'playOrder': '0'} attrib = {'id': id, 'value': value, 'type': type, 'playOrder': '0'}
if page.klass: if page.klass:
attrib['class'] = page.klass attrib['class'] = page.klass
@@ -1848,7 +1848,7 @@ class OEBBook(object):
"""Automatically decode :param:`data` into a `unicode` object.""" """Automatically decode :param:`data` into a `unicode` object."""
def fix_data(d): def fix_data(d):
return d.replace('\r\n', '\n').replace('\r', '\n') return d.replace('\r\n', '\n').replace('\r', '\n')
if isinstance(data, unicode_type): if isinstance(data, str):
return fix_data(data) return fix_data(data)
bom_enc = None bom_enc = None
if data[:4] in (b'\0\0\xfe\xff', b'\xff\xfe\0\0'): if data[:4] in (b'\0\0\xfe\xff', b'\xff\xfe\0\0'):
@@ -1922,36 +1922,36 @@ class OEBBook(object):
for i, elem in enumerate(xpath(ncx, '//*[@playOrder and ./ncx:content[@src]]')): for i, elem in enumerate(xpath(ncx, '//*[@playOrder and ./ncx:content[@src]]')):
href = urlnormalize(selector(elem)[0]) href = urlnormalize(selector(elem)[0])
order = playorder.get(href, i) order = playorder.get(href, i)
elem.attrib['playOrder'] = unicode_type(order) elem.attrib['playOrder'] = str(order)
return return
def _to_ncx(self): def _to_ncx(self):
lang = unicode_type(self.metadata.language[0]) lang = str(self.metadata.language[0])
lang = lang.replace('_', '-') lang = lang.replace('_', '-')
ncx = etree.Element(NCX('ncx'), ncx = etree.Element(NCX('ncx'),
attrib={'version': '2005-1', XML('lang'): lang}, attrib={'version': '2005-1', XML('lang'): lang},
nsmap={None: NCX_NS}) nsmap={None: NCX_NS})
head = etree.SubElement(ncx, NCX('head')) head = etree.SubElement(ncx, NCX('head'))
etree.SubElement(head, NCX('meta'), etree.SubElement(head, NCX('meta'),
name='dtb:uid', content=unicode_type(self.uid)) name='dtb:uid', content=str(self.uid))
etree.SubElement(head, NCX('meta'), etree.SubElement(head, NCX('meta'),
name='dtb:depth', content=unicode_type(self.toc.depth())) name='dtb:depth', content=str(self.toc.depth()))
generator = ''.join(['calibre (', __version__, ')']) generator = ''.join(['calibre (', __version__, ')'])
etree.SubElement(head, NCX('meta'), etree.SubElement(head, NCX('meta'),
name='dtb:generator', content=generator) name='dtb:generator', content=generator)
etree.SubElement(head, NCX('meta'), etree.SubElement(head, NCX('meta'),
name='dtb:totalPageCount', content=unicode_type(len(self.pages))) name='dtb:totalPageCount', content=str(len(self.pages)))
maxpnum = etree.SubElement(head, NCX('meta'), maxpnum = etree.SubElement(head, NCX('meta'),
name='dtb:maxPageNumber', content='0') name='dtb:maxPageNumber', content='0')
title = etree.SubElement(ncx, NCX('docTitle')) title = etree.SubElement(ncx, NCX('docTitle'))
text = etree.SubElement(title, NCX('text')) text = etree.SubElement(title, NCX('text'))
text.text = unicode_type(self.metadata.title[0]) text.text = str(self.metadata.title[0])
navmap = etree.SubElement(ncx, NCX('navMap')) navmap = etree.SubElement(ncx, NCX('navMap'))
self.toc.to_ncx(navmap) self.toc.to_ncx(navmap)
if len(self.pages) > 0: if len(self.pages) > 0:
plist = self.pages.to_ncx(ncx) plist = self.pages.to_ncx(ncx)
value = max(int(x) for x in xpath(plist, '//@value')) value = max(int(x) for x in xpath(plist, '//@value'))
maxpnum.attrib['content'] = unicode_type(value) maxpnum.attrib['content'] = str(value)
self._update_playorder(ncx) self._update_playorder(ncx)
return ncx return ncx
+2 -2
View File
@@ -5,7 +5,7 @@ from css_parser.css import PropertyValue
from css_parser import profile as cssprofiles, CSSParser from css_parser import profile as cssprofiles, CSSParser
from ebook_converter.tinycss.fonts3 import parse_font, serialize_font_family from ebook_converter.tinycss.fonts3 import parse_font, serialize_font_family
from ebook_converter.ebooks.oeb.base import css_text from ebook_converter.ebooks.oeb.base import css_text
from ebook_converter.polyglot.builtins import iteritems, string_or_bytes, unicode_type from ebook_converter.polyglot.builtins import iteritems, string_or_bytes
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -390,7 +390,7 @@ def test_normalization(return_tests=False): # {{{
tuple('0 0 0 0'.split()) : '0', tuple('0 0 0 0'.split()) : '0',
}): }):
for prefix in ('margin', 'padding'): for prefix in ('margin', 'padding'):
css = {'%s-%s' % (prefix, x) : unicode_type(y)+'pt' if isinstance(y, numbers.Number) else y css = {'%s-%s' % (prefix, x) : str(y)+'pt' if isinstance(y, numbers.Number) else y
for x, y in zip(('left', 'top', 'right', 'bottom'), s)} for x, y in zip(('left', 'top', 'right', 'bottom'), s)}
css = '; '.join(('%s:%s' % (k, v) for k, v in iteritems(css))) css = '; '.join(('%s:%s' % (k, v) for k, v in iteritems(css)))
style = parseStyle(css) style = parseStyle(css)
+3 -3
View File
@@ -6,7 +6,7 @@ from ebook_converter import xml_replace_entities, force_unicode
from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.constants import filesystem_encoding from ebook_converter.constants import filesystem_encoding
from ebook_converter.ebooks.chardet import xml_to_unicode, strip_encoding_declarations from ebook_converter.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type, string_or_bytes from ebook_converter.polyglot.builtins import iteritems, itervalues, string_or_bytes
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -162,7 +162,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
filename = force_unicode(filename, enc=filesystem_encoding) filename = force_unicode(filename, enc=filesystem_encoding)
if not isinstance(data, unicode_type): if not isinstance(data, str):
if decoder is not None: if decoder is not None:
data = decoder(data) data = decoder(data)
else: else:
@@ -244,7 +244,7 @@ def parse_html(data, log=None, decoder=None, preprocessor=None,
nroot = safe_xml_fromstring('<html></html>') nroot = safe_xml_fromstring('<html></html>')
has_body = False has_body = False
for child in list(data): for child in list(data):
if isinstance(child.tag, (unicode_type, bytes)) and barename(child.tag) == 'body': if isinstance(child.tag, (str, bytes)) and barename(child.tag) == 'body':
has_body = True has_body = True
break break
parent = nroot parent = nroot
@@ -49,7 +49,7 @@ from ebook_converter.utils.ipc.simple_worker import WorkerError, fork_job
from ebook_converter.utils.logging import default_log from ebook_converter.utils.logging import default_log
from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.utils.zipfile import ZipFile from ebook_converter.utils.zipfile import ZipFile
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
exists, join, relpath = os.path.exists, os.path.join, os.path.relpath exists, join, relpath = os.path.exists, os.path.join, os.path.relpath
@@ -163,7 +163,7 @@ class ContainerBase(object): # {{{
""" """
def fix_data(d): def fix_data(d):
return d.replace('\r\n', '\n').replace('\r', '\n') return d.replace('\r\n', '\n').replace('\r', '\n')
if isinstance(data, unicode_type): if isinstance(data, str):
return fix_data(data) return fix_data(data)
bom_enc = None bom_enc = None
if data[:4] in {b'\0\0\xfe\xff', b'\xff\xfe\0\0'}: if data[:4] in {b'\0\0\xfe\xff', b'\xff\xfe\0\0'}:
@@ -681,7 +681,7 @@ class Container(ContainerBase): # {{{
''' The names of all manifest items whose media-type matches predicate. ''' The names of all manifest items whose media-type matches predicate.
`predicate` can be a set, a list, a string or a function taking a single `predicate` can be a set, a list, a string or a function taking a single
argument, which will be called with the media-type. ''' argument, which will be called with the media-type. '''
if isinstance(predicate, unicode_type): if isinstance(predicate, str):
predicate = predicate.__eq__ predicate = predicate.__eq__
elif hasattr(predicate, '__contains__'): elif hasattr(predicate, '__contains__'):
predicate = predicate.__contains__ predicate = predicate.__contains__
+3 -3
View File
@@ -10,7 +10,7 @@ from ebook_converter.ebooks.oeb.normalize_css import normalize_filter_css, norma
from ebook_converter.ebooks.oeb.polish.pretty import pretty_script_or_style, pretty_xml_tree, serialize from ebook_converter.ebooks.oeb.polish.pretty import pretty_script_or_style, pretty_xml_tree, serialize
from ebook_converter.utils.icu import numeric_sort_key from ebook_converter.utils.icu import numeric_sort_key
from ebook_converter.css_selectors import Select, SelectorError from ebook_converter.css_selectors import Select, SelectorError
from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type from ebook_converter.polyglot.builtins import iteritems, itervalues
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -342,10 +342,10 @@ def sort_sheet(container, sheet_or_text):
''' Sort the rules in a stylesheet. Note that in the general case this can ''' Sort the rules in a stylesheet. Note that in the general case this can
change the effective styles, but for most common sheets, it should be safe. change the effective styles, but for most common sheets, it should be safe.
''' '''
sheet = container.parse_css(sheet_or_text) if isinstance(sheet_or_text, unicode_type) else sheet_or_text sheet = container.parse_css(sheet_or_text) if isinstance(sheet_or_text, str) else sheet_or_text
def text_sort_key(x): def text_sort_key(x):
return numeric_sort_key(unicode_type(x or '')) return numeric_sort_key(str(x or ''))
def selector_sort_key(x): def selector_sort_key(x):
return (x.specificity, text_sort_key(x.selectorText)) return (x.specificity, text_sort_key(x.selectorText))
+1 -2
View File
@@ -7,7 +7,6 @@ from ebook_converter import xml_replace_entities
from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.ebooks.chardet import xml_to_unicode, strip_encoding_declarations from ebook_converter.ebooks.chardet import xml_to_unicode, strip_encoding_declarations
from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -81,7 +80,7 @@ def parse(raw, decoder=None, log=None, line_numbers=True, linenumber_attribute=N
if linenumber_attribute: if linenumber_attribute:
for elem in ans.iter(LxmlElement): for elem in ans.iter(LxmlElement):
if elem.sourceline is not None: if elem.sourceline is not None:
elem.set(linenumber_attribute, unicode_type(elem.sourceline)) elem.set(linenumber_attribute, str(elem.sourceline))
return ans return ans
except Exception: except Exception:
if log is not None: if log is not None:
+3 -3
View File
@@ -6,7 +6,7 @@ from ebook_converter.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML
from ebook_converter.ebooks.oeb.polish.errors import MalformedMarkup from ebook_converter.ebooks.oeb.polish.errors import MalformedMarkup
from ebook_converter.ebooks.oeb.polish.toc import node_from_loc from ebook_converter.ebooks.oeb.polish.toc import node_from_loc
from ebook_converter.ebooks.oeb.polish.replace import LinkRebaser from ebook_converter.ebooks.oeb.polish.replace import LinkRebaser
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -181,7 +181,7 @@ def split(container, name, loc_or_xpath, before=True, totals=None):
''' '''
root = container.parsed(name) root = container.parsed(name)
if isinstance(loc_or_xpath, unicode_type): if isinstance(loc_or_xpath, str):
split_point = root.xpath(loc_or_xpath)[0] split_point = root.xpath(loc_or_xpath)[0]
else: else:
try: try:
@@ -279,7 +279,7 @@ def multisplit(container, name, xpath, before=True):
raise AbortError('Cannot split on the <body> tag') raise AbortError('Cannot split on the <body> tag')
for i, tag in enumerate(nodes): for i, tag in enumerate(nodes):
tag.set('calibre-split-point', unicode_type(i)) tag.set('calibre-split-point', str(i))
current = name current = name
all_names = [name] all_names = [name]
+7 -7
View File
@@ -16,7 +16,7 @@ from ebook_converter.ebooks.oeb.polish.utils import guess_type, extract
from ebook_converter.ebooks.oeb.polish.opf import set_guide_item, get_book_language from ebook_converter.ebooks.oeb.polish.opf import set_guide_item, get_book_language
from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree
from ebook_converter.utils.localization import get_lang, canonicalize_lang, lang_as_iso639_1 from ebook_converter.utils.localization import get_lang, canonicalize_lang, lang_as_iso639_1
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -170,11 +170,11 @@ def parse_ncx(container, ncx_name):
toc_root.lang = toc_root.uid = None toc_root.lang = toc_root.uid = None
for attr, val in iteritems(root.attrib): for attr, val in iteritems(root.attrib):
if attr.endswith('lang'): if attr.endswith('lang'):
toc_root.lang = unicode_type(val) toc_root.lang = str(val)
break break
for uid in root.xpath('//*[calibre:lower-case(local-name()) = "meta" and @name="dtb:uid"]/@content'): for uid in root.xpath('//*[calibre:lower-case(local-name()) = "meta" and @name="dtb:uid"]/@content'):
if uid: if uid:
toc_root.uid = unicode_type(uid) toc_root.uid = str(uid)
break break
for pl in root.xpath('//*[calibre:lower-case(local-name()) = "pagelist"]'): for pl in root.xpath('//*[calibre:lower-case(local-name()) = "pagelist"]'):
for pt in pl.xpath('descendant::*[calibre:lower-case(local-name()) = "pagetarget"]'): for pt in pl.xpath('descendant::*[calibre:lower-case(local-name()) = "pagetarget"]'):
@@ -581,9 +581,9 @@ def create_ncx(toc, to_href, btitle, lang, uid):
nsmap={None: NCX_NS}) nsmap={None: NCX_NS})
head = etree.SubElement(ncx, NCX('head')) head = etree.SubElement(ncx, NCX('head'))
etree.SubElement(head, NCX('meta'), etree.SubElement(head, NCX('meta'),
name='dtb:uid', content=unicode_type(uid)) name='dtb:uid', content=str(uid))
etree.SubElement(head, NCX('meta'), etree.SubElement(head, NCX('meta'),
name='dtb:depth', content=unicode_type(toc.depth)) name='dtb:depth', content=str(toc.depth))
generator = ''.join(['calibre (', __version__, ')']) generator = ''.join(['calibre (', __version__, ')'])
etree.SubElement(head, NCX('meta'), etree.SubElement(head, NCX('meta'),
name='dtb:generator', content=generator) name='dtb:generator', content=generator)
@@ -601,7 +601,7 @@ def create_ncx(toc, to_href, btitle, lang, uid):
for child in toc_parent: for child in toc_parent:
play_order['c'] += 1 play_order['c'] += 1
point = etree.SubElement(xml_parent, NCX('navPoint'), id='num_%d' % play_order['c'], point = etree.SubElement(xml_parent, NCX('navPoint'), id='num_%d' % play_order['c'],
playOrder=unicode_type(play_order['c'])) playOrder=str(play_order['c']))
label = etree.SubElement(point, NCX('navLabel')) label = etree.SubElement(point, NCX('navLabel'))
title = child.title title = child.title
if title: if title:
@@ -770,7 +770,7 @@ def commit_nav_toc(container, toc, lang=None, landmarks=None, previous_nav=None)
for entry in toc.page_list: for entry in toc.page_list:
if container.has_name(entry['dest']) and container.mime_map[entry['dest']] in OEB_DOCS: if container.has_name(entry['dest']) and container.mime_map[entry['dest']] in OEB_DOCS:
a = create_li(ol, entry) a = create_li(ol, entry)
a.text = unicode_type(entry['pagenum']) a.text = str(entry['pagenum'])
pretty_xml_tree(nav) pretty_xml_tree(nav)
collapse_li(nav) collapse_li(nav)
container.replace(tocname, root) container.replace(tocname, root)
+3 -4
View File
@@ -23,7 +23,6 @@ from ebook_converter.utils.localization import get_lang
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.constants import __appname__, __version__ from ebook_converter.constants import __appname__, __version__
from ebook_converter import guess_type, xml_replace_entities from ebook_converter import guess_type, xml_replace_entities
from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import unquote from ebook_converter.polyglot.urllib import unquote
@@ -144,7 +143,7 @@ class OEBReader(object):
dict(a=__appname__, v=__version__) dict(a=__appname__, v=__version__)
meta_info_to_oeb_metadata(mi, self.oeb.metadata, self.logger) meta_info_to_oeb_metadata(mi, self.oeb.metadata, self.logger)
m = self.oeb.metadata m = self.oeb.metadata
m.add('identifier', unicode_type(uuid.uuid4()), id='uuid_id', scheme='uuid') m.add('identifier', str(uuid.uuid4()), id='uuid_id', scheme='uuid')
self.oeb.uid = self.oeb.metadata.identifier[-1] self.oeb.uid = self.oeb.metadata.identifier[-1]
if not m.title: if not m.title:
m.add('title', self.oeb.translate(__('Unknown'))) m.add('title', self.oeb.translate(__('Unknown')))
@@ -447,7 +446,7 @@ class OEBReader(object):
ncx = item.data ncx = item.data
title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()')) title = ''.join(xpath(ncx, 'ncx:docTitle/ncx:text/text()'))
title = COLLAPSE_RE.sub(' ', title.strip()) title = COLLAPSE_RE.sub(' ', title.strip())
title = title or unicode_type(self.oeb.metadata.title[0]) title = title or str(self.oeb.metadata.title[0])
toc = self.oeb.toc toc = self.oeb.toc
toc.title = title toc.title = title
navmaps = xpath(ncx, 'ncx:navMap') navmaps = xpath(ncx, 'ncx:navMap')
@@ -634,7 +633,7 @@ class OEBReader(object):
def _locate_cover_image(self): def _locate_cover_image(self):
if self.oeb.metadata.cover: if self.oeb.metadata.cover:
id = unicode_type(self.oeb.metadata.cover[0]) id = str(self.oeb.metadata.cover[0])
item = self.oeb.manifest.ids.get(id, None) item = self.oeb.manifest.ids.get(id, None)
if item is not None and item.media_type in OEB_IMAGES: if item is not None and item.media_type in OEB_IMAGES:
return item return item
+8 -8
View File
@@ -15,7 +15,7 @@ from ebook_converter.ebooks import unit_convert
from ebook_converter.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES, xpath, urlnormalize from ebook_converter.ebooks.oeb.base import XHTML, XHTML_NS, CSS_MIME, OEB_STYLES, xpath, urlnormalize
from ebook_converter.ebooks.oeb.normalize_css import DEFAULTS, normalizers from ebook_converter.ebooks.oeb.normalize_css import DEFAULTS, normalizers
from ebook_converter.css_selectors import Select, SelectorError, INAPPROPRIATE_PSEUDO_CLASSES from ebook_converter.css_selectors import Select, SelectorError, INAPPROPRIATE_PSEUDO_CLASSES
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
from ebook_converter.tinycss.media3 import CSSMedia3Parser from ebook_converter.tinycss.media3 import CSSMedia3Parser
@@ -317,7 +317,7 @@ class Stylizer(object):
for x in elem.iter('*'): for x in elem.iter('*'):
if x.text: if x.text:
punctuation_chars = [] punctuation_chars = []
text = unicode_type(x.text) text = str(x.text)
while text: while text:
category = unicodedata.category(text[0]) category = unicodedata.category(text[0])
if category[0] not in {'P', 'Z'}: if category[0] not in {'P', 'Z'}:
@@ -591,7 +591,7 @@ class Style(object):
x = self._style.get(attr) x = self._style.get(attr)
if x is not None: if x is not None:
if x == 'auto': if x == 'auto':
ans = self._unit_convert(unicode_type(img_size) + 'px', base=base) ans = self._unit_convert(str(img_size) + 'px', base=base)
else: else:
x = self._unit_convert(x, base=base) x = self._unit_convert(x, base=base)
if isinstance(x, numbers.Number): if isinstance(x, numbers.Number):
@@ -603,7 +603,7 @@ class Style(object):
if isinstance(x, numbers.Number): if isinstance(x, numbers.Number):
ans = x ans = x
if ans is None: if ans is None:
ans = self._unit_convert(unicode_type(img_size) + 'px', base=base) ans = self._unit_convert(str(img_size) + 'px', base=base)
maa = self._style.get('max-' + attr) maa = self._style.get('max-' + attr)
if maa is not None: if maa is not None:
x = self._unit_convert(maa, base=base) x = self._unit_convert(maa, base=base)
@@ -639,12 +639,12 @@ class Style(object):
result = base result = base
else: else:
result = self._unit_convert(width, base=base) result = self._unit_convert(width, base=base)
if isinstance(result, (unicode_type, bytes)): if isinstance(result, (str, bytes)):
result = self._profile.width result = self._profile.width
self._width = result self._width = result
if 'max-width' in self._style: if 'max-width' in self._style:
result = self._unit_convert(self._style['max-width'], base=base) result = self._unit_convert(self._style['max-width'], base=base)
if isinstance(result, (unicode_type, bytes)): if isinstance(result, (str, bytes)):
result = self._width result = self._width
if result < self._width: if result < self._width:
self._width = result self._width = result
@@ -676,12 +676,12 @@ class Style(object):
result = base result = base
else: else:
result = self._unit_convert(height, base=base) result = self._unit_convert(height, base=base)
if isinstance(result, (unicode_type, bytes)): if isinstance(result, (str, bytes)):
result = self._profile.height result = self._profile.height
self._height = result self._height = result
if 'max-height' in self._style: if 'max-height' in self._style:
result = self._unit_convert(self._style['max-height'], base=base) result = self._unit_convert(self._style['max-height'], base=base)
if isinstance(result, (unicode_type, bytes)): if isinstance(result, (str, bytes)):
result = self._height result = self._height
if result < self._height: if result < self._height:
self._height = result self._height = result
@@ -4,7 +4,6 @@ import urllib.parse
from ebook_converter import guess_type from ebook_converter import guess_type
from ebook_converter.utils.imghdr import identify from ebook_converter.utils.imghdr import identify
from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import unquote from ebook_converter.polyglot.urllib import unquote
@@ -111,9 +110,9 @@ class CoverManager(object):
self.svg_template = self.svg_template.replace('__viewbox__', self.svg_template = self.svg_template.replace('__viewbox__',
'0 0 %d %d'%(width, height)) '0 0 %d %d'%(width, height))
self.svg_template = self.svg_template.replace('__width__', self.svg_template = self.svg_template.replace('__width__',
unicode_type(width)) str(width))
self.svg_template = self.svg_template.replace('__height__', self.svg_template = self.svg_template.replace('__height__',
unicode_type(height)) str(height))
if href is not None: if href is not None:
templ = self.non_svg_template if self.no_svg_cover \ templ = self.non_svg_template if self.no_svg_cover \
@@ -16,7 +16,7 @@ from ebook_converter.ebooks.oeb.base import (XHTML, XHTML_NS, CSS_MIME, OEB_STYL
from ebook_converter.ebooks.oeb.stylizer import Stylizer from ebook_converter.ebooks.oeb.stylizer import Stylizer
from ebook_converter.utils.filenames import ascii_filename, ascii_text from ebook_converter.utils.filenames import ascii_filename, ascii_text
from ebook_converter.utils.icu import numeric_sort_key from ebook_converter.utils.icu import numeric_sort_key
from ebook_converter.polyglot.builtins import iteritems, unicode_type, string_or_bytes from ebook_converter.polyglot.builtins import iteritems, string_or_bytes
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -250,7 +250,7 @@ class CSSFlattener(object):
cfont = { cfont = {
'font-family': '"%s"'%font['font-family'], 'font-family': '"%s"'%font['font-family'],
'panose-1': ' '.join(map(unicode_type, font['panose'])), 'panose-1': ' '.join(map(str, font['panose'])),
'src': 'url(%s)'%item.href, 'src': 'url(%s)'%item.href,
} }
@@ -476,7 +476,7 @@ class CSSFlattener(object):
minlh = self.context.minimum_line_height / 100. minlh = self.context.minimum_line_height / 100.
slh = style['line-height'] slh = style['line-height']
if not is_drop_cap and isinstance(slh, numbers.Number) and slh < minlh * fsize: if not is_drop_cap and isinstance(slh, numbers.Number) and slh < minlh * fsize:
cssdict['line-height'] = unicode_type(minlh) cssdict['line-height'] = str(minlh)
except Exception: except Exception:
self.oeb.logger.exception('Failed to set minimum line-height') self.oeb.logger.exception('Failed to set minimum line-height')
@@ -538,7 +538,7 @@ class CSSFlattener(object):
if css in styles: if css in styles:
match = styles[css] match = styles[css]
else: else:
match = klass + unicode_type(names[klass] or '') match = klass + str(names[klass] or '')
styles[css] = match styles[css] = match
names[klass] += 1 names[klass] += 1
node.attrib['class'] = match node.attrib['class'] = match
@@ -558,7 +558,7 @@ class CSSFlattener(object):
# then the class attribute for a.x tags will contain both # then the class attribute for a.x tags will contain both
# that class and the class for a.x:hover, which is wrong. # that class and the class for a.x:hover, which is wrong.
klass = 'pcalibre' klass = 'pcalibre'
match = klass + unicode_type(names[klass] or '') match = klass + str(names[klass] or '')
pstyles[css] = match pstyles[css] = match
names[klass] += 1 names[klass] += 1
keep_classes.add(match) keep_classes.add(match)
@@ -4,7 +4,6 @@ HTML-TOC-adding transform.
from ebook_converter.ebooks.oeb.base import XML, XHTML, XHTML_NS from ebook_converter.ebooks.oeb.base import XML, XHTML, XHTML_NS
from ebook_converter.ebooks.oeb.base import XHTML_MIME, CSS_MIME from ebook_converter.ebooks.oeb.base import XHTML_MIME, CSS_MIME
from ebook_converter.ebooks.oeb.base import element, XPath from ebook_converter.ebooks.oeb.base import element, XPath
from ebook_converter.polyglot.builtins import unicode_type
__all__ = ['HTMLTOCAdder'] __all__ = ['HTMLTOCAdder']
@@ -93,7 +92,7 @@ class HTMLTOCAdder(object):
style = 'nested' style = 'nested'
id, css_href = oeb.manifest.generate('tocstyle', 'tocstyle.css') id, css_href = oeb.manifest.generate('tocstyle', 'tocstyle.css')
oeb.manifest.add(id, css_href, CSS_MIME, data=STYLE_CSS[style]) oeb.manifest.add(id, css_href, CSS_MIME, data=STYLE_CSS[style])
language = unicode_type(oeb.metadata.language[0]) language = str(oeb.metadata.language[0])
contents = element(None, XHTML('html'), nsmap={None: XHTML_NS}, contents = element(None, XHTML('html'), nsmap={None: XHTML_NS},
attrib={XML('lang'): language}) attrib={XML('lang'): language})
head = element(contents, XHTML('head')) head = element(contents, XHTML('head'))
@@ -11,7 +11,6 @@ from ebook_converter.library.comments import comments_to_html, markdown
from ebook_converter.utils.date import is_date_undefined, as_local_time from ebook_converter.utils.date import is_date_undefined, as_local_time
from ebook_converter.ebooks.chardet import strip_encoding_declarations from ebook_converter.ebooks.chardet import strip_encoding_declarations
from ebook_converter.ebooks.metadata import fmt_sidx, rating_to_stars from ebook_converter.ebooks.metadata import fmt_sidx, rating_to_stars
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -99,22 +98,22 @@ class Jacket(Base):
self.log('Inserting metadata into book...') self.log('Inserting metadata into book...')
try: try:
tags = list(map(unicode_type, self.oeb.metadata.subject)) tags = list(map(str, self.oeb.metadata.subject))
except Exception: except Exception:
tags = [] tags = []
try: try:
comments = unicode_type(self.oeb.metadata.description[0]) comments = str(self.oeb.metadata.description[0])
except: except:
comments = '' comments = ''
try: try:
title = unicode_type(self.oeb.metadata.title[0]) title = str(self.oeb.metadata.title[0])
except: except:
title = _('Unknown') title = _('Unknown')
try: try:
authors = list(map(unicode_type, self.oeb.metadata.creator)) authors = list(map(str, self.oeb.metadata.creator))
except: except:
authors = [_('Unknown')] authors = [_('Unknown')]
@@ -171,7 +170,7 @@ def get_rating(rating, rchar, e_rchar):
return ans return ans
class Series(unicode_type): class Series(str):
def __new__(self, series, series_index): def __new__(self, series, series_index):
if series and series_index is not None: if series and series_index is not None:
@@ -181,7 +180,7 @@ class Series(unicode_type):
escape(series), escape(fmt_sidx(series_index, use_roman=False))) escape(series), escape(fmt_sidx(series_index, use_roman=False)))
else: else:
combined = roman = escape(series or u'') combined = roman = escape(series or u'')
s = unicode_type.__new__(self, combined) s = str.__new__(self, combined)
s.roman = roman s.roman = roman
s.name = escape(series or '') s.name = escape(series or '')
s.number = escape(fmt_sidx(series_index or 1.0, use_roman=False)) s.number = escape(fmt_sidx(series_index or 1.0, use_roman=False))
@@ -189,11 +188,11 @@ class Series(unicode_type):
return s return s
class Tags(unicode_type): class Tags(str):
def __new__(self, tags, output_profile): def __new__(self, tags, output_profile):
tags = [escape(x) for x in tags or ()] tags = [escape(x) for x in tags or ()]
t = unicode_type.__new__(self, ', '.join(tags)) t = str.__new__(self, ', '.join(tags))
t.alphabetical = ', '.join(sorted(tags)) t.alphabetical = ', '.join(sorted(tags))
t.tags_list = tags t.tags_list = tags
return t return t
@@ -15,7 +15,6 @@ from ebook_converter.ebooks.oeb.base import urlnormalize
from ebook_converter.ebooks.oeb.stylizer import Stylizer from ebook_converter.ebooks.oeb.stylizer import Stylizer
from ebook_converter.ptempfile import PersistentTemporaryFile from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.utils.imghdr import what from ebook_converter.utils.imghdr import what
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -78,7 +77,7 @@ class SVGRasterizer(object):
logger.info('Found SVG image height in %, trying to convert...') logger.info('Found SVG image height in %, trying to convert...')
try: try:
h = float(image.get('height').replace('%', ''))/100. h = float(image.get('height').replace('%', ''))/100.
image.set('height', unicode_type(h*sizes[1])) image.set('height', str(h*sizes[1]))
except: except:
logger.exception('Failed to convert percentage height:', logger.exception('Failed to convert percentage height:',
image.get('height')) image.get('height'))
@@ -224,11 +223,11 @@ class SVGRasterizer(object):
covers = self.oeb.metadata.cover covers = self.oeb.metadata.cover
if not covers: if not covers:
return return
if unicode_type(covers[0]) not in self.oeb.manifest.ids: if str(covers[0]) not in self.oeb.manifest.ids:
self.oeb.logger.warn('Cover not in manifest, skipping.') self.oeb.logger.warn('Cover not in manifest, skipping.')
self.oeb.metadata.clear('cover') self.oeb.metadata.clear('cover')
return return
cover = self.oeb.manifest.ids[unicode_type(covers[0])] cover = self.oeb.manifest.ids[str(covers[0])]
if not cover.media_type == SVG_MIME: if not cover.media_type == SVG_MIME:
return return
width = (self.profile.width / 72) * self.profile.dpi width = (self.profile.width / 72) * self.profile.dpi
@@ -15,7 +15,7 @@ from ebook_converter.ebooks.epub import rules
from ebook_converter.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES, from ebook_converter.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
rewrite_links, XHTML, urlnormalize) rewrite_links, XHTML, urlnormalize)
from ebook_converter.ebooks.oeb.polish.split import do_split from ebook_converter.ebooks.oeb.polish.split import do_split
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems
from ebook_converter.polyglot.urllib import unquote from ebook_converter.polyglot.urllib import unquote
from ebook_converter.css_selectors import Select, SelectorError from ebook_converter.css_selectors import Select, SelectorError
@@ -122,7 +122,7 @@ class Split(object):
for i, elem in enumerate(item.data.iter('*')): for i, elem in enumerate(item.data.iter('*')):
try: try:
elem.set('pb_order', unicode_type(i)) elem.set('pb_order', str(i))
except TypeError: # Cant set attributes on comment nodes etc. except TypeError: # Cant set attributes on comment nodes etc.
continue continue
@@ -7,7 +7,7 @@ from collections import OrderedDict, Counter
from ebook_converter.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename from ebook_converter.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename
from ebook_converter.ebooks import ConversionError from ebook_converter.ebooks import ConversionError
from ebook_converter.polyglot.builtins import itervalues, unicode_type from ebook_converter.polyglot.builtins import itervalues
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -122,7 +122,7 @@ class DetectStructure(object):
elem = matches[0] elem = matches[0]
eid = elem.get('id', None) eid = elem.get('id', None)
if not eid: if not eid:
eid = 'start_reading_at_'+unicode_type(uuid.uuid4()).replace('-', '') eid = 'start_reading_at_'+str(uuid.uuid4()).replace('-', '')
elem.set('id', eid) elem.set('id', eid)
if 'text' in self.oeb.guide: if 'text' in self.oeb.guide:
self.oeb.guide.remove('text') self.oeb.guide.remove('text')
@@ -2,7 +2,7 @@ from collections import defaultdict
from ebook_converter.ebooks.oeb.base import urlnormalize, css_text from ebook_converter.ebooks.oeb.base import urlnormalize, css_text
from ebook_converter.utils.fonts.sfnt.subset import subset, NoGlyphs, UnsupportedFont from ebook_converter.utils.fonts.sfnt.subset import subset, NoGlyphs, UnsupportedFont
from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type from ebook_converter.polyglot.builtins import iteritems, itervalues
from ebook_converter.tinycss.fonts3 import parse_font_family from ebook_converter.tinycss.fonts3 import parse_font_family
@@ -32,7 +32,7 @@ def get_font_properties(rule, default=None):
except (IndexError, KeyError, AttributeError, TypeError, ValueError): except (IndexError, KeyError, AttributeError, TypeError, ValueError):
val = None if q in {'src', 'font-family'} else default val = None if q in {'src', 'font-family'} else default
if q in {'font-weight', 'font-stretch', 'font-style'}: if q in {'font-weight', 'font-stretch', 'font-style'}:
val = unicode_type(val).lower() if (val or val == 0) else val val = str(val).lower() if (val or val == 0) else val
if val == 'inherit': if val == 'inherit':
val = default val = default
if q == 'font-weight': if q == 'font-weight':
@@ -233,7 +233,7 @@ class SubsetFonts(object):
no match is found (can happen if no family matches). no match is found (can happen if no family matches).
''' '''
ff = style.get('font-family', []) ff = style.get('font-family', [])
lnames = {unicode_type(x).lower() for x in ff} lnames = {str(x).lower() for x in ff}
matching_set = [] matching_set = []
# Filter on font-family # Filter on font-family
@@ -11,7 +11,6 @@ from ebook_converter.ebooks import DRMError
from ebook_converter.ebooks.metadata.opf2 import OPFCreator from ebook_converter.ebooks.metadata.opf2 import OPFCreator
from ebook_converter.ebooks.pdb.ereader import EreaderError from ebook_converter.ebooks.pdb.ereader import EreaderError
from ebook_converter.ebooks.pdb.formatreader import FormatReader from ebook_converter.ebooks.pdb.formatreader import FormatReader
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -112,7 +111,7 @@ class Reader132(FormatReader):
os.makedirs(output_dir) os.makedirs(output_dir)
title = self.mi.title title = self.mi.title
if not isinstance(title, unicode_type): if not isinstance(title, str):
title = title.decode('utf-8', 'replace') title = title.decode('utf-8', 'replace')
html = '<html><head><title>%s</title></head><body>' % title html = '<html><head><title>%s</title></head><body>' % title
@@ -9,7 +9,6 @@ from ebook_converter import CurrentDir
from ebook_converter.ebooks.metadata.opf2 import OPFCreator from ebook_converter.ebooks.metadata.opf2 import OPFCreator
from ebook_converter.ebooks.pdb.formatreader import FormatReader from ebook_converter.ebooks.pdb.formatreader import FormatReader
from ebook_converter.ebooks.pdb.ereader import EreaderError from ebook_converter.ebooks.pdb.ereader import EreaderError
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -97,7 +96,7 @@ class Reader202(FormatReader):
pml += self.get_text_page(i) pml += self.get_text_page(i)
title = self.mi.title title = self.mi.title
if not isinstance(title, unicode_type): if not isinstance(title, str):
title = title.decode('utf-8', 'replace') title = title.decode('utf-8', 'replace')
html = '<html><head><title>%s</title></head><body>%s</body></html>' % \ html = '<html><head><title>%s</title></head><body>%s</body></html>' % \
+6 -6
View File
@@ -4,7 +4,7 @@ from datetime import datetime
from ebook_converter.constants import ispy3 from ebook_converter.constants import ispy3
from ebook_converter.utils.logging import default_log from ebook_converter.utils.logging import default_log
from ebook_converter.polyglot.builtins import iteritems, unicode_type, codepoint_to_chr from ebook_converter.polyglot.builtins import iteritems, codepoint_to_chr
from ebook_converter.polyglot.binary import as_hex_bytes from ebook_converter.polyglot.binary import as_hex_bytes
@@ -56,7 +56,7 @@ PAPER_SIZES = {k:globals()[k.upper()] for k in ('a0 a1 a2 a3 a4 a5 a6 b0 b1 b2'
def fmtnum(o): def fmtnum(o):
if isinstance(o, float): if isinstance(o, float):
return pdf_float(o) return pdf_float(o)
return unicode_type(o) return str(o)
def serialize(o, stream): def serialize(o, stream):
@@ -66,7 +66,7 @@ def serialize(o, stream):
# Must check bool before int as bools are subclasses of int # Must check bool before int as bools are subclasses of int
stream.write_raw(b'true' if o else b'false') stream.write_raw(b'true' if o else b'false')
elif isinstance(o, numbers.Integral): elif isinstance(o, numbers.Integral):
stream.write_raw(unicode_type(o).encode('ascii') if ispy3 else bytes(o)) stream.write_raw(str(o).encode('ascii') if ispy3 else bytes(o))
elif hasattr(o, 'pdf_serialize'): elif hasattr(o, 'pdf_serialize'):
o.pdf_serialize(stream) o.pdf_serialize(stream)
elif o is None: elif o is None:
@@ -80,7 +80,7 @@ def serialize(o, stream):
raise ValueError('Unknown object: %r'%o) raise ValueError('Unknown object: %r'%o)
class Name(unicode_type): class Name(str):
def pdf_serialize(self, stream): def pdf_serialize(self, stream):
raw = self.encode('ascii') raw = self.encode('ascii')
@@ -117,7 +117,7 @@ def escape_pdf_string(bytestring):
return bytes(ba) return bytes(ba)
class String(unicode_type): class String(str):
def pdf_serialize(self, stream): def pdf_serialize(self, stream):
try: try:
@@ -129,7 +129,7 @@ class String(unicode_type):
stream.write(b'('+escape_pdf_string(raw)+b')') stream.write(b'('+escape_pdf_string(raw)+b')')
class UTF16String(unicode_type): class UTF16String(str):
def pdf_serialize(self, stream): def pdf_serialize(self, stream):
raw = codecs.BOM_UTF16_BE + self.encode('utf-16-be') raw = codecs.BOM_UTF16_BE + self.encode('utf-16-be')
+1 -2
View File
@@ -25,7 +25,6 @@ from ebook_converter.ebooks.rtf2xml import headings_to_sections, \
body_styles, preamble_rest, group_styles, \ body_styles, preamble_rest, group_styles, \
inline inline
from ebook_converter.ebooks.rtf2xml.old_rtf import OldRtf from ebook_converter.ebooks.rtf2xml.old_rtf import OldRtf
from ebook_converter.polyglot.builtins import unicode_type
from . import open_for_read, open_for_write from . import open_for_read, open_for_write
@@ -249,7 +248,7 @@ class ParseRtf:
enc = encode_obj.get_codepage() enc = encode_obj.get_codepage()
# TODO: to check if cp is a good idea or if I should use a dict to convert # TODO: to check if cp is a good idea or if I should use a dict to convert
enc = 'cp' + enc enc = 'cp' + enc
msg = '%s\nException in token processing' % unicode_type(msg) msg = '%s\nException in token processing' % str(msg)
if check_encoding_obj.check_encoding(self.__file, enc): if check_encoding_obj.check_encoding(self.__file, enc):
file_name = self.__file if isinstance(self.__file, bytes) \ file_name = self.__file if isinstance(self.__file, bytes) \
else self.__file.encode('utf-8') else self.__file.encode('utf-8')
@@ -1,7 +1,5 @@
import sys import sys
from ebook_converter.polyglot.builtins import unicode_type
class CheckEncoding: class CheckEncoding:
@@ -15,7 +13,7 @@ class CheckEncoding:
try: try:
char.decode(encoding) char.decode(encoding)
except ValueError as msg: except ValueError as msg:
sys.stderr.write('line: %s char: %s\n%s\n' % (line_num, char_position, unicode_type(msg))) sys.stderr.write('line: %s char: %s\n%s\n' % (line_num, char_position, str(msg)))
def check_encoding(self, path, encoding='us-ascii', verbose=True): def check_encoding(self, path, encoding='us-ascii', verbose=True):
line_num = 0 line_num = 0
+2 -3
View File
@@ -14,7 +14,6 @@ import os
from ebook_converter.ebooks.rtf2xml import copy from ebook_converter.ebooks.rtf2xml import copy
from ebook_converter.ptempfile import better_mktemp from ebook_converter.ptempfile import better_mktemp
from ebook_converter.polyglot.builtins import unicode_type
from . import open_for_read, open_for_write from . import open_for_read, open_for_write
@@ -57,7 +56,7 @@ class Footnote:
if self.__first_line: if self.__first_line:
self.__first_line_func(line) self.__first_line_func(line)
if self.__token_info == 'cw<ci<footnot-mk': if self.__token_info == 'cw<ci<footnot-mk':
num = unicode_type(self.__footnote_count) num = str(self.__footnote_count)
self.__write_to_foot_obj.write(line) self.__write_to_foot_obj.write(line)
self.__write_to_foot_obj.write( self.__write_to_foot_obj.write(
'tx<nu<__________<%s\n' % num 'tx<nu<__________<%s\n' % num
@@ -94,7 +93,7 @@ class Footnote:
self.__found_footnote(line) self.__found_footnote(line)
self.__write_obj.write(line) self.__write_obj.write(line)
if self.__token_info == 'cw<ci<footnot-mk': if self.__token_info == 'cw<ci<footnot-mk':
num = unicode_type(self.__footnote_count + 1) num = str(self.__footnote_count + 1)
self.__write_obj.write( self.__write_obj.write(
'tx<nu<__________<%s\n' % num 'tx<nu<__________<%s\n' % num
) )
+1 -2
View File
@@ -15,7 +15,6 @@ import sys, os, io
from ebook_converter.ebooks.rtf2xml import get_char_map, copy from ebook_converter.ebooks.rtf2xml import get_char_map, copy
from ebook_converter.ebooks.rtf2xml.char_set import char_set from ebook_converter.ebooks.rtf2xml.char_set import char_set
from ebook_converter.ptempfile import better_mktemp from ebook_converter.ptempfile import better_mktemp
from ebook_converter.polyglot.builtins import unicode_type
from . import open_for_read, open_for_write from . import open_for_read, open_for_write
@@ -482,7 +481,7 @@ class Hex2Utf8:
the_string = '' the_string = ''
for letter in text: for letter in text:
hex_num = hex(ord(letter)) hex_num = hex(ord(letter))
hex_num = unicode_type(hex_num) hex_num = str(hex_num)
hex_num = hex_num.upper() hex_num = hex_num.upper()
hex_num = hex_num[2:] hex_num = hex_num[2:]
hex_num = '\'%s' % hex_num hex_num = '\'%s' % hex_num
+5 -7
View File
@@ -11,8 +11,6 @@
# # # #
######################################################################### #########################################################################
from ebook_converter.polyglot.builtins import unicode_type
class ListTable: class ListTable:
""" """
@@ -234,7 +232,7 @@ class ListTable:
""" """
num = line[18:] num = line[18:]
num = int(num, 16) num = int(num, 16)
level = unicode_type(round((num - 1)/2, 0)) level = str(round((num - 1)/2, 0))
level = level[:-2] level = level[:-2]
level = 'level%s-show-level' % level level = 'level%s-show-level' % level
self.__all_lists[-1][-1][0][level] = 'true' self.__all_lists[-1][-1][0][level] = 'true'
@@ -291,11 +289,11 @@ class ListTable:
num = line[18:] num = line[18:]
the_num = int(num, 16) the_num = int(num, 16)
if not self.__found_level_text_length: if not self.__found_level_text_length:
self.__all_lists[-1][-1][0]['list-text-length'] = unicode_type(the_num) self.__all_lists[-1][-1][0]['list-text-length'] = str(the_num)
self.__found_level_text_length = 1 self.__found_level_text_length = 1
else: else:
the_num += 1 the_num += 1
the_string = unicode_type(the_num) the_string = str(the_num)
level_marker = 'level%s-suffix' % the_string level_marker = 'level%s-suffix' % the_string
show_marker = 'show-level%s' % the_string show_marker = 'show-level%s' % the_string
self.__level_text_position = level_marker self.__level_text_position = level_marker
@@ -383,7 +381,7 @@ class ListTable:
for list in self.__all_lists: for list in self.__all_lists:
id += 1 id += 1
self.__list_table_final += 'mi<tg<open-att__<list-in-table' self.__list_table_final += 'mi<tg<open-att__<list-in-table'
# self.__list_table_final += '<list-id>%s' % (unicode_type(id)) # self.__list_table_final += '<list-id>%s' % (str(id))
the_dict = list[0] the_dict = list[0]
the_keys = the_dict.keys() the_keys = the_dict.keys()
for the_key in the_keys: for the_key in the_keys:
@@ -398,7 +396,7 @@ class ListTable:
for level in levels: for level in levels:
level_num += 1 level_num += 1
self.__list_table_final += 'mi<tg<empty-att_<level-in-table' self.__list_table_final += 'mi<tg<empty-att_<level-in-table'
self.__list_table_final += '<level>%s' % (unicode_type(level_num)) self.__list_table_final += '<level>%s' % (str(level_num))
the_dict2 = level[0] the_dict2 = level[0]
the_keys2 = the_dict2.keys() the_keys2 = the_dict2.keys()
is_bullet = 0 is_bullet = 0
+1 -2
View File
@@ -14,7 +14,6 @@ import sys, os, re
from ebook_converter.ebooks.rtf2xml import copy from ebook_converter.ebooks.rtf2xml import copy
from ebook_converter.ptempfile import better_mktemp from ebook_converter.ptempfile import better_mktemp
from ebook_converter.polyglot.builtins import unicode_type
from . import open_for_read, open_for_write from . import open_for_read, open_for_write
@@ -289,7 +288,7 @@ class MakeLists:
'mi<mk<list_start\n' 'mi<mk<list_start\n'
) )
# bogus levels are sometimes written for empty paragraphs # bogus levels are sometimes written for empty paragraphs
if unicode_type(self.__level) not in self.__allow_levels: if str(self.__level) not in self.__allow_levels:
lev_num = '0' lev_num = '0'
else: else:
lev_num = self.__level lev_num = self.__level
+1 -3
View File
@@ -12,8 +12,6 @@
######################################################################### #########################################################################
import sys import sys
from ebook_converter.polyglot.builtins import unicode_type
from . import open_for_read from . import open_for_read
@@ -138,7 +136,7 @@ class OldRtf:
if self.__run_level > 3: if self.__run_level > 3:
sys.stderr.write( sys.stderr.write(
'Old rtf construction %s (bracket %s, line %s)\n' % ( 'Old rtf construction %s (bracket %s, line %s)\n' % (
self.__inline_info, unicode_type(self.__ob_group), line_num) self.__inline_info, str(self.__ob_group), line_num)
) )
return True return True
self.__previous_token = line[6:16] self.__previous_token = line[6:16]
@@ -14,7 +14,6 @@ import sys, os
from ebook_converter.ebooks.rtf2xml import copy, border_parse from ebook_converter.ebooks.rtf2xml import copy, border_parse
from ebook_converter.ptempfile import better_mktemp from ebook_converter.ptempfile import better_mktemp
from ebook_converter.polyglot.builtins import unicode_type
from . import open_for_read, open_for_write from . import open_for_read, open_for_write
@@ -623,7 +622,7 @@ if another paragraph_def is found, the state changes to collect_tokens.
num = len(self.__style_num_strings) num = len(self.__style_num_strings)
new_style = 1 new_style = 1
num = '%04d' % num num = '%04d' % num
self.__att_val_dict['style-num'] = 's' + unicode_type(num) self.__att_val_dict['style-num'] = 's' + str(num)
if new_style: if new_style:
self.__write_body_styles() self.__write_body_styles()
+1 -2
View File
@@ -14,7 +14,6 @@ import sys, os
from ebook_converter.ebooks.rtf2xml import copy from ebook_converter.ebooks.rtf2xml import copy
from ebook_converter.ptempfile import better_mktemp from ebook_converter.ptempfile import better_mktemp
from ebook_converter.polyglot.builtins import unicode_type
from . import open_for_read, open_for_write from . import open_for_read, open_for_write
@@ -77,7 +76,7 @@ class Pict:
try: try:
os.mkdir(self.__dir_name) os.mkdir(self.__dir_name)
except OSError as msg: except OSError as msg:
msg = "%sCouldn't make directory '%s':\n" % (unicode_type(msg), self.__dir_name) msg = "%sCouldn't make directory '%s':\n" % (str(msg), self.__dir_name)
raise self.__bug_handler raise self.__bug_handler
else: else:
if self.__run_level > 1: if self.__run_level > 1:
@@ -14,7 +14,6 @@ import os, re
from ebook_converter.ebooks.rtf2xml import copy, check_brackets from ebook_converter.ebooks.rtf2xml import copy, check_brackets
from ebook_converter.ptempfile import better_mktemp from ebook_converter.ptempfile import better_mktemp
from ebook_converter.polyglot.builtins import unicode_type
from . import open_for_read, open_for_write from . import open_for_read, open_for_write
@@ -694,7 +693,7 @@ class ProcessTokens:
if num[-1] == ';': if num[-1] == ';':
num = num[:-1] num = num[:-1]
third_field = 'en' third_field = 'en'
num = unicode_type('%X' % int(num)) num = str('%X' % int(num))
if len(num) != 2: if len(num) != 2:
num = "0" + num num = "0" + num
return 'cw<%s<%s<%s<%s\n' % (pre, token, third_field, num) return 'cw<%s<%s<%s<%s\n' % (pre, token, third_field, num)
@@ -731,7 +730,7 @@ class ProcessTokens:
return 0 return 0
num = '%0.2f' % round(numerator/denominator, 2) num = '%0.2f' % round(numerator/denominator, 2)
return num return num
string_num = unicode_type(num) string_num = str(num)
if string_num[-2:] == ".0": if string_num[-2:] == ".0":
string_num = string_num[:-2] string_num = string_num[:-2]
return string_num return string_num
+6 -7
View File
@@ -14,7 +14,6 @@ import sys, os
from ebook_converter.ebooks.rtf2xml import copy from ebook_converter.ebooks.rtf2xml import copy
from ebook_converter.ptempfile import better_mktemp from ebook_converter.ptempfile import better_mktemp
from ebook_converter.polyglot.builtins import unicode_type
from . import open_for_read, open_for_write from . import open_for_read, open_for_write
@@ -275,8 +274,8 @@ class Sections:
my_string += 'mi<tg<close_____<section\n' my_string += 'mi<tg<close_____<section\n'
else: else:
self.__found_first_sec = 1 self.__found_first_sec = 1
my_string += 'mi<tg<open-att__<section<num>%s' % unicode_type(self.__section_num) my_string += 'mi<tg<open-att__<section<num>%s' % str(self.__section_num)
my_string += '<num-in-level>%s' % unicode_type(self.__section_num) my_string += '<num-in-level>%s' % str(self.__section_num)
my_string += '<type>rtf-native' my_string += '<type>rtf-native'
my_string += '<level>0' my_string += '<level>0'
keys = self.__section_values.keys() keys = self.__section_values.keys()
@@ -358,7 +357,7 @@ class Sections:
'<num-in-level>%s' '<num-in-level>%s'
'<type>rtf-native' '<type>rtf-native'
'<level>0\n' '<level>0\n'
% (unicode_type(self.__section_num), unicode_type(self.__section_num)) % (str(self.__section_num), str(self.__section_num))
) )
self.__found_first_sec = 1 self.__found_first_sec = 1
elif self.__token_info == 'tx<nu<__________': elif self.__token_info == 'tx<nu<__________':
@@ -369,7 +368,7 @@ class Sections:
'<num-in-level>%s' '<num-in-level>%s'
'<type>rtf-native' '<type>rtf-native'
'<level>0\n' '<level>0\n'
% (unicode_type(self.__section_num), unicode_type(self.__section_num)) % (str(self.__section_num), str(self.__section_num))
) )
self.__write_obj.write( self.__write_obj.write(
'cw<pf<par-def___<true\n' 'cw<pf<par-def___<true\n'
@@ -462,7 +461,7 @@ class Sections:
self.__field_num = self.__field_num[1:] self.__field_num = self.__field_num[1:]
self.__write_obj.write( self.__write_obj.write(
'mi<tg<close_____<section\n' 'mi<tg<close_____<section\n'
'mi<tg<open-att__<section<num>%s' % unicode_type(num) 'mi<tg<open-att__<section<num>%s' % str(num)
) )
if self.__list_of_sec_values: if self.__list_of_sec_values:
keys = self.__list_of_sec_values[0].keys() keys = self.__list_of_sec_values[0].keys()
@@ -472,7 +471,7 @@ class Sections:
self.__list_of_sec_values = self.__list_of_sec_values[1:] self.__list_of_sec_values = self.__list_of_sec_values[1:]
self.__write_obj.write('<level>0') self.__write_obj.write('<level>0')
self.__write_obj.write('<type>rtf-native') self.__write_obj.write('<type>rtf-native')
self.__write_obj.write('<num-in-level>%s' % unicode_type(self.__section_num)) self.__write_obj.write('<num-in-level>%s' % str(self.__section_num))
self.__write_obj.write('\n') self.__write_obj.write('\n')
# Look here # Look here
+3 -4
View File
@@ -14,7 +14,6 @@ import sys, os
from ebook_converter.ebooks.rtf2xml import copy, border_parse from ebook_converter.ebooks.rtf2xml import copy, border_parse
from ebook_converter.ptempfile import better_mktemp from ebook_converter.ptempfile import better_mktemp
from ebook_converter.polyglot.builtins import unicode_type
from . import open_for_read, open_for_write from . import open_for_read, open_for_write
@@ -399,13 +398,13 @@ class Table:
left_position = float(left_position) left_position = float(left_position)
width = new_cell_position - self.__last_cell_position - left_position width = new_cell_position - self.__last_cell_position - left_position
# width = round(width, 2) # width = round(width, 2)
width = unicode_type('%.2f' % width) width = str('%.2f' % width)
self.__last_cell_position = new_cell_position self.__last_cell_position = new_cell_position
widths_exists = self.__row_dict.get('widths') widths_exists = self.__row_dict.get('widths')
if widths_exists: if widths_exists:
self.__row_dict['widths'] += ', %s' % unicode_type(width) self.__row_dict['widths'] += ', %s' % str(width)
else: else:
self.__row_dict['widths'] = unicode_type(width) self.__row_dict['widths'] = str(width)
self.__cell_list[-1]['width'] = width self.__cell_list[-1]['width'] = width
self.__cell_list.append({}) self.__cell_list.append({})
self.__cell_widths.append(width) self.__cell_widths.append(width)
+2 -3
View File
@@ -9,7 +9,6 @@ import urllib.parse
import uuid import uuid
from ebook_converter.utils.smartypants import smartyPants from ebook_converter.utils.smartypants import smartyPants
from ebook_converter.polyglot.builtins import unicode_type
# Last upstream version basis # Last upstream version basis
@@ -683,7 +682,7 @@ class Textile(object):
def footnoteID(self, match): def footnoteID(self, match):
id, t = match.groups() id, t = match.groups()
if id not in self.fn: if id not in self.fn:
self.fn[id] = unicode_type(uuid.uuid4()) self.fn[id] = str(uuid.uuid4())
fnid = self.fn[id] fnid = self.fn[id]
if not t: if not t:
t = '' t = ''
@@ -788,7 +787,7 @@ class Textile(object):
return url return url
def shelve(self, text): def shelve(self, text):
id = unicode_type(uuid.uuid4()) + 'c' id = str(uuid.uuid4()) + 'c'
self.shelf[id] = text self.shelf[id] = text
return id return id
+2 -2
View File
@@ -8,7 +8,7 @@ from functools import partial
from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTML from ebook_converter.ebooks.htmlz.oeb2html import OEB2HTML
from ebook_converter.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, rewrite_links from ebook_converter.ebooks.oeb.base import XHTML, XHTML_NS, barename, namespace, rewrite_links
from ebook_converter.ebooks.oeb.stylizer import Stylizer from ebook_converter.ebooks.oeb.stylizer import Stylizer
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes from ebook_converter.polyglot.builtins import string_or_bytes
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -225,7 +225,7 @@ class MarkdownMLizer(OEB2HTML):
text.append('+ ') text.append('+ ')
elif li['name'] == 'ol': elif li['name'] == 'ol':
li['num'] += 1 li['num'] += 1
text.append(unicode_type(li['num']) + '. ') text.append(str(li['num']) + '. ')
# Process tags that contain text. # Process tags that contain text.
if hasattr(elem, 'text') and elem.text: if hasattr(elem, 'text') and elem.text:
+2 -2
View File
@@ -8,7 +8,7 @@ from ebook_converter.ebooks.metadata.opf2 import OPFCreator
from ebook_converter.ebooks.conversion.preprocess import DocAnalysis from ebook_converter.ebooks.conversion.preprocess import DocAnalysis
from ebook_converter.utils.cleantext import clean_ascii_chars from ebook_converter.utils.cleantext import clean_ascii_chars
from ebook_converter.polyglot.builtins import iteritems, unicode_type, long_type from ebook_converter.polyglot.builtins import iteritems, long_type
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -58,7 +58,7 @@ def split_txt(txt, epub_split_size_kb=0):
''' '''
# Takes care if there is no point to split # Takes care if there is no point to split
if epub_split_size_kb > 0: if epub_split_size_kb > 0:
if isinstance(txt, unicode_type): if isinstance(txt, str):
txt = txt.encode('utf-8') txt = txt.encode('utf-8')
length_byte = len(txt) length_byte = len(txt)
# Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin) # Calculating the average chunk value for easy splitting as EPUB (+2 as a safe margin)

Some files were not shown because too many files have changed in this diff Show More