From 128705f258723c1d96002f25fd2f79f9f3fe46f9 Mon Sep 17 00:00:00 2001 From: gryf Date: Mon, 20 Apr 2020 19:25:28 +0200 Subject: [PATCH] Removed polyglots unicode_type usage --- ebook_converter/__init__.py | 18 +-- ebook_converter/constants.py | 17 ++- ebook_converter/css_selectors/parser.py | 6 +- ebook_converter/customize/__init__.py | 3 +- ebook_converter/customize/conversion.py | 5 +- ebook_converter/customize/ui.py | 4 +- ebook_converter/customize/zipplugin.py | 6 +- ebook_converter/ebooks/__init__.py | 5 +- ebook_converter/ebooks/chardet.py | 5 +- .../ebooks/conversion/plugins/chm_input.py | 6 +- .../ebooks/conversion/plugins/epub_output.py | 12 +- .../ebooks/conversion/plugins/html_input.py | 8 +- .../ebooks/conversion/plugins/html_output.py | 3 +- .../ebooks/conversion/plugins/htmlz_output.py | 5 +- .../ebooks/conversion/plugins/lrf_output.py | 11 +- .../ebooks/conversion/plugins/mobi_input.py | 3 +- .../ebooks/conversion/plugins/mobi_output.py | 5 +- .../ebooks/conversion/plugins/pdf_output.py | 6 +- .../ebooks/conversion/plugins/pml_output.py | 3 +- .../ebooks/conversion/plugins/recipe_input.py | 3 +- .../ebooks/conversion/plugins/snb_input.py | 3 +- .../ebooks/conversion/plugins/snb_output.py | 11 +- ebook_converter/ebooks/conversion/plumber.py | 8 +- .../ebooks/conversion/preprocess.py | 25 ++-- ebook_converter/ebooks/conversion/utils.py | 127 +++++++++--------- ebook_converter/ebooks/docx/footnotes.py | 6 +- ebook_converter/ebooks/docx/numbering.py | 4 +- ebook_converter/ebooks/docx/tables.py | 6 +- ebook_converter/ebooks/docx/to_html.py | 6 +- .../ebooks/docx/writer/container.py | 6 +- .../ebooks/docx/writer/from_html.py | 18 +-- ebook_converter/ebooks/docx/writer/images.py | 14 +- ebook_converter/ebooks/docx/writer/links.py | 5 +- ebook_converter/ebooks/docx/writer/lists.py | 14 +- ebook_converter/ebooks/docx/writer/styles.py | 58 ++++---- ebook_converter/ebooks/docx/writer/tables.py | 12 +- ebook_converter/ebooks/fb2/fb2ml.py | 16 +-- ebook_converter/ebooks/html/input.py | 5 +- ebook_converter/ebooks/html/to_zip.py | 3 +- ebook_converter/ebooks/htmlz/oeb2html.py | 4 +- .../ebooks/lrf/html/convert_from.py | 40 +++--- ebook_converter/ebooks/lrf/pylrs/elements.py | 8 +- ebook_converter/ebooks/lrf/pylrs/pylrf.py | 6 +- ebook_converter/ebooks/lrf/pylrs/pylrs.py | 78 +++++------ ebook_converter/ebooks/metadata/__init__.py | 16 +-- ebook_converter/ebooks/metadata/archive.py | 3 +- ebook_converter/ebooks/metadata/book/base.py | 52 +++---- ebook_converter/ebooks/metadata/fb2.py | 7 +- ebook_converter/ebooks/metadata/meta.py | 4 +- ebook_converter/ebooks/metadata/opf2.py | 50 +++---- ebook_converter/ebooks/metadata/pdf.py | 6 +- ebook_converter/ebooks/metadata/rtf.py | 4 +- ebook_converter/ebooks/metadata/toc.py | 12 +- ebook_converter/ebooks/mobi/mobiml.py | 20 +-- ebook_converter/ebooks/mobi/reader/headers.py | 3 +- ebook_converter/ebooks/mobi/reader/markup.py | 3 +- ebook_converter/ebooks/mobi/reader/mobi6.py | 4 +- ebook_converter/ebooks/mobi/reader/mobi8.py | 4 +- ebook_converter/ebooks/mobi/utils.py | 12 +- ebook_converter/ebooks/mobi/writer2/main.py | 10 +- .../ebooks/mobi/writer2/resources.py | 8 +- .../ebooks/mobi/writer2/serializer.py | 10 +- ebook_converter/ebooks/mobi/writer8/exth.py | 22 +-- ebook_converter/ebooks/odt/input.py | 4 +- ebook_converter/ebooks/oeb/base.py | 56 ++++---- ebook_converter/ebooks/oeb/normalize_css.py | 4 +- ebook_converter/ebooks/oeb/parse_utils.py | 6 +- .../ebooks/oeb/polish/container.py | 6 +- ebook_converter/ebooks/oeb/polish/css.py | 6 +- ebook_converter/ebooks/oeb/polish/parsing.py | 3 +- ebook_converter/ebooks/oeb/polish/split.py | 6 +- ebook_converter/ebooks/oeb/polish/toc.py | 14 +- ebook_converter/ebooks/oeb/reader.py | 7 +- ebook_converter/ebooks/oeb/stylizer.py | 16 +-- .../ebooks/oeb/transforms/cover.py | 5 +- .../ebooks/oeb/transforms/flatcss.py | 10 +- .../ebooks/oeb/transforms/htmltoc.py | 3 +- .../ebooks/oeb/transforms/jacket.py | 17 ++- .../ebooks/oeb/transforms/rasterize.py | 7 +- .../ebooks/oeb/transforms/split.py | 4 +- .../ebooks/oeb/transforms/structure.py | 4 +- .../ebooks/oeb/transforms/subset.py | 6 +- .../ebooks/pdb/ereader/reader132.py | 3 +- .../ebooks/pdb/ereader/reader202.py | 3 +- ebook_converter/ebooks/pdf/render/common.py | 12 +- ebook_converter/ebooks/rtf2xml/ParseRtf.py | 3 +- .../ebooks/rtf2xml/check_encoding.py | 4 +- ebook_converter/ebooks/rtf2xml/footnote.py | 5 +- ebook_converter/ebooks/rtf2xml/hex_2_utf8.py | 3 +- ebook_converter/ebooks/rtf2xml/list_table.py | 12 +- ebook_converter/ebooks/rtf2xml/make_lists.py | 3 +- ebook_converter/ebooks/rtf2xml/old_rtf.py | 4 +- .../ebooks/rtf2xml/paragraph_def.py | 3 +- ebook_converter/ebooks/rtf2xml/pict.py | 3 +- .../ebooks/rtf2xml/process_tokens.py | 5 +- ebook_converter/ebooks/rtf2xml/sections.py | 13 +- ebook_converter/ebooks/rtf2xml/table.py | 7 +- ebook_converter/ebooks/textile/functions.py | 5 +- ebook_converter/ebooks/txt/markdownml.py | 4 +- ebook_converter/ebooks/txt/processor.py | 4 +- .../ebooks/unihandecode/unidecoder.py | 9 +- ebook_converter/library/catalogs/bibtex.py | 6 +- ebook_converter/library/catalogs/csv_xml.py | 15 +-- ebook_converter/library/catalogs/epub_mobi.py | 7 +- ebook_converter/library/comments.py | 3 +- ebook_converter/polyglot/binary.py | 18 ++- ebook_converter/polyglot/builtins.py | 13 +- ebook_converter/spell/__init__.py | 7 +- ebook_converter/utils/config_base.py | 10 +- ebook_converter/utils/date.py | 8 +- ebook_converter/utils/filenames.py | 4 +- ebook_converter/utils/fonts/metadata.py | 3 +- ebook_converter/utils/fonts/scanner.py | 4 +- .../utils/fonts/sfnt/cff/dict_data.py | 3 +- ebook_converter/utils/fonts/sfnt/subset.py | 8 +- ebook_converter/utils/fonts/utils.py | 6 +- ebook_converter/utils/formatter.py | 6 +- ebook_converter/utils/formatter_functions.py | 24 ++-- ebook_converter/utils/icu.py | 8 +- ebook_converter/utils/img.py | 12 +- ebook_converter/utils/imghdr.py | 4 +- ebook_converter/utils/ipc/launch.py | 10 +- ebook_converter/utils/ipc/simple_worker.py | 4 +- ebook_converter/utils/localization.py | 4 +- ebook_converter/utils/logging.py | 4 +- ebook_converter/utils/serialize.py | 3 +- ebook_converter/utils/short_uuid.py | 4 +- ebook_converter/utils/speedups.py | 4 +- ebook_converter/utils/titlecase.py | 3 +- ebook_converter/utils/zipfile.py | 12 +- 130 files changed, 657 insertions(+), 716 deletions(-) diff --git a/ebook_converter/__init__.py b/ebook_converter/__init__.py index e77ba15..e49e2ca 100644 --- a/ebook_converter/__init__.py +++ b/ebook_converter/__init__.py @@ -14,7 +14,7 @@ import urllib.parse import urllib.request import warnings -from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, hasenv, native_string_type +from ebook_converter.polyglot.builtins import codepoint_to_chr, hasenv, native_string_type from functools import partial if not hasenv('CALIBRE_SHOW_DEPRECATION_WARNINGS'): @@ -80,7 +80,7 @@ def get_types_map(): def to_unicode(raw, encoding='utf-8', errors='strict'): - if isinstance(raw, unicode_type): + if isinstance(raw, str): return raw return raw.decode(encoding, errors) @@ -170,7 +170,7 @@ def prints(*args, **kwargs): safe_encode = kwargs.get('safe_encode', False) count = 0 for i, arg in enumerate(args): - if isinstance(arg, unicode_type): + if isinstance(arg, str): if iswindows: from ebook_converter.utils.terminal import Detect cs = Detect(file) @@ -194,8 +194,8 @@ def prints(*args, **kwargs): try: arg = native_string_type(arg) except ValueError: - arg = unicode_type(arg) - if isinstance(arg, unicode_type): + arg = str(arg) + if isinstance(arg, str): try: arg = arg.encode(enc) except UnicodeEncodeError: @@ -334,7 +334,7 @@ def get_parsed_proxy(typ='http', debug=True): traceback.print_exc() else: if debug: - prints('Using http proxy', unicode_type(ans)) + prints('Using http proxy', str(ans)) return ans @@ -517,7 +517,7 @@ def strftime(fmt, t=None): if isinstance(ans, bytes): ans = ans.decode(preferred_encoding, 'replace') if early_year: - ans = ans.replace('_early year hack##', unicode_type(orig_year)) + ans = ans.replace('_early year hack##', str(orig_year)) return ans @@ -629,7 +629,7 @@ def force_unicode(obj, enc=preferred_encoding): def as_unicode(obj, enc=preferred_encoding): if not isbytestring(obj): try: - obj = unicode_type(obj) + obj = str(obj) except Exception: try: obj = native_string_type(obj) @@ -652,7 +652,7 @@ def human_readable(size, sep=' '): if size < (1 << ((i + 1) * 10)): divisor, suffix = (1 << (i * 10)), candidate break - size = unicode_type(float(size)/divisor) + size = str(float(size)/divisor) if size.find(".") > -1: size = size[:size.find(".")+2] if size.endswith('.0'): diff --git a/ebook_converter/constants.py b/ebook_converter/constants.py index 945420e..1203d12 100644 --- a/ebook_converter/constants.py +++ b/ebook_converter/constants.py @@ -1,12 +1,15 @@ -#!/usr/bin/env python2 -# vim:fileencoding=utf-8 -# License: GPLv3 Copyright: 2015, Kovid Goyal -from ebook_converter.polyglot.builtins import unicode_type, environ_item, hasenv, getenv, as_unicode, native_string_type -import sys, locale, codecs, os, importlib, collections +import codecs +import collections +import importlib +import locale +import os +import sys + +from ebook_converter.polyglot.builtins import environ_item, hasenv, getenv, as_unicode, native_string_type __appname__ = 'calibre' numeric_version = (4, 12, 0) -__version__ = '.'.join(map(unicode_type, numeric_version)) +__version__ = '.'.join(map(str, numeric_version)) git_version = None __author__ = "Kovid Goyal " @@ -216,7 +219,7 @@ class Plugins(collections.Mapping): except Exception as err: p = None try: - plugin_err = unicode_type(err) + plugin_err = str(err) except Exception: plugin_err = as_unicode(native_string_type(err), encoding=preferred_encoding, errors='replace') self._plugins[name] = p, plugin_err diff --git a/ebook_converter/css_selectors/parser.py b/ebook_converter/css_selectors/parser.py index 9a623a6..7bf2128 100644 --- a/ebook_converter/css_selectors/parser.py +++ b/ebook_converter/css_selectors/parser.py @@ -15,7 +15,7 @@ import operator import string from ebook_converter.css_selectors.errors import SelectorSyntaxError, ExpressionError -from ebook_converter.polyglot.builtins import unicode_type, codepoint_to_chr +from ebook_converter.polyglot.builtins import codepoint_to_chr utab = {c:c+32 for c in range(ord(u'A'), ord(u'Z')+1)} @@ -25,7 +25,7 @@ if sys.version_info.major < 3: def ascii_lower(string): """Lower-case, but only in the ASCII range.""" - return string.translate(utab if isinstance(string, unicode_type) else tab) + return string.translate(utab if isinstance(string, str) else tab) def urepr(x): if isinstance(x, list): @@ -469,7 +469,7 @@ def parse_simple_selector(stream, inside_negation=False): if ident.lower() in special_pseudo_elements: # Special case: CSS 2.1 pseudo-elements can have a single ':' # Any new pseudo-element must have two. - pseudo_element = unicode_type(ident) + pseudo_element = str(ident) continue if stream.peek() != ('DELIM', '('): result = Pseudo(result, ident) diff --git a/ebook_converter/customize/__init__.py b/ebook_converter/customize/__init__.py index f0a7d83..bcc50ae 100644 --- a/ebook_converter/customize/__init__.py +++ b/ebook_converter/customize/__init__.py @@ -2,7 +2,6 @@ import os, sys, zipfile, importlib from ebook_converter.constants import numeric_version, iswindows, isosx from ebook_converter.ptempfile import PersistentTemporaryFile -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL v3' @@ -196,7 +195,7 @@ class Plugin(object): # {{{ config_dialog.exec_() if config_dialog.result() == QDialog.Accepted: - sc = unicode_type(sc.text()).strip() + sc = str(sc.text()).strip() customize_plugin(self, sc) geom = bytearray(config_dialog.saveGeometry()) diff --git a/ebook_converter/customize/conversion.py b/ebook_converter/customize/conversion.py index 63c334f..fe8ee8d 100644 --- a/ebook_converter/customize/conversion.py +++ b/ebook_converter/customize/conversion.py @@ -5,7 +5,6 @@ import re, os, shutil, numbers from ebook_converter import CurrentDir from ebook_converter.customize import Plugin -from ebook_converter.polyglot.builtins import unicode_type class ConversionOption(object): @@ -79,7 +78,7 @@ class OptionRecommendation(object): self.option.choices: raise ValueError('OpRec: %s: Recommended value not in choices'% self.option.name) - if not (isinstance(self.recommended_value, (numbers.Number, bytes, unicode_type)) or self.recommended_value is None): + if not (isinstance(self.recommended_value, (numbers.Number, bytes, str)) or self.recommended_value is None): raise ValueError('OpRec: %s:'%self.option.name + repr( self.recommended_value) + ' is not a string or a number') @@ -298,7 +297,7 @@ class OutputFormatPlugin(Plugin): @property def is_periodical(self): return self.oeb.metadata.publication_type and \ - unicode_type(self.oeb.metadata.publication_type[0]).startswith('periodical:') + str(self.oeb.metadata.publication_type[0]).startswith('periodical:') def specialize_options(self, log, opts, input_fmt): ''' diff --git a/ebook_converter/customize/ui.py b/ebook_converter/customize/ui.py index c5d8d46..770cf3d 100644 --- a/ebook_converter/customize/ui.py +++ b/ebook_converter/customize/ui.py @@ -18,7 +18,7 @@ from ebook_converter.utils.config import (make_config_dir, Config, ConfigProxy, plugin_dir, OptionParser) # from ebook_converter.ebooks.metadata.sources.base import Source from ebook_converter.constants import DEBUG, numeric_version -from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type +from ebook_converter.polyglot.builtins import iteritems, itervalues __license__ = 'GPL v3' @@ -740,7 +740,7 @@ def build_plugin(path): from ebook_converter import prints from ebook_converter.ptempfile import PersistentTemporaryFile from ebook_converter.utils.zipfile import ZipFile, ZIP_STORED - path = unicode_type(path) + path = str(path) names = frozenset(os.listdir(path)) if '__init__.py' not in names: prints(path, ' is not a valid plugin') diff --git a/ebook_converter/customize/zipplugin.py b/ebook_converter/customize/zipplugin.py index 1bb5208..4113c4a 100644 --- a/ebook_converter/customize/zipplugin.py +++ b/ebook_converter/customize/zipplugin.py @@ -12,7 +12,7 @@ from ebook_converter.constants import ispy3 from ebook_converter.customize import (Plugin, numeric_version, platform, InvalidPlugin, PluginNotFound) from ebook_converter.polyglot.builtins import (itervalues, string_or_bytes, - unicode_type, reload) + reload) __license__ = 'GPL v3' @@ -215,7 +215,7 @@ class PluginLoader(object): if ans.minimum_calibre_version > numeric_version: raise InvalidPlugin( 'The plugin at %s needs a version of calibre >= %s' % - (as_unicode(path_to_zip_file), '.'.join(map(unicode_type, + (as_unicode(path_to_zip_file), '.'.join(map(str, ans.minimum_calibre_version)))) if platform not in ans.supported_platforms: @@ -230,7 +230,7 @@ class PluginLoader(object): raise def _locate_code(self, zf, path_to_zip_file): - names = [x if isinstance(x, unicode_type) else x.decode('utf-8') for x in + names = [x if isinstance(x, str) else x.decode('utf-8') for x in zf.namelist()] names = [x[1:] if x[0] == '/' else x for x in names] diff --git a/ebook_converter/ebooks/__init__.py b/ebook_converter/ebooks/__init__.py index e3c1831..66c1756 100644 --- a/ebook_converter/ebooks/__init__.py +++ b/ebook_converter/ebooks/__init__.py @@ -9,7 +9,6 @@ from various formats. import os, re, numbers, sys from ebook_converter import prints from ebook_converter.ebooks.chardet import xml_to_unicode -from ebook_converter.polyglot.builtins import unicode_type class ConversionError(Exception): @@ -80,7 +79,7 @@ def extract_calibre_cover(raw, base, log): if matches is None: body = soup.find('body') if body is not None: - text = u''.join(map(unicode_type, body.findAll(text=True))) + text = u''.join(map(str, body.findAll(text=True))) if text.strip(): # Body has text, abort return @@ -150,7 +149,7 @@ def check_ebook_format(stream, current_guess): def normalize(x): - if isinstance(x, unicode_type): + if isinstance(x, str): import unicodedata x = unicodedata.normalize('NFC', x) return x diff --git a/ebook_converter/ebooks/chardet.py b/ebook_converter/ebooks/chardet.py index 95a06cd..5428e45 100644 --- a/ebook_converter/ebooks/chardet.py +++ b/ebook_converter/ebooks/chardet.py @@ -1,5 +1,4 @@ import re, codecs -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL v3' @@ -131,7 +130,7 @@ def force_encoding(raw, verbose, assume_utf8=False): def detect_xml_encoding(raw, verbose=False, assume_utf8=False): - if not raw or isinstance(raw, unicode_type): + if not raw or isinstance(raw, str): return raw, None for x in ('utf8', 'utf-16-le', 'utf-16-be'): bom = getattr(codecs, 'BOM_'+x.upper().replace('-16', '16').replace( @@ -175,7 +174,7 @@ def xml_to_unicode(raw, verbose=False, strip_encoding_pats=False, return '', None raw, encoding = detect_xml_encoding(raw, verbose=verbose, assume_utf8=assume_utf8) - if not isinstance(raw, unicode_type): + if not isinstance(raw, str): raw = raw.decode(encoding, 'replace') if strip_encoding_pats: diff --git a/ebook_converter/ebooks/conversion/plugins/chm_input.py b/ebook_converter/ebooks/conversion/plugins/chm_input.py index e65dd70..2dd0705 100644 --- a/ebook_converter/ebooks/conversion/plugins/chm_input.py +++ b/ebook_converter/ebooks/conversion/plugins/chm_input.py @@ -10,7 +10,7 @@ from ebook_converter.ebooks.chardet import xml_to_unicode from ebook_converter.customize.conversion import InputFormatPlugin from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.constants import filesystem_encoding -from ebook_converter.polyglot.builtins import unicode_type, as_bytes +from ebook_converter.polyglot.builtins import as_bytes __license__ = 'GPL v3' __copyright__ = ('2008, Kovid Goyal , ' @@ -41,7 +41,7 @@ class CHMInput(InputFormatPlugin): log.debug('Processing CHM...') with TemporaryDirectory('_chm2oeb') as tdir: - if not isinstance(tdir, unicode_type): + if not isinstance(tdir, str): tdir = tdir.decode(filesystem_encoding) html_input = plugin_for_input_format('html') for opt in html_input.options: @@ -129,7 +129,7 @@ class CHMInput(InputFormatPlugin): base = os.path.dirname(os.path.abspath(htmlpath)) def unquote(x): - if isinstance(x, unicode_type): + if isinstance(x, str): x = x.encode('utf-8') return _unquote(x).decode('utf-8') diff --git a/ebook_converter/ebooks/conversion/plugins/epub_output.py b/ebook_converter/ebooks/conversion/plugins/epub_output.py index 20a454f..11294d9 100644 --- a/ebook_converter/ebooks/conversion/plugins/epub_output.py +++ b/ebook_converter/ebooks/conversion/plugins/epub_output.py @@ -7,7 +7,7 @@ from ebook_converter.customize.conversion import (OutputFormatPlugin, OptionRecommendation) from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter import CurrentDir -from ebook_converter.polyglot.builtins import unicode_type, as_bytes +from ebook_converter.polyglot.builtins import as_bytes __license__ = 'GPL v3' @@ -225,15 +225,15 @@ class EPUBOutput(OutputFormatPlugin): identifiers = oeb.metadata['identifier'] uuid = None for x in identifiers: - if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'): - uuid = unicode_type(x).split(':')[-1] + if x.get(OPF('scheme'), None).lower() == 'uuid' or str(x).startswith('urn:uuid:'): + uuid = str(x).split(':')[-1] break encrypted_fonts = getattr(input_plugin, 'encrypted_fonts', []) if uuid is None: self.log.warn('No UUID identifier found') from uuid import uuid4 - uuid = unicode_type(uuid4()) + uuid = str(uuid4()) oeb.metadata.add('identifier', uuid, scheme='uuid', id=uuid) if encrypted_fonts and not uuid.startswith('urn:uuid:'): @@ -241,7 +241,7 @@ class EPUBOutput(OutputFormatPlugin): # for some absurd reason, or it will throw a hissy fit and refuse # to use the obfuscated fonts. for x in identifiers: - if unicode_type(x) == uuid: + if str(x) == uuid: x.content = 'urn:uuid:'+uuid with TemporaryDirectory('_epub_output') as tdir: @@ -336,7 +336,7 @@ class EPUBOutput(OutputFormatPlugin): f.write(bytes(bytearray(data[i] ^ key[i%16] for i in range(1024)))) else: self.log.warn('Font', path, 'is invalid, ignoring') - if not isinstance(uri, unicode_type): + if not isinstance(uri, str): uri = uri.decode('utf-8') fonts.append(''' diff --git a/ebook_converter/ebooks/conversion/plugins/html_input.py b/ebook_converter/ebooks/conversion/plugins/html_input.py index 9dafbd2..b453d96 100644 --- a/ebook_converter/ebooks/conversion/plugins/html_input.py +++ b/ebook_converter/ebooks/conversion/plugins/html_input.py @@ -10,7 +10,7 @@ from ebook_converter.customize.conversion import (InputFormatPlugin, from ebook_converter.utils.localization import get_lang from ebook_converter.utils.filenames import ascii_filename from ebook_converter.utils.imghdr import what -from ebook_converter.polyglot.builtins import unicode_type, getcwd, as_unicode +from ebook_converter.polyglot.builtins import getcwd, as_unicode __license__ = 'GPL v3' @@ -135,7 +135,7 @@ class HTMLInput(InputFormatPlugin): if not metadata.title: oeb.logger.warn('Title not specified') metadata.add('title', self.oeb.translate(__('Unknown'))) - bookid = unicode_type(uuid.uuid4()) + bookid = str(uuid.uuid4()) metadata.add('identifier', bookid, id='uuid_id', scheme='uuid') for ident in metadata.identifier: if 'id' in ident.attrib: @@ -225,7 +225,7 @@ class HTMLInput(InputFormatPlugin): def link_to_local_path(self, link_, base=None): from ebook_converter.ebooks.html.input import Link - if not isinstance(link_, unicode_type): + if not isinstance(link_, str): try: link_ = link_.decode('utf-8', 'error') except: @@ -288,7 +288,7 @@ class HTMLInput(InputFormatPlugin): # bhref refers to an already existing file. The read() method of # DirContainer will call unquote on it before trying to read the # file, therefore we quote it here. - if isinstance(bhref, unicode_type): + if isinstance(bhref, str): bhref = bhref.encode('utf-8') item.html_input_href = as_unicode(urllib.parse.quote(bhref)) if guessed in self.OEB_STYLES: diff --git a/ebook_converter/ebooks/conversion/plugins/html_output.py b/ebook_converter/ebooks/conversion/plugins/html_output.py index f34a373..de72192 100644 --- a/ebook_converter/ebooks/conversion/plugins/html_output.py +++ b/ebook_converter/ebooks/conversion/plugins/html_output.py @@ -8,7 +8,6 @@ from lxml import etree from ebook_converter import CurrentDir from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation from ebook_converter.ebooks.oeb.base import element -from ebook_converter.polyglot.builtins import unicode_type from ebook_converter.polyglot.urllib import unquote from ebook_converter.ptempfile import PersistentTemporaryDirectory from ebook_converter.utils.cleantext import clean_xml_chars @@ -155,7 +154,7 @@ class HTMLOutput(OutputFormatPlugin): toc=html_toc, meta=meta, nextLink=nextLink, tocUrl=tocUrl, cssLink=cssLink, firstContentPageLink=nextLink) - if isinstance(t, unicode_type): + if isinstance(t, str): t = t.encode('utf-8') f.write(t) diff --git a/ebook_converter/ebooks/conversion/plugins/htmlz_output.py b/ebook_converter/ebooks/conversion/plugins/htmlz_output.py index a47e71d..6c2c301 100644 --- a/ebook_converter/ebooks/conversion/plugins/htmlz_output.py +++ b/ebook_converter/ebooks/conversion/plugins/htmlz_output.py @@ -4,7 +4,6 @@ import os from ebook_converter.customize.conversion import OutputFormatPlugin, \ OptionRecommendation from ebook_converter.ptempfile import TemporaryDirectory -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL 3' @@ -78,9 +77,9 @@ class HTMLZOutput(OutputFormatPlugin): fname = u'index' if opts.htmlz_title_filename: from ebook_converter.utils.filenames import shorten_components_to - fname = shorten_components_to(100, (ascii_filename(unicode_type(oeb_book.metadata.title[0])),))[0] + fname = shorten_components_to(100, (ascii_filename(str(oeb_book.metadata.title[0])),))[0] with open(os.path.join(tdir, fname+u'.html'), 'wb') as tf: - if isinstance(html, unicode_type): + if isinstance(html, str): html = html.encode('utf-8') tf.write(html) diff --git a/ebook_converter/ebooks/conversion/plugins/lrf_output.py b/ebook_converter/ebooks/conversion/plugins/lrf_output.py index 5114f0b..4b16f83 100644 --- a/ebook_converter/ebooks/conversion/plugins/lrf_output.py +++ b/ebook_converter/ebooks/conversion/plugins/lrf_output.py @@ -2,7 +2,6 @@ import sys, os from ebook_converter.customize.conversion import OutputFormatPlugin from ebook_converter.customize.conversion import OptionRecommendation -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL v3' @@ -15,7 +14,7 @@ class LRFOptions(object): def __init__(self, output, opts, oeb): def f2s(f): try: - return unicode_type(f[0]) + return str(f[0]) except: return '' m = oeb.metadata @@ -29,13 +28,13 @@ class LRFOptions(object): self.title_sort = self.author_sort = '' for x in m.creator: if x.role == 'aut': - self.author = unicode_type(x) - fa = unicode_type(getattr(x, 'file_as', '')) + self.author = str(x) + fa = str(getattr(x, 'file_as', '')) if fa: self.author_sort = fa for x in m.title: - if unicode_type(x.file_as): - self.title_sort = unicode_type(x.file_as) + if str(x.file_as): + self.title_sort = str(x.file_as) self.freetext = f2s(m.description) self.category = f2s(m.subject) self.cover = None diff --git a/ebook_converter/ebooks/conversion/plugins/mobi_input.py b/ebook_converter/ebooks/conversion/plugins/mobi_input.py index 16145bc..84abd75 100644 --- a/ebook_converter/ebooks/conversion/plugins/mobi_input.py +++ b/ebook_converter/ebooks/conversion/plugins/mobi_input.py @@ -1,7 +1,6 @@ import os from ebook_converter.customize.conversion import InputFormatPlugin -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL 3' @@ -50,7 +49,7 @@ class MOBIInput(InputFormatPlugin): raw = parse_cache.pop('calibre_raw_mobi_markup', False) if raw: - if isinstance(raw, unicode_type): + if isinstance(raw, str): raw = raw.encode('utf-8') with lopen('debug-raw.html', 'wb') as f: f.write(raw) diff --git a/ebook_converter/ebooks/conversion/plugins/mobi_output.py b/ebook_converter/ebooks/conversion/plugins/mobi_output.py index fd0fcdb..5fff040 100644 --- a/ebook_converter/ebooks/conversion/plugins/mobi_output.py +++ b/ebook_converter/ebooks/conversion/plugins/mobi_output.py @@ -1,6 +1,5 @@ from ebook_converter.customize.conversion import (OutputFormatPlugin, OptionRecommendation) -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL v3' @@ -119,7 +118,7 @@ class MOBIOutput(OutputFormatPlugin): if not found: from ebook_converter.ebooks import generate_masthead self.oeb.log.debug('No masthead found in manifest, generating default mastheadImage...') - raw = generate_masthead(unicode_type(self.oeb.metadata['title'][0])) + raw = generate_masthead(str(self.oeb.metadata['title'][0])) id, href = self.oeb.manifest.generate('masthead', 'masthead') self.oeb.manifest.add(id, href, 'image/gif', data=raw) self.oeb.guide.add('masthead', 'Masthead Image', href) @@ -163,7 +162,7 @@ class MOBIOutput(OutputFormatPlugin): sec.nodes.remove(a) root = TOC(klass='periodical', href=self.oeb.spine[0].href, - title=unicode_type(self.oeb.metadata.title[0])) + title=str(self.oeb.metadata.title[0])) for s in sections: if articles[id(s)]: diff --git a/ebook_converter/ebooks/conversion/plugins/pdf_output.py b/ebook_converter/ebooks/conversion/plugins/pdf_output.py index bf24a8d..d1d17c3 100644 --- a/ebook_converter/ebooks/conversion/plugins/pdf_output.py +++ b/ebook_converter/ebooks/conversion/plugins/pdf_output.py @@ -6,7 +6,7 @@ import glob, os from ebook_converter.customize.conversion import (OutputFormatPlugin, OptionRecommendation) from ebook_converter.ptempfile import TemporaryDirectory -from ebook_converter.polyglot.builtins import iteritems, unicode_type +from ebook_converter.polyglot.builtins import iteritems __license__ = 'GPL 3' @@ -190,8 +190,8 @@ class PDFOutput(OutputFormatPlugin): def get_cover_data(self): oeb = self.oeb - if (oeb.metadata.cover and unicode_type(oeb.metadata.cover[0]) in oeb.manifest.ids): - cover_id = unicode_type(oeb.metadata.cover[0]) + if (oeb.metadata.cover and str(oeb.metadata.cover[0]) in oeb.manifest.ids): + cover_id = str(oeb.metadata.cover[0]) item = oeb.manifest.ids[cover_id] self.cover_data = item.data diff --git a/ebook_converter/ebooks/conversion/plugins/pml_output.py b/ebook_converter/ebooks/conversion/plugins/pml_output.py index 022c5b1..c2085c4 100644 --- a/ebook_converter/ebooks/conversion/plugins/pml_output.py +++ b/ebook_converter/ebooks/conversion/plugins/pml_output.py @@ -3,7 +3,6 @@ import os, io from ebook_converter.customize.conversion import (OutputFormatPlugin, OptionRecommendation) from ebook_converter.ptempfile import TemporaryDirectory -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL 3' @@ -40,7 +39,7 @@ class PMLOutput(OutputFormatPlugin): with TemporaryDirectory('_pmlz_output') as tdir: pmlmlizer = PMLMLizer(log) - pml = unicode_type(pmlmlizer.extract_content(oeb_book, opts)) + pml = str(pmlmlizer.extract_content(oeb_book, opts)) with lopen(os.path.join(tdir, 'index.pml'), 'wb') as out: out.write(pml.encode(opts.pml_output_encoding, 'replace')) diff --git a/ebook_converter/ebooks/conversion/plugins/recipe_input.py b/ebook_converter/ebooks/conversion/plugins/recipe_input.py index c0d0c54..0d73a52 100644 --- a/ebook_converter/ebooks/conversion/plugins/recipe_input.py +++ b/ebook_converter/ebooks/conversion/plugins/recipe_input.py @@ -3,7 +3,6 @@ import os from ebook_converter.customize.conversion import InputFormatPlugin, OptionRecommendation from ebook_converter.constants import numeric_version from ebook_converter import walk -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL v3' @@ -161,6 +160,6 @@ class RecipeInput(InputFormatPlugin): def save_download(self, zf): raw = self.recipe_source - if isinstance(raw, unicode_type): + if isinstance(raw, str): raw = raw.encode('utf-8') zf.writestr('download.recipe', raw) diff --git a/ebook_converter/ebooks/conversion/plugins/snb_input.py b/ebook_converter/ebooks/conversion/plugins/snb_input.py index b151cb2..51c7d7e 100644 --- a/ebook_converter/ebooks/conversion/plugins/snb_input.py +++ b/ebook_converter/ebooks/conversion/plugins/snb_input.py @@ -3,7 +3,6 @@ import os from ebook_converter.customize.conversion import InputFormatPlugin from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.utils.filenames import ascii_filename -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL 3' @@ -73,7 +72,7 @@ class SNBInput(InputFormatPlugin): if d['cover'] != '': oeb.guide.add('cover', 'Cover', d['cover']) - bookid = unicode_type(uuid.uuid4()) + bookid = str(uuid.uuid4()) oeb.metadata.add('identifier', bookid, id='uuid_id', scheme='uuid') for ident in oeb.metadata.identifier: if 'id' in ident.attrib: diff --git a/ebook_converter/ebooks/conversion/plugins/snb_output.py b/ebook_converter/ebooks/conversion/plugins/snb_output.py index bda81f8..9c9ab30 100644 --- a/ebook_converter/ebooks/conversion/plugins/snb_output.py +++ b/ebook_converter/ebooks/conversion/plugins/snb_output.py @@ -3,7 +3,6 @@ import os from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.constants import __appname__, __version__ -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL 3' @@ -73,20 +72,20 @@ class SNBOutput(OutputFormatPlugin): # Process Meta data meta = oeb_book.metadata if meta.title: - title = unicode_type(meta.title[0]) + title = str(meta.title[0]) else: title = '' - authors = [unicode_type(x) for x in meta.creator if x.role == 'aut'] + authors = [str(x) for x in meta.creator if x.role == 'aut'] if meta.publisher: - publishers = unicode_type(meta.publisher[0]) + publishers = str(meta.publisher[0]) else: publishers = '' if meta.language: - lang = unicode_type(meta.language[0]).upper() + lang = str(meta.language[0]).upper() else: lang = '' if meta.description: - abstract = unicode_type(meta.description[0]) + abstract = str(meta.description[0]) else: abstract = '' diff --git a/ebook_converter/ebooks/conversion/plumber.py b/ebook_converter/ebooks/conversion/plumber.py index ec56d13..c78145c 100644 --- a/ebook_converter/ebooks/conversion/plumber.py +++ b/ebook_converter/ebooks/conversion/plumber.py @@ -13,7 +13,7 @@ from ebook_converter.utils.zipfile import ZipFile from ebook_converter import (extract, walk, isbytestring, filesystem_encoding, get_types_map) from ebook_converter.constants import __version__ -from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes +from ebook_converter.polyglot.builtins import string_or_bytes __license__ = 'GPL 3' @@ -795,7 +795,7 @@ OptionRecommendation(name='search_replace', def unarchive(self, path, tdir): extract(path, tdir) files = list(walk(tdir)) - files = [f if isinstance(f, unicode_type) else f.decode(filesystem_encoding) + files = [f if isinstance(f, str) else f.decode(filesystem_encoding) for f in files] from ebook_converter.customize.ui import available_input_formats fmts = set(available_input_formats()) @@ -848,7 +848,7 @@ OptionRecommendation(name='search_replace', rec = self.get_option_by_name(name) help = getattr(rec, 'help', None) if help is not None: - return help.replace('%default', unicode_type(rec.recommended_value)) + return help.replace('%default', str(rec.recommended_value)) def get_all_help(self): ans = {} @@ -916,7 +916,7 @@ OptionRecommendation(name='search_replace', try: val = parse_date(val, assume_utc=x=='timestamp') except: - self.log.exception(_('Failed to parse date/time') + ' ' + unicode_type(val)) + self.log.exception(_('Failed to parse date/time') + ' ' + str(val)) continue setattr(mi, x, val) diff --git a/ebook_converter/ebooks/conversion/preprocess.py b/ebook_converter/ebooks/conversion/preprocess.py index 1e528dd..1cc42b4 100644 --- a/ebook_converter/ebooks/conversion/preprocess.py +++ b/ebook_converter/ebooks/conversion/preprocess.py @@ -2,7 +2,6 @@ import functools, re, json from math import ceil from ebook_converter import entity_to_unicode, as_unicode -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL v3' @@ -72,8 +71,8 @@ def smarten_punctuation(html, log=None): from ebook_converter.ebooks.conversion.utils import HeuristicProcessor preprocessor = HeuristicProcessor(log=log) from uuid import uuid4 - start = 'calibre-smartypants-'+unicode_type(uuid4()) - stop = 'calibre-smartypants-'+unicode_type(uuid4()) + start = 'calibre-smartypants-'+str(uuid4()) + stop = 'calibre-smartypants-'+str(uuid4()) html = html.replace('', stop) html = preprocessor.fix_nbsp_indents(html) @@ -149,20 +148,20 @@ class DocAnalysis(object): maxLineLength=1900 # Discard larger than this to stay in range buckets=20 # Each line is divided into a bucket based on length - # print("there are "+unicode_type(len(lines))+" lines") + # print("there are "+str(len(lines))+" lines") # max = 0 # for line in self.lines: # l = len(line) # if l > max: # max = l - # print("max line found is "+unicode_type(max)) + # print("max line found is "+str(max)) # Build the line length histogram hRaw = [0 for i in range(0,buckets)] for line in self.lines: l = len(line) if l > minLineLength and l < maxLineLength: l = int(l // 100) - # print("adding "+unicode_type(l)) + # print("adding "+str(l)) hRaw[l]+=1 # Normalize the histogram into percents @@ -171,8 +170,8 @@ class DocAnalysis(object): h = [float(count)/totalLines for count in hRaw] else: h = [] - # print("\nhRaw histogram lengths are: "+unicode_type(hRaw)) - # print(" percents are: "+unicode_type(h)+"\n") + # print("\nhRaw histogram lengths are: "+str(hRaw)) + # print(" percents are: "+str(h)+"\n") # Find the biggest bucket maxValue = 0 @@ -184,7 +183,7 @@ class DocAnalysis(object): # print("Line lengths are too variable. Not unwrapping.") return False else: - # print(unicode_type(maxValue)+" of the lines were in one bucket") + # print(str(maxValue)+" of the lines were in one bucket") return True @@ -220,8 +219,8 @@ class Dehyphenator(object): wraptags = match.group('wraptags') except: wraptags = '' - hyphenated = unicode_type(firsthalf) + "-" + unicode_type(secondhalf) - dehyphenated = unicode_type(firsthalf) + unicode_type(secondhalf) + hyphenated = str(firsthalf) + "-" + str(secondhalf) + dehyphenated = str(firsthalf) + str(secondhalf) if self.suffixes.match(secondhalf) is None: lookupword = self.removesuffixes.sub('', dehyphenated) else: @@ -327,7 +326,7 @@ class CSSPreProcessor(object): # are commented lines before the first @import or @charset rule. Since # the conversion will remove all stylesheets anyway, we don't lose # anything - data = re.sub(unicode_type(r'/\*.*?\*/'), '', data, flags=re.DOTALL) + data = re.sub(str(r'/\*.*?\*/'), '', data, flags=re.DOTALL) ans, namespaced = [], False for line in data.splitlines(): @@ -535,7 +534,7 @@ class HTMLPreProcessor(object): docanalysis = DocAnalysis('pdf', html) length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor')) if length: - # print("The pdf line length returned is " + unicode_type(length)) + # print("The pdf line length returned is " + str(length)) # unwrap em/en dashes end_rules.append((re.compile( r'(?<=.{%i}[–—])\s*

\s*(?=[\[a-z\d])' % length), lambda match: '')) diff --git a/ebook_converter/ebooks/conversion/utils.py b/ebook_converter/ebooks/conversion/utils.py index 5c8f7df..b9af3f9 100644 --- a/ebook_converter/ebooks/conversion/utils.py +++ b/ebook_converter/ebooks/conversion/utils.py @@ -3,7 +3,6 @@ from math import ceil from ebook_converter.ebooks.conversion.preprocess import DocAnalysis, Dehyphenator from ebook_converter.utils.logging import default_log from ebook_converter.utils.wordcount import get_wordcount_obj -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL v3' @@ -51,8 +50,8 @@ class HeuristicProcessor(object): title = match.group('title') if not title: self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log.debug("marked " + unicode_type(self.html_preprocess_sections) + - " chapters. - " + unicode_type(chap)) + self.log.debug("marked " + str(self.html_preprocess_sections) + + " chapters. - " + str(chap)) return '

'+chap+'

\n' else: delete_whitespace = re.compile('^\\s*(?P.*?)\\s*$') @@ -60,16 +59,16 @@ class HeuristicProcessor(object): txt_chap = delete_quotes.sub('', delete_whitespace.sub('\\g', html2text(chap))) txt_title = delete_quotes.sub('', delete_whitespace.sub('\\g', html2text(title))) self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log.debug("marked " + unicode_type(self.html_preprocess_sections) + - " chapters & titles. - " + unicode_type(chap) + ", " + unicode_type(title)) + self.log.debug("marked " + str(self.html_preprocess_sections) + + " chapters & titles. - " + str(chap) + ", " + str(title)) return '

'+chap+'

\n

'+title+'

\n' def chapter_break(self, match): chap = match.group('section') styles = match.group('styles') self.html_preprocess_sections = self.html_preprocess_sections + 1 - self.log.debug("marked " + unicode_type(self.html_preprocess_sections) + - " section markers based on punctuation. - " + unicode_type(chap)) + self.log.debug("marked " + str(self.html_preprocess_sections) + + " section markers based on punctuation. - " + str(chap)) return '<'+styles+' style="page-break-before:always">'+chap def analyze_title_matches(self, match): @@ -112,8 +111,8 @@ class HeuristicProcessor(object): line_end = line_end_ere.findall(raw) tot_htm_ends = len(htm_end) tot_ln_fds = len(line_end) - # self.log.debug("There are " + unicode_type(tot_ln_fds) + " total Line feeds, and " + - # unicode_type(tot_htm_ends) + " marked up endings") + # self.log.debug("There are " + str(tot_ln_fds) + " total Line feeds, and " + + # str(tot_htm_ends) + " marked up endings") if percent > 1: percent = 1 @@ -121,7 +120,7 @@ class HeuristicProcessor(object): percent = 0 min_lns = tot_ln_fds * percent - # self.log.debug("There must be fewer than " + unicode_type(min_lns) + " unmarked lines to add markup") + # self.log.debug("There must be fewer than " + str(min_lns) + " unmarked lines to add markup") return min_lns > tot_htm_ends def dump(self, raw, where): @@ -158,17 +157,17 @@ class HeuristicProcessor(object): ] ITALICIZE_STYLE_PATS = [ - unicode_type(r'(?msu)(?<=[\s>"“\'‘])_\*/(?P[^\*_]+)/\*_'), - unicode_type(r'(?msu)(?<=[\s>"“\'‘])~~(?P[^~]+)~~'), - unicode_type(r'(?msu)(?<=[\s>"“\'‘])_/(?P[^/_]+)/_'), - unicode_type(r'(?msu)(?<=[\s>"“\'‘])_\*(?P[^\*_]+)\*_'), - unicode_type(r'(?msu)(?<=[\s>"“\'‘])\*/(?P[^/\*]+)/\*'), - unicode_type(r'(?msu)(?<=[\s>"“\'‘])/:(?P[^:/]+):/'), - unicode_type(r'(?msu)(?<=[\s>"“\'‘])\|:(?P[^:\|]+):\|'), - unicode_type(r'(?msu)(?<=[\s>"“\'‘])\*(?P[^\*]+)\*'), - unicode_type(r'(?msu)(?<=[\s>"“\'‘])~(?P[^~]+)~'), - unicode_type(r'(?msu)(?<=[\s>"“\'‘])/(?P[^/\*><]+)/'), - unicode_type(r'(?msu)(?<=[\s>"“\'‘])_(?P[^_]+)_'), + str(r'(?msu)(?<=[\s>"“\'‘])_\*/(?P[^\*_]+)/\*_'), + str(r'(?msu)(?<=[\s>"“\'‘])~~(?P[^~]+)~~'), + str(r'(?msu)(?<=[\s>"“\'‘])_/(?P[^/_]+)/_'), + str(r'(?msu)(?<=[\s>"“\'‘])_\*(?P[^\*_]+)\*_'), + str(r'(?msu)(?<=[\s>"“\'‘])\*/(?P[^/\*]+)/\*'), + str(r'(?msu)(?<=[\s>"“\'‘])/:(?P[^:/]+):/'), + str(r'(?msu)(?<=[\s>"“\'‘])\|:(?P[^:\|]+):\|'), + str(r'(?msu)(?<=[\s>"“\'‘])\*(?P[^\*]+)\*'), + str(r'(?msu)(?<=[\s>"“\'‘])~(?P[^~]+)~'), + str(r'(?msu)(?<=[\s>"“\'‘])/(?P[^/\*><]+)/'), + str(r'(?msu)(?<=[\s>"“\'‘])_(?P[^_]+)_'), ] for word in ITALICIZE_WORDS: @@ -178,10 +177,10 @@ class HeuristicProcessor(object): search_text = re.sub(r'<[^>]*>', '', search_text) for pat in ITALICIZE_STYLE_PATS: for match in re.finditer(pat, search_text): - ital_string = unicode_type(match.group('words')) - # self.log.debug("italicising "+unicode_type(match.group(0))+" with "+ital_string+"") + ital_string = str(match.group('words')) + # self.log.debug("italicising "+str(match.group(0))+" with "+ital_string+"") try: - html = re.sub(re.escape(unicode_type(match.group(0))), '%s' % ital_string, html) + html = re.sub(re.escape(str(match.group(0))), '%s' % ital_string, html) except OverflowError: # match.group(0) was too large to be compiled into a regex continue @@ -206,10 +205,10 @@ class HeuristicProcessor(object): if wordcount > 200000: typical_chapters = 15000. self.min_chapters = int(ceil(wordcount / typical_chapters)) - self.log.debug("minimum chapters required are: "+unicode_type(self.min_chapters)) + self.log.debug("minimum chapters required are: "+str(self.min_chapters)) heading = re.compile(']*>', re.IGNORECASE) self.html_preprocess_sections = len(heading.findall(html)) - self.log.debug("found " + unicode_type(self.html_preprocess_sections) + " pre-existing headings") + self.log.debug("found " + str(self.html_preprocess_sections) + " pre-existing headings") # Build the Regular Expressions in pieces init_lookahead = "(?=<(p|div))" @@ -299,7 +298,7 @@ class HeuristicProcessor(object): if n_lookahead_req: n_lookahead = re.sub("(ou|in|cha)", "lookahead_", full_chapter_line) if not analyze: - self.log.debug("Marked " + unicode_type(self.html_preprocess_sections) + " headings, " + log_message) + self.log.debug("Marked " + str(self.html_preprocess_sections) + " headings, " + log_message) chapter_marker = arg_ignorecase+init_lookahead+full_chapter_line+blank_lines+lp_n_lookahead_open+n_lookahead+lp_n_lookahead_close+ \ lp_opt_title_open+title_line_open+title_header_open+lp_title+title_header_close+title_line_close+lp_opt_title_close @@ -313,10 +312,10 @@ class HeuristicProcessor(object): title_req = True strict_title = False self.log.debug( - unicode_type(type_name)+" had "+unicode_type(hits)+ - " hits - "+unicode_type(self.chapters_no_title)+" chapters with no title, "+ - unicode_type(self.chapters_with_title)+" chapters with titles, "+ - unicode_type(float(self.chapters_with_title) / float(hits))+" percent. ") + str(type_name)+" had "+str(hits)+ + " hits - "+str(self.chapters_no_title)+" chapters with no title, "+ + str(self.chapters_with_title)+" chapters with titles, "+ + str(float(self.chapters_with_title) / float(hits))+" percent. ") if type_name == 'common': analysis_result.append([chapter_type, n_lookahead_req, strict_title, ignorecase, title_req, log_message, type_name]) elif self.min_chapters <= hits < max_chapters or self.min_chapters < 3 > hits: @@ -333,8 +332,8 @@ class HeuristicProcessor(object): words_per_chptr = wordcount if words_per_chptr > 0 and self.html_preprocess_sections > 0: words_per_chptr = wordcount // self.html_preprocess_sections - self.log.debug("Total wordcount is: "+ unicode_type(wordcount)+", Average words per section is: "+ - unicode_type(words_per_chptr)+", Marked up "+unicode_type(self.html_preprocess_sections)+" chapters") + self.log.debug("Total wordcount is: "+ str(wordcount)+", Average words per section is: "+ + str(words_per_chptr)+", Marked up "+str(self.html_preprocess_sections)+" chapters") return html def punctuation_unwrap(self, length, content, format): @@ -364,8 +363,8 @@ class HeuristicProcessor(object): # define the pieces of the regex # (?)?\\s*()?" blanklines = "\\s*(?P<(p|span|div)[^>]*>\\s*(<(p|span|div)[^>]*>\\s*\\s*)\\s*){0,3}\\s*" @@ -425,18 +424,18 @@ class HeuristicProcessor(object): return html def fix_nbsp_indents(self, html): - txtindent = re.compile(unicode_type(r'<(?Pp|div)(?P[^>]*)>\s*(?P(]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE) + txtindent = re.compile(str(r'<(?Pp|div)(?P[^>]*)>\s*(?P(]*>\s*)+)?\s*(\u00a0){2,}'), re.IGNORECASE) html = txtindent.sub(self.insert_indent, html) if self.found_indents > 1: - self.log.debug("replaced "+unicode_type(self.found_indents)+ " nbsp indents with inline styles") + self.log.debug("replaced "+str(self.found_indents)+ " nbsp indents with inline styles") return html def cleanup_markup(self, html): # remove remaining non-breaking spaces - html = re.sub(unicode_type(r'\u00a0'), ' ', html) + html = re.sub(str(r'\u00a0'), ' ', html) # Get rid of various common microsoft specific tags which can cause issues later # Get rid of empty tags to simplify other processing - html = re.sub(unicode_type(r'\s*\s*'), ' ', html) + html = re.sub(str(r'\s*\s*'), ' ', html) # Delete microsoft 'smart' tags html = re.sub('(?i)', '', html) # Re-open self closing paragraph tags @@ -476,8 +475,8 @@ class HeuristicProcessor(object): blanklines = self.blankreg.findall(html) lines = self.linereg.findall(html) if len(lines) > 1: - self.log.debug("There are " + unicode_type(len(blanklines)) + " blank lines. " + - unicode_type(float(len(blanklines)) / float(len(lines))) + " percent blank") + self.log.debug("There are " + str(len(blanklines)) + " blank lines. " + + str(float(len(blanklines)) / float(len(lines))) + " percent blank") if float(len(blanklines)) / float(len(lines)) > 0.40: return True @@ -499,11 +498,11 @@ class HeuristicProcessor(object): lines = float(len(self.single_blank.findall(to_merge))) - 1. em = base_em + (em_per_line * lines) if to_merge.find('whitespace'): - newline = self.any_multi_blank.sub('\n

', match.group(0)) + newline = self.any_multi_blank.sub('\n

', match.group(0)) else: - newline = self.any_multi_blank.sub('\n

', match.group(0)) + newline = self.any_multi_blank.sub('\n

', match.group(0)) return newline html = self.any_multi_blank.sub(merge_matches, html) @@ -527,9 +526,9 @@ class HeuristicProcessor(object): top_margin = '' bottom_margin = '' if initblanks is not None: - top_margin = 'margin-top:'+unicode_type(len(self.single_blank.findall(initblanks)))+'em;' + top_margin = 'margin-top:'+str(len(self.single_blank.findall(initblanks)))+'em;' if endblanks is not None: - bottom_margin = 'margin-bottom:'+unicode_type(len(self.single_blank.findall(endblanks)))+'em;' + bottom_margin = 'margin-bottom:'+str(len(self.single_blank.findall(endblanks)))+'em;' if initblanks is None and endblanks is None: return content @@ -606,7 +605,7 @@ class HeuristicProcessor(object): else: replacement_break = re.sub('(?i)(width=\\d+\\%?|width:\\s*\\d+(\\%|px|pt|em)?;?)', '', replacement_break) divpercent = (100 - width) // 2 - hr_open = re.sub('45', unicode_type(divpercent), hr_open) + hr_open = re.sub('45', str(divpercent), hr_open) scene_break = hr_open+replacement_break+'' else: scene_break = hr_open+'
' @@ -666,12 +665,12 @@ class HeuristicProcessor(object): else: styles = match.group('styles').split(';') is_paragraph = self.check_paragraph(content) - # print "styles for this line are: "+unicode_type(styles) + # print "styles for this line are: "+str(styles) split_styles = [] for style in styles: - # print "style is: "+unicode_type(style) + # print "style is: "+str(style) newstyle = style.split(':') - # print "newstyle is: "+unicode_type(newstyle) + # print "newstyle is: "+str(newstyle) split_styles.append(newstyle) styles = split_styles for style, setting in styles: @@ -682,7 +681,7 @@ class HeuristicProcessor(object): if 9 < setting < 14: text_indent = indented_text else: - text_indent = style+':'+unicode_type(setting)+'pt;' + text_indent = style+':'+str(setting)+'pt;' if style == 'padding': setting = re.sub('pt', '', setting).split(' ') if int(setting[1]) < 16 and int(setting[3]) < 16: @@ -703,23 +702,23 @@ class HeuristicProcessor(object): blockquote_open_loop = blockquote_open if debugabby: self.log.debug('\n\n******\n') - self.log.debug('padding top is: '+unicode_type(setting[0])) - self.log.debug('padding right is:' +unicode_type(setting[1])) - self.log.debug('padding bottom is: ' + unicode_type(setting[2])) - self.log.debug('padding left is: ' +unicode_type(setting[3])) + self.log.debug('padding top is: '+str(setting[0])) + self.log.debug('padding right is:' +str(setting[1])) + self.log.debug('padding bottom is: ' + str(setting[2])) + self.log.debug('padding left is: ' +str(setting[3])) - # print "text-align is: "+unicode_type(text_align) - # print "\n***\nline is:\n "+unicode_type(match.group(0))+'\n' + # print "text-align is: "+str(text_align) + # print "\n***\nline is:\n "+str(match.group(0))+'\n' if debugabby: - # print "this line is a paragraph = "+unicode_type(is_paragraph)+", previous line was "+unicode_type(self.previous_was_paragraph) + # print "this line is a paragraph = "+str(is_paragraph)+", previous line was "+str(self.previous_was_paragraph) self.log.debug("styles for this line were:", styles) self.log.debug('newline is:') self.log.debug(blockquote_open_loop+blockquote_close_loop+ paragraph_before+'

'+content+'

'+paragraph_after+'\n\n\n\n\n') - # print "is_paragraph is "+unicode_type(is_paragraph)+", previous_was_paragraph is "+unicode_type(self.previous_was_paragraph) + # print "is_paragraph is "+str(is_paragraph)+", previous_was_paragraph is "+str(self.previous_was_paragraph) self.previous_was_paragraph = is_paragraph - # print "previous_was_paragraph is now set to "+unicode_type(self.previous_was_paragraph)+"\n\n\n" + # print "previous_was_paragraph is now set to "+str(self.previous_was_paragraph)+"\n\n\n" return blockquote_open_loop+blockquote_close_loop+paragraph_before+'

'+content+'

'+paragraph_after html = abbyy_line.sub(convert_styles, html) @@ -802,12 +801,12 @@ class HeuristicProcessor(object): # more of the lines break in the same region of the document then unwrapping is required docanalysis = DocAnalysis(format, html) hardbreaks = docanalysis.line_histogram(.50) - self.log.debug("Hard line breaks check returned "+unicode_type(hardbreaks)) + self.log.debug("Hard line breaks check returned "+str(hardbreaks)) # Calculate Length unwrap_factor = getattr(self.extra_opts, 'html_unwrap_factor', 0.4) length = docanalysis.line_length(unwrap_factor) - self.log.debug("Median line length is " + unicode_type(length) + ", calculated with " + format + " format") + self.log.debug("Median line length is " + str(length) + ", calculated with " + format + " format") # ##### Unwrap lines ###### if getattr(self.extra_opts, 'unwrap_lines', False): @@ -829,7 +828,7 @@ class HeuristicProcessor(object): # If still no sections after unwrapping mark split points on lines with no punctuation if self.html_preprocess_sections < self.min_chapters and getattr(self.extra_opts, 'markup_chapter_headings', False): self.log.debug("Looking for more split points based on punctuation," - " currently have " + unicode_type(self.html_preprocess_sections)) + " currently have " + str(self.html_preprocess_sections)) chapdetect3 = re.compile( r'<(?P(p|div)[^>]*)>\s*(?P
(]*>)?\s*(?!([\W]+\s*)+)' r'(<[ibu][^>]*>){0,2}\s*(]*>)?\s*(<[ibu][^>]*>){0,2}\s*(]*>)?\s*' diff --git a/ebook_converter/ebooks/docx/footnotes.py b/ebook_converter/ebooks/docx/footnotes.py index d0af450..fe69211 100644 --- a/ebook_converter/ebooks/docx/footnotes.py +++ b/ebook_converter/ebooks/docx/footnotes.py @@ -1,5 +1,5 @@ from collections import OrderedDict -from ebook_converter.polyglot.builtins import iteritems, unicode_type +from ebook_converter.polyglot.builtins import iteritems __license__ = 'GPL v3' @@ -49,8 +49,8 @@ class Footnotes(object): if note is not None and note.type == 'normal': self.counter += 1 anchor = 'note_%d' % self.counter - self.notes[anchor] = (unicode_type(self.counter), note) - return anchor, unicode_type(self.counter) + self.notes[anchor] = (str(self.counter), note) + return anchor, str(self.counter) return None, None def __iter__(self): diff --git a/ebook_converter/ebooks/docx/numbering.py b/ebook_converter/ebooks/docx/numbering.py index 966f67d..a09c019 100644 --- a/ebook_converter/ebooks/docx/numbering.py +++ b/ebook_converter/ebooks/docx/numbering.py @@ -7,7 +7,7 @@ from lxml.html.builder import OL, UL, SPAN from ebook_converter.ebooks.docx.block_styles import ParagraphStyle from ebook_converter.ebooks.docx.char_styles import RunStyle, inherit from ebook_converter.ebooks.metadata import roman -from ebook_converter.polyglot.builtins import iteritems, unicode_type +from ebook_converter.polyglot.builtins import iteritems __license__ = 'GPL v3' @@ -288,7 +288,7 @@ class Numbering(object): seen_instances.add(num_id) p.tag = 'li' p.set('value', '%s' % counter[ilvl]) - p.set('list-lvl', unicode_type(ilvl)) + p.set('list-lvl', str(ilvl)) p.set('list-id', num_id) if lvl.num_template is not None: val = lvl.format_template(counter, ilvl, lvl.num_template) diff --git a/ebook_converter/ebooks/docx/tables.py b/ebook_converter/ebooks/docx/tables.py index cb778fa..8b0fd81 100644 --- a/ebook_converter/ebooks/docx/tables.py +++ b/ebook_converter/ebooks/docx/tables.py @@ -2,7 +2,7 @@ from lxml.html.builder import TABLE, TR, TD from ebook_converter.ebooks.docx.block_styles import inherit, read_shd as rs, read_border, binary_property, border_props, ParagraphStyle, border_to_css from ebook_converter.ebooks.docx.char_styles import RunStyle -from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type +from ebook_converter.polyglot.builtins import iteritems, itervalues __license__ = 'GPL v3' @@ -643,9 +643,9 @@ class Table(object): td = TD() style_map[td] = s = self.style_map[tc] if s.col_span is not inherit: - td.set('colspan', unicode_type(s.col_span)) + td.set('colspan', str(s.col_span)) if s.row_span is not inherit: - td.set('rowspan', unicode_type(s.row_span)) + td.set('rowspan', str(s.row_span)) td.tail = '\n\t\t\t' tr.append(td) for x in self.namespace.XPath('./w:p|./w:tbl')(tc): diff --git a/ebook_converter/ebooks/docx/to_html.py b/ebook_converter/ebooks/docx/to_html.py index 68d28f3..55ad4ff 100644 --- a/ebook_converter/ebooks/docx/to_html.py +++ b/ebook_converter/ebooks/docx/to_html.py @@ -21,7 +21,7 @@ from ebook_converter.ebooks.docx.fields import Fields from ebook_converter.ebooks.docx.settings import Settings from ebook_converter.ebooks.metadata.opf2 import OPFCreator from ebook_converter.utils.localization import canonicalize_lang, lang_as_iso639_1 -from ebook_converter.polyglot.builtins import iteritems, itervalues, getcwd, unicode_type +from ebook_converter.polyglot.builtins import iteritems, itervalues, getcwd __license__ = 'GPL v3' @@ -476,7 +476,7 @@ class Convert(object): current_hyperlink = x elif x.tag.endswith('}instrText') and x.text and x.text.strip().startswith('TOC '): old_anchor = current_anchor - anchor = unicode_type(uuid.uuid4()) + anchor = str(uuid.uuid4()) self.anchor_map[anchor] = current_anchor = generate_anchor('toc', frozenset(itervalues(self.anchor_map))) self.toc_anchor = current_anchor if old_anchor is not None: @@ -493,7 +493,7 @@ class Convert(object): if m is not None: n = min(6, max(1, int(m.group(1)))) dest.tag = 'h%d' % n - dest.set('data-heading-level', unicode_type(n)) + dest.set('data-heading-level', str(n)) if style.bidi is True: dest.set('dir', 'rtl') diff --git a/ebook_converter/ebooks/docx/writer/container.py b/ebook_converter/ebooks/docx/writer/container.py index d4b4b71..64f0c46 100644 --- a/ebook_converter/ebooks/docx/writer/container.py +++ b/ebook_converter/ebooks/docx/writer/container.py @@ -11,7 +11,7 @@ from ebook_converter.ebooks.pdf.render.common import PAPER_SIZES from ebook_converter.utils.date import utcnow from ebook_converter.utils.localization import canonicalize_lang, lang_as_iso639_1 from ebook_converter.utils.zipfile import ZipFile -from ebook_converter.polyglot.builtins import iteritems, unicode_type, native_string_type +from ebook_converter.polyglot.builtins import iteritems, native_string_type __license__ = 'GPL v3' @@ -62,9 +62,9 @@ def create_skeleton(opts, namespaces=None): def margin(which): val = page_margin(opts, which) - return w(which), unicode_type(int(val * 20)) + return w(which), str(int(val * 20)) body.append(E.sectPr( - E.pgSz(**{w('w'):unicode_type(width), w('h'):unicode_type(height)}), + E.pgSz(**{w('w'):str(width), w('h'):str(height)}), E.pgMar(**dict(map(margin, 'left top right bottom'.split()))), E.cols(**{w('space'):'720'}), E.docGrid(**{w('linePitch'):"360"}), diff --git a/ebook_converter/ebooks/docx/writer/from_html.py b/ebook_converter/ebooks/docx/writer/from_html.py index 92254ab..b9f03a7 100644 --- a/ebook_converter/ebooks/docx/writer/from_html.py +++ b/ebook_converter/ebooks/docx/writer/from_html.py @@ -11,7 +11,7 @@ from ebook_converter.ebooks.docx.writer.lists import ListsManager from ebook_converter.ebooks.oeb.stylizer import Stylizer as Sz, Style as St from ebook_converter.ebooks.oeb.base import XPath, barename from ebook_converter.utils.localization import lang_as_iso639_1 -from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes +from ebook_converter.polyglot.builtins import string_or_bytes __license__ = 'GPL v3' @@ -98,7 +98,7 @@ class TextRun(object): for text, preserve_whitespace, bookmark in self.texts: if bookmark is not None: bid = links_manager.bookmark_id - makeelement(r, 'w:bookmarkStart', w_id=unicode_type(bid), w_name=bookmark) + makeelement(r, 'w:bookmarkStart', w_id=str(bid), w_name=bookmark) if text is None: makeelement(r, 'w:br', w_clear=preserve_whitespace) elif hasattr(text, 'xpath'): @@ -109,7 +109,7 @@ class TextRun(object): if preserve_whitespace: t.set('{http://www.w3.org/XML/1998/namespace}space', 'preserve') if bookmark is not None: - makeelement(r, 'w:bookmarkEnd', w_id=unicode_type(bid)) + makeelement(r, 'w:bookmarkEnd', w_id=str(bid)) def __repr__(self): return repr(self.texts) @@ -125,7 +125,7 @@ class TextRun(object): def style_weight(self): ans = 0 for text, preserve_whitespace, bookmark in self.texts: - if isinstance(text, unicode_type): + if isinstance(text, str): ans += len(text) return ans @@ -205,7 +205,7 @@ class Block(object): p = makeelement(body, 'w:p') end_bookmarks = [] for bmark in self.bookmarks: - end_bookmarks.append(unicode_type(self.links_manager.bookmark_id)) + end_bookmarks.append(str(self.links_manager.bookmark_id)) makeelement(p, 'w:bookmarkStart', w_id=end_bookmarks[-1], w_name=bmark) if self.block_lang: rpr = makeelement(p, 'w:rPr') @@ -218,8 +218,8 @@ class Block(object): self.float_spec.serialize(self, ppr) if self.numbering_id is not None: numpr = makeelement(ppr, 'w:numPr') - makeelement(numpr, 'w:ilvl', w_val=unicode_type(self.numbering_id[1])) - makeelement(numpr, 'w:numId', w_val=unicode_type(self.numbering_id[0])) + makeelement(numpr, 'w:ilvl', w_val=str(self.numbering_id[1])) + makeelement(numpr, 'w:numId', w_val=str(self.numbering_id[0])) if self.linked_style is not None: makeelement(ppr, 'w:pStyle', w_val=self.linked_style.id) elif self.style.id: @@ -439,8 +439,8 @@ class Convert(object): if self.add_toc: self.links_manager.process_toc_links(self.oeb) - if self.add_cover and self.oeb.metadata.cover and unicode_type(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids: - cover_id = unicode_type(self.oeb.metadata.cover[0]) + if self.add_cover and self.oeb.metadata.cover and str(self.oeb.metadata.cover[0]) in self.oeb.manifest.ids: + cover_id = str(self.oeb.metadata.cover[0]) item = self.oeb.manifest.ids[cover_id] self.cover_img = self.images_manager.read_image(item.href) diff --git a/ebook_converter/ebooks/docx/writer/images.py b/ebook_converter/ebooks/docx/writer/images.py index 90da7c2..20fa1db 100644 --- a/ebook_converter/ebooks/docx/writer/images.py +++ b/ebook_converter/ebooks/docx/writer/images.py @@ -2,7 +2,7 @@ import os import posixpath from collections import namedtuple from functools import partial -from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type +from ebook_converter.polyglot.builtins import iteritems, itervalues from lxml import etree @@ -31,7 +31,7 @@ def get_image_margins(style): ans = {} for edge in 'Left Right Top Bottom'.split(): val = as_num(getattr(style, 'padding' + edge)) + as_num(getattr(style, 'margin' + edge)) - ans['dist' + edge[0]] = unicode_type(pt_to_emu(val)) + ans['dist' + edge[0]] = str(pt_to_emu(val)) return ans @@ -123,7 +123,7 @@ class ImagesManager(object): makeelement(parent, 'wp:simplePos', x='0', y='0') makeelement(makeelement(parent, 'wp:positionH', relativeFrom='margin'), 'wp:align').text = floating makeelement(makeelement(parent, 'wp:positionV', relativeFrom='line'), 'wp:align').text = 'top' - makeelement(parent, 'wp:extent', cx=unicode_type(width), cy=unicode_type(height)) + makeelement(parent, 'wp:extent', cx=str(width), cy=str(height)) if fake_margins: # DOCX does not support setting margins for inline images, so we # fake it by using effect extents to simulate margins @@ -141,7 +141,7 @@ class ImagesManager(object): def create_docx_image_markup(self, parent, name, alt, img_rid, width, height): makeelement, namespaces = self.document_relationships.namespace.makeelement, self.document_relationships.namespace.namespaces - makeelement(parent, 'wp:docPr', id=unicode_type(self.count), name=name, descr=alt) + makeelement(parent, 'wp:docPr', id=str(self.count), name=name, descr=alt) makeelement(makeelement(parent, 'wp:cNvGraphicFramePr'), 'a:graphicFrameLocks', noChangeAspect="1") g = makeelement(parent, 'a:graphic') gd = makeelement(g, 'a:graphicData', uri=namespaces['pic']) @@ -154,7 +154,7 @@ class ImagesManager(object): makeelement(makeelement(bf, 'a:stretch'), 'a:fillRect') spPr = makeelement(pic, 'pic:spPr') xfrm = makeelement(spPr, 'a:xfrm') - makeelement(xfrm, 'a:off', x='0', y='0'), makeelement(xfrm, 'a:ext', cx=unicode_type(width), cy=unicode_type(height)) + makeelement(xfrm, 'a:off', x='0', y='0'), makeelement(xfrm, 'a:ext', cx=str(width), cy=str(height)) makeelement(makeelement(spPr, 'a:prstGeom', prst='rect'), 'a:avLst') def create_filename(self, href, fmt): @@ -165,7 +165,7 @@ class ImagesManager(object): base = fname while fname.lower() in self.seen_filenames: num += 1 - fname = base + unicode_type(num) + fname = base + str(num) self.seen_filenames.add(fname.lower()) fname += os.extsep + fmt.lower() return fname @@ -200,7 +200,7 @@ class ImagesManager(object): makeelement(makeelement(parent, 'wp:positionH', relativeFrom='page'), 'wp:align').text = 'center' makeelement(makeelement(parent, 'wp:positionV', relativeFrom='page'), 'wp:align').text = 'center' width, height = map(pt_to_emu, (width, height)) - makeelement(parent, 'wp:extent', cx=unicode_type(width), cy=unicode_type(height)) + makeelement(parent, 'wp:extent', cx=str(width), cy=str(height)) makeelement(parent, 'wp:effectExtent', l='0', r='0', t='0', b='0') makeelement(parent, 'wp:wrapTopAndBottom') self.create_docx_image_markup(parent, 'cover.jpg', _('Cover'), img.rid, width, height) diff --git a/ebook_converter/ebooks/docx/writer/links.py b/ebook_converter/ebooks/docx/writer/links.py index b69f520..17f7116 100644 --- a/ebook_converter/ebooks/docx/writer/links.py +++ b/ebook_converter/ebooks/docx/writer/links.py @@ -4,7 +4,6 @@ import urllib.parse import uuid from ebook_converter.utils.filenames import ascii_text -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL v3' @@ -34,7 +33,7 @@ class TOCItem(object): p = makeelement(body, 'w:p', append=False) ppr = makeelement(p, 'w:pPr') makeelement(ppr, 'w:pStyle', w_val="Normal") - makeelement(ppr, 'w:ind', w_left='0', w_firstLineChars='0', w_firstLine='0', w_leftChars=unicode_type(200 * self.level)) + makeelement(ppr, 'w:ind', w_left='0', w_firstLineChars='0', w_firstLine='0', w_leftChars=str(200 * self.level)) if self.is_first: makeelement(ppr, 'w:pageBreakBefore', w_val='off') r = makeelement(p, 'w:r') @@ -68,7 +67,7 @@ class LinksManager(object): self.namespace = namespace self.log = log self.document_relationships = document_relationships - self.top_anchor = unicode_type(uuid.uuid4().hex) + self.top_anchor = str(uuid.uuid4().hex) self.anchor_map = {} self.used_bookmark_names = set() self.bmark_id = 0 diff --git a/ebook_converter/ebooks/docx/writer/lists.py b/ebook_converter/ebooks/docx/writer/lists.py index 438ff35..fa537bd 100644 --- a/ebook_converter/ebooks/docx/writer/lists.py +++ b/ebook_converter/ebooks/docx/writer/lists.py @@ -1,7 +1,7 @@ from collections import defaultdict from operator import attrgetter -from ebook_converter.polyglot.builtins import iteritems, itervalues, unicode_type +from ebook_converter.polyglot.builtins import iteritems, itervalues __license__ = 'GPL v3' @@ -80,7 +80,7 @@ class NumberingDefinition(object): def serialize(self, parent): makeelement = self.namespace.makeelement - an = makeelement(parent, 'w:abstractNum', w_abstractNumId=unicode_type(self.num_id)) + an = makeelement(parent, 'w:abstractNum', w_abstractNumId=str(self.num_id)) makeelement(an, 'w:multiLevelType', w_val='hybridMultilevel') makeelement(an, 'w:name', w_val='List %d' % (self.num_id + 1)) for level in self.levels: @@ -111,12 +111,12 @@ class Level(object): return hash((self.start, self.num_fmt, self.lvl_text)) def serialize(self, parent, makeelement): - lvl = makeelement(parent, 'w:lvl', w_ilvl=unicode_type(self.ilvl)) - makeelement(lvl, 'w:start', w_val=unicode_type(self.start)) + lvl = makeelement(parent, 'w:lvl', w_ilvl=str(self.ilvl)) + makeelement(lvl, 'w:start', w_val=str(self.start)) makeelement(lvl, 'w:numFmt', w_val=self.num_fmt) makeelement(lvl, 'w:lvlText', w_val=self.lvl_text) makeelement(lvl, 'w:lvlJc', w_val='left') - makeelement(makeelement(lvl, 'w:pPr'), 'w:ind', w_hanging='360', w_left=unicode_type(1152 + self.ilvl * 360)) + makeelement(makeelement(lvl, 'w:pPr'), 'w:ind', w_hanging='360', w_left=str(1152 + self.ilvl * 360)) if self.num_fmt == 'bullet': ff = {'\uf0b7':'Symbol', '\uf0a7':'Wingdings'}.get(self.lvl_text, 'Courier New') makeelement(makeelement(lvl, 'w:rPr'), 'w:rFonts', w_ascii=ff, w_hAnsi=ff, w_hint="default") @@ -162,5 +162,5 @@ class ListsManager(object): defn.serialize(parent) makeelement = self.namespace.makeelement for defn in self.definitions: - n = makeelement(parent, 'w:num', w_numId=unicode_type(defn.num_id + 1)) - makeelement(n, 'w:abstractNumId', w_val=unicode_type(defn.num_id)) + n = makeelement(parent, 'w:num', w_numId=str(defn.num_id + 1)) + makeelement(n, 'w:abstractNumId', w_val=str(defn.num_id)) diff --git a/ebook_converter/ebooks/docx/writer/styles.py b/ebook_converter/ebooks/docx/writer/styles.py index 8b414ee..a814618 100644 --- a/ebook_converter/ebooks/docx/writer/styles.py +++ b/ebook_converter/ebooks/docx/writer/styles.py @@ -7,7 +7,7 @@ from lxml import etree from ebook_converter.ebooks import parse_css_length from ebook_converter.ebooks.docx.writer.utils import convert_color, int_or_zero from ebook_converter.utils.localization import lang_as_iso639_1 -from ebook_converter.polyglot.builtins import iteritems, unicode_type +from ebook_converter.polyglot.builtins import iteritems from ebook_converter.tinycss.css21 import CSS21Parser @@ -73,7 +73,7 @@ class CombinedStyle(object): pPr = makeelement(block, 'w:pPr') self.bs.serialize_properties(pPr, normal_style.bs) if self.outline_level is not None: - makeelement(pPr, 'w:outlineLvl', w_val=unicode_type(self.outline_level + 1)) + makeelement(pPr, 'w:outlineLvl', w_val=str(self.outline_level + 1)) rPr = makeelement(block, 'w:rPr') self.rs.serialize_properties(rPr, normal_style.rs) @@ -106,16 +106,16 @@ class FloatSpec(object): def serialize(self, block, parent): if self.is_dropcaps: - attrs = dict(w_dropCap='drop', w_lines=unicode_type(self.dropcaps_lines), w_wrap='around', w_vAnchor='text', w_hAnchor='text') + attrs = dict(w_dropCap='drop', w_lines=str(self.dropcaps_lines), w_wrap='around', w_vAnchor='text', w_hAnchor='text') else: attrs = dict( w_wrap='around', w_vAnchor='text', w_hAnchor='text', w_xAlign=self.x_align, w_y='1', - w_hSpace=unicode_type(self.h_space), w_vSpace=unicode_type(self.v_space), w_hRule=self.h_rule + w_hSpace=str(self.h_space), w_vSpace=str(self.v_space), w_hRule=self.h_rule ) if self.w is not None: - attrs['w_w'] = unicode_type(self.w) + attrs['w_w'] = str(self.w) if self.h is not None: - attrs['w_h'] = unicode_type(self.h) + attrs['w_h'] = str(self.h) self.makeelement(parent, 'w:framePr', **attrs) # Margins are already applied by the frame style, so override them to # be zero on individual blocks @@ -135,7 +135,7 @@ class FloatSpec(object): width = getattr(self, 'border_%s_width' % edge) bstyle = getattr(self, 'border_%s_style' % edge) self.makeelement( - bdr, 'w:'+edge, w_space=unicode_type(padding), w_val=bstyle, w_sz=unicode_type(width), w_color=getattr(self, 'border_%s_color' % edge)) + bdr, 'w:'+edge, w_space=str(padding), w_val=bstyle, w_sz=str(width), w_color=getattr(self, 'border_%s_color' % edge)) class DOCXStyle(object): @@ -231,7 +231,7 @@ class TextStyle(DOCXStyle): self.spacing = None va = css.first_vertical_align if isinstance(va, numbers.Number): - self.vertical_align = unicode_type(int(va * 2)) + self.vertical_align = str(int(va * 2)) else: val = { 'top':'superscript', 'text-top':'superscript', 'sup':'superscript', 'super':'superscript', @@ -287,9 +287,9 @@ class TextStyle(DOCXStyle): w = self.w is_normal_style = self is normal_style if is_normal_style or self.padding != normal_style.padding: - bdr.set(w('space'), unicode_type(self.padding)) + bdr.set(w('space'), str(self.padding)) if is_normal_style or self.border_width != normal_style.border_width: - bdr.set(w('sz'), unicode_type(self.border_width)) + bdr.set(w('sz'), str(self.border_width)) if is_normal_style or self.border_style != normal_style.border_style: bdr.set(w('val'), self.border_style) if is_normal_style or self.border_color != normal_style.border_color: @@ -339,7 +339,7 @@ class TextStyle(DOCXStyle): if check_attr('shadow'): rPr.append(makeelement(rPr, 'shadow', val=bmap(self.shadow))) if check_attr('spacing'): - rPr.append(makeelement(rPr, 'spacing', val=unicode_type(self.spacing or 0))) + rPr.append(makeelement(rPr, 'spacing', val=str(self.spacing or 0))) if is_normal_style: rPr.append(makeelement(rPr, 'vertAlign', val=self.vertical_align if self.vertical_align in {'superscript', 'subscript'} else 'baseline')) elif self.vertical_align != normal_style.vertical_align: @@ -377,7 +377,7 @@ class DescendantTextStyle(object): for name, attr in (('sz', 'font_size'), ('b', 'bold'), ('i', 'italic')): pval, cval = vals(attr) if pval != cval: - val = 'on' if attr in {'bold', 'italic'} else unicode_type(cval) # bold, italic are toggle properties + val = 'on' if attr in {'bold', 'italic'} else str(cval) # bold, italic are toggle properties for suffix in ('', 'Cs'): add(name + suffix, val=val) @@ -398,7 +398,7 @@ class DescendantTextStyle(object): if check('shadow'): add('shadow', val='on') # toggle property if check('spacing'): - add('spacing', val=unicode_type(child_style.spacing or 0)) + add('spacing', val=str(child_style.spacing or 0)) if check('vertical_align'): val = child_style.vertical_align if val in {'superscript', 'subscript', 'baseline'}: @@ -408,9 +408,9 @@ class DescendantTextStyle(object): bdr = {} if check('padding'): - bdr['space'] = unicode_type(child_style.padding) + bdr['space'] = str(child_style.padding) if check('border_width'): - bdr['sz'] = unicode_type(child_style.border_width) + bdr['sz'] = str(child_style.border_width) if check('border_style'): bdr['val'] = child_style.border_style if check('border_color'): @@ -534,14 +534,14 @@ class BlockStyle(DOCXStyle): e = bdr.makeelement(w(edge)) padding = getattr(self, 'padding_' + edge) if (self is normal_style and padding > 0) or (padding != getattr(normal_style, 'padding_' + edge)): - e.set(w('space'), unicode_type(padding)) + e.set(w('space'), str(padding)) width = getattr(self, 'border_%s_width' % edge) bstyle = getattr(self, 'border_%s_style' % edge) if (self is normal_style and width > 0 and bstyle != 'none' ) or width != getattr(normal_style, 'border_%s_width' % edge ) or bstyle != getattr(normal_style, 'border_%s_style' % edge): e.set(w('val'), bstyle) - e.set(w('sz'), unicode_type(width)) + e.set(w('sz'), str(width)) e.set(w('color'), getattr(self, 'border_%s_color' % edge)) if e.attrib: bdr.append(e) @@ -565,15 +565,15 @@ class BlockStyle(DOCXStyle): if css_unit in ('em', 'ex'): lines = max(0, int(css_val * (50 if css_unit == 'ex' else 100))) if (self is normal_style and lines > 0) or getter(self) != getter(normal_style): - spacing.set(w(attr + 'Lines'), unicode_type(lines)) + spacing.set(w(attr + 'Lines'), str(lines)) else: getter = attrgetter('margin_' + edge) val = getter(self) if (self is normal_style and val > 0) or val != getter(normal_style): - spacing.set(w(attr), unicode_type(val)) + spacing.set(w(attr), str(val)) if self is normal_style or self.line_height != normal_style.line_height: - spacing.set(w('line'), unicode_type(self.line_height)) + spacing.set(w('line'), str(self.line_height)) spacing.set(w('lineRule'), 'atLeast') if spacing.attrib: @@ -586,31 +586,31 @@ class BlockStyle(DOCXStyle): if css_unit in ('em', 'ex'): chars = max(0, int(css_val * (50 if css_unit == 'ex' else 100))) if (self is normal_style and chars > 0) or getter(self) != getter(normal_style): - ind.set(w(edge + 'Chars'), unicode_type(chars)) + ind.set(w(edge + 'Chars'), str(chars)) else: getter = attrgetter('margin_' + edge) val = getter(self) if (self is normal_style and val > 0) or val != getter(normal_style): - ind.set(w(edge), unicode_type(val)) + ind.set(w(edge), str(val)) ind.set(w(edge + 'Chars'), '0') # This is needed to override any declaration in the parent style css_val, css_unit = parse_css_length(self.css_text_indent) if css_unit in ('em', 'ex'): chars = int(css_val * (50 if css_unit == 'ex' else 100)) if css_val >= 0: if (self is normal_style and chars > 0) or self.css_text_indent != normal_style.css_text_indent: - ind.set(w('firstLineChars'), unicode_type(chars)) + ind.set(w('firstLineChars'), str(chars)) else: if (self is normal_style and chars < 0) or self.css_text_indent != normal_style.css_text_indent: - ind.set(w('hangingChars'), unicode_type(abs(chars))) + ind.set(w('hangingChars'), str(abs(chars))) else: val = self.text_indent if val >= 0: if (self is normal_style and val > 0) or self.text_indent != normal_style.text_indent: - ind.set(w('firstLine'), unicode_type(val)) + ind.set(w('firstLine'), str(val)) ind.set(w('firstLineChars'), '0') # This is needed to override any declaration in the parent style else: if (self is normal_style and val < 0) or self.text_indent != normal_style.text_indent: - ind.set(w('hanging'), unicode_type(abs(val))) + ind.set(w('hanging'), str(abs(val))) ind.set(w('hangingChars'), '0') if ind.attrib: pPr.append(ind) @@ -684,7 +684,7 @@ class StylesManager(object): pure_block_styles.add(bs) self.pure_block_styles = sorted(pure_block_styles, key=block_counts.__getitem__) - bnum = len(unicode_type(max(1, len(pure_block_styles) - 1))) + bnum = len(str(max(1, len(pure_block_styles) - 1))) for i, bs in enumerate(self.pure_block_styles): bs.id = bs.name = '%0{}d Block'.format(bnum) % i bs.seq = i @@ -704,7 +704,7 @@ class StylesManager(object): heading_style = styles[-1] heading_style.outline_level = i - snum = len(unicode_type(max(1, len(counts) - 1))) + snum = len(str(max(1, len(counts) - 1))) heading_styles = [] for i, (style, count) in enumerate(counts.most_common()): if i == 0: @@ -732,7 +732,7 @@ class StylesManager(object): if run.descendant_style is None: run.descendant_style = descendant_style_map[ds] = ds ds_counts[run.descendant_style] += run.style_weight - rnum = len(unicode_type(max(1, len(ds_counts) - 1))) + rnum = len(str(max(1, len(ds_counts) - 1))) for i, (text_style, count) in enumerate(ds_counts.most_common()): text_style.id = 'Text%d' % i text_style.name = '%0{}d Text'.format(rnum) % i diff --git a/ebook_converter/ebooks/docx/writer/tables.py b/ebook_converter/ebooks/docx/writer/tables.py index f38a2c1..03d4ea8 100644 --- a/ebook_converter/ebooks/docx/writer/tables.py +++ b/ebook_converter/ebooks/docx/writer/tables.py @@ -2,7 +2,7 @@ from collections import namedtuple from ebook_converter.ebooks.docx.writer.utils import convert_color from ebook_converter.ebooks.docx.writer.styles import read_css_block_borders as rcbb, border_edges -from ebook_converter.polyglot.builtins import iteritems, unicode_type +from ebook_converter.polyglot.builtins import iteritems __license__ = 'GPL v3' @@ -112,7 +112,7 @@ class Cell(object): def serialize(self, parent, makeelement): tc = makeelement(parent, 'w:tc') tcPr = makeelement(tc, 'w:tcPr') - makeelement(tcPr, 'w:tcW', w_type=self.width[0], w_w=unicode_type(self.width[1])) + makeelement(tcPr, 'w:tcW', w_type=self.width[0], w_w=str(self.width[1])) # For some reason, Word 2007 refuses to honor at the table or row # level, despite what the specs say, so we inherit and apply at the # cell level @@ -123,7 +123,7 @@ class Cell(object): b = makeelement(tcPr, 'w:tcBorders', append=False) for edge, border in iteritems(self.borders): if border is not None and border.width > 0 and border.style != 'none': - makeelement(b, 'w:' + edge, w_val=border.style, w_sz=unicode_type(border.width), w_color=border.color) + makeelement(b, 'w:' + edge, w_val=border.style, w_sz=str(border.width), w_color=border.color) if len(b) > 0: tcPr.append(b) @@ -133,7 +133,7 @@ class Cell(object): if edge in {'top', 'bottom'} or (edge == 'left' and self is self.row.first_cell) or (edge == 'right' and self is self.row.last_cell): padding += getattr(self.row, 'padding_' + edge) if padding > 0: - makeelement(m, 'w:' + edge, w_type='dxa', w_w=unicode_type(int(padding * 20))) + makeelement(m, 'w:' + edge, w_type='dxa', w_w=str(int(padding * 20))) if len(m) > 0: tcPr.append(m) @@ -353,14 +353,14 @@ class Table(object): return tbl = makeelement(parent, 'w:tbl') tblPr = makeelement(tbl, 'w:tblPr') - makeelement(tblPr, 'w:tblW', w_type=self.width[0], w_w=unicode_type(self.width[1])) + makeelement(tblPr, 'w:tblW', w_type=self.width[0], w_w=str(self.width[1])) if self.float in {'left', 'right'}: kw = {'w_vertAnchor':'text', 'w_horzAnchor':'text', 'w_tblpXSpec':self.float} for edge in border_edges: val = getattr(self, 'margin_' + edge) or 0 if {self.float, edge} == {'left', 'right'}: val = max(val, 2) - kw['w_' + edge + 'FromText'] = unicode_type(max(0, int(val *20))) + kw['w_' + edge + 'FromText'] = str(max(0, int(val *20))) makeelement(tblPr, 'w:tblpPr', **kw) if self.jc is not None: makeelement(tblPr, 'w:jc', w_val=self.jc) diff --git a/ebook_converter/ebooks/fb2/fb2ml.py b/ebook_converter/ebooks/fb2/fb2ml.py index 8977eaa..540c603 100644 --- a/ebook_converter/ebooks/fb2/fb2ml.py +++ b/ebook_converter/ebooks/fb2/fb2ml.py @@ -15,7 +15,7 @@ from ebook_converter.utils.localization import lang_as_iso639_1 from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.img import save_cover_data_to from ebook_converter.ebooks.oeb.base import urlnormalize -from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes +from ebook_converter.polyglot.builtins import string_or_bytes from ebook_converter.polyglot.binary import as_base64_unicode @@ -153,7 +153,7 @@ class FB2MLizer(object): metadata['author'] = '' metadata['keywords'] = '' - tags = list(map(unicode_type, self.oeb_book.metadata.subject)) + tags = list(map(str, self.oeb_book.metadata.subject)) if tags: tags = ', '.join(prepare_string_for_xml(x) for x in tags) metadata['keywords'] = '%s'%tags @@ -168,12 +168,12 @@ class FB2MLizer(object): year = publisher = isbn = '' identifiers = self.oeb_book.metadata['identifier'] for x in identifiers: - if x.get(OPF('scheme'), None).lower() == 'uuid' or unicode_type(x).startswith('urn:uuid:'): - metadata['id'] = unicode_type(x).split(':')[-1] + if x.get(OPF('scheme'), None).lower() == 'uuid' or str(x).startswith('urn:uuid:'): + metadata['id'] = str(x).split(':')[-1] break if metadata['id'] is None: self.log.warn('No UUID identifier found') - metadata['id'] = unicode_type(uuid.uuid4()) + metadata['id'] = str(uuid.uuid4()) try: date = self.oeb_book.metadata['date'][0] @@ -235,7 +235,7 @@ class FB2MLizer(object): ''') % metadata # Remove empty lines. - return '\n'.join(filter(unicode_type.strip, header.splitlines())) + return '\n'.join(filter(str.strip, header.splitlines())) def fb2_footer(self): return '' @@ -246,8 +246,8 @@ class FB2MLizer(object): cover_href = None # Get the raster cover if it's available. - if self.oeb_book.metadata.cover and unicode_type(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids: - id = unicode_type(self.oeb_book.metadata.cover[0]) + if self.oeb_book.metadata.cover and str(self.oeb_book.metadata.cover[0]) in self.oeb_book.manifest.ids: + id = str(self.oeb_book.metadata.cover[0]) cover_item = self.oeb_book.manifest.ids[id] if cover_item.media_type in OEB_RASTER_IMAGES: cover_href = cover_item.href diff --git a/ebook_converter/ebooks/html/input.py b/ebook_converter/ebooks/html/input.py index 0ef1445..6c35af0 100644 --- a/ebook_converter/ebooks/html/input.py +++ b/ebook_converter/ebooks/html/input.py @@ -11,7 +11,6 @@ from ebook_converter.ebooks.oeb.base import urlunquote from ebook_converter.ebooks.chardet import detect_xml_encoding from ebook_converter.constants import iswindows from ebook_converter import unicode_path, as_unicode, replace_entities -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL v3' @@ -44,7 +43,7 @@ class Link(object): :param base: The base directory that relative URLs are with respect to. Must be a unicode string. ''' - assert isinstance(url, unicode_type) and isinstance(base, unicode_type) + assert isinstance(url, str) and isinstance(base, str) self.url = url self.parsed_url = urllib.parse.urlparse(self.url) self.is_local = self.parsed_url.scheme in ('', 'file') @@ -149,7 +148,7 @@ class HTMLFile(object): return 'HTMLFile:%d:%s:%s'%(self.level, 'b' if self.is_binary else 'a', self.path) def __repr__(self): - return unicode_type(self) + return str(self) def find_links(self, src): for match in self.LINK_PAT.finditer(src): diff --git a/ebook_converter/ebooks/html/to_zip.py b/ebook_converter/ebooks/html/to_zip.py index 65c557a..18c945d 100644 --- a/ebook_converter/ebooks/html/to_zip.py +++ b/ebook_converter/ebooks/html/to_zip.py @@ -2,7 +2,6 @@ import textwrap, os, glob from ebook_converter.customize import FileTypePlugin from ebook_converter.constants import numeric_version -from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL v3' @@ -111,7 +110,7 @@ every time you add an HTML file to the library.\ config_dialog.exec_() if config_dialog.result() == QDialog.Accepted: - sc = unicode_type(sc.text()).strip() + sc = str(sc.text()).strip() if bf.isChecked(): sc += '|bf' customize_plugin(self, sc) diff --git a/ebook_converter/ebooks/htmlz/oeb2html.py b/ebook_converter/ebooks/htmlz/oeb2html.py index 5256eeb..dacc03f 100644 --- a/ebook_converter/ebooks/htmlz/oeb2html.py +++ b/ebook_converter/ebooks/htmlz/oeb2html.py @@ -13,7 +13,7 @@ from ebook_converter.ebooks.oeb.base import ( XHTML, XHTML_NS, SVG_NS, barename, namespace, OEB_IMAGES, XLINK, rewrite_links, urlnormalize) from ebook_converter.ebooks.oeb.stylizer import Stylizer from ebook_converter.utils.logging import default_log -from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes, as_bytes +from ebook_converter.polyglot.builtins import string_or_bytes, as_bytes __license__ = 'GPL 3' @@ -43,7 +43,7 @@ class OEB2HTML(object): self.log.info('Converting OEB book to HTML...') self.opts = opts try: - self.book_title = unicode_type(oeb_book.metadata.title[0]) + self.book_title = str(oeb_book.metadata.title[0]) except Exception: self.book_title = _('Unknown') self.links = {} diff --git a/ebook_converter/ebooks/lrf/html/convert_from.py b/ebook_converter/ebooks/lrf/html/convert_from.py index 85c73b6..5285c75 100644 --- a/ebook_converter/ebooks/lrf/html/convert_from.py +++ b/ebook_converter/ebooks/lrf/html/convert_from.py @@ -37,7 +37,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import ( RuledLine, Span, Sub, Sup, TextBlock ) from ebook_converter.ptempfile import PersistentTemporaryFile -from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type +from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes from ebook_converter.polyglot.urllib import unquote from PIL import Image as PILImage @@ -276,7 +276,7 @@ class HTMLConverter(object): update_css(npcss, self.override_pcss) paths = [os.path.abspath(path) for path in paths] - paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, unicode_type) else path for path in paths] + paths = [path.decode(sys.getfilesystemencoding()) if not isinstance(path, str) else path for path in paths] while len(paths) > 0 and self.link_level <= self.link_levels: for path in paths: @@ -356,7 +356,7 @@ class HTMLConverter(object): os.makedirs(tdir) try: with open(os.path.join(tdir, 'html2lrf-verbose.html'), 'wb') as f: - f.write(unicode_type(soup).encode('utf-8')) + f.write(str(soup).encode('utf-8')) self.log.info(_('Written preprocessed HTML to ')+f.name) except: pass @@ -389,7 +389,7 @@ class HTMLConverter(object): self.log.info(_('\tConverting to BBeB...')) self.current_style = {} self.page_break_found = False - if not isinstance(path, unicode_type): + if not isinstance(path, str): path = path.decode(sys.getfilesystemencoding()) self.target_prefix = path self.previous_text = '\n' @@ -399,7 +399,7 @@ class HTMLConverter(object): def parse_css(self, style): """ Parse the contents of a