', re.IGNORECASE), ''),
+ # Remove gray background
+ (re.compile(r'<BODY[^<>]+>'), '<BODY>'),
- # Remove gray background
- (re.compile(r'<BODY[^<>]+>'), '<BODY>'),
+ # Convert line breaks to paragraphs
+ (re.compile(r'<br[^>]*>\s*'), '</p>\n<p>'),
+ (re.compile(r'<body[^>]*>\s*'), '<body>\n<p>'),
+ (re.compile(r'\s*</body>'), '</p>\n</body>'),
- # Convert line breaks to paragraphs
- (re.compile(r'<br[^>]*>\s*'), '</p>\n<p>'),
- (re.compile(r'<body[^>]*>\s*'), '<body>\n<p>'),
- (re.compile(r'\s*</body>'), '</p>\n</body>'),
-
- # Clean up spaces
- (re.compile(r'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), ' '),
- # Add space before and after italics
- (re.compile(r'(?<!“)<i>'), ' <i>'),
- (re.compile(r'</i>(?=\w)'), '</i> '),
- ]
+ # Clean up spaces
+ (re.compile(r'(?<=[\.,;\?!”"\'])[\s^ ]*(?=<)'), ' '),
+ # Add space before and after italics
+ (re.compile(r'(?<!“)<i>'), ' <i>'),
+ (re.compile(r'</i>(?=\w)'), '</i> ')]
+ pdftohtml_rules.ans = ans
return ans
def book_designer_rules():
ans = getattr(book_designer_rules, 'ans', None)
if ans is None:
- ans = book_designer_rules.ans = [
- # HR
- (re.compile('<hr>', re.IGNORECASE),
- lambda match : '<span style="page-break-after:always"> </span>'),
- # Create header tags
- (re.compile(r'<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
- lambda match : '<h1 id="BookTitle" align="%s">%s</h1>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
- (re.compile(r'<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
- lambda match : '<h2 id="BookAuthor" align="%s">%s</h2>'%(match.group(2) if match.group(2) else 'center', match.group(3))),
- (re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
- lambda match : '<h2 class="title">%s</h2>'%(match.group(1),)),
- (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>', re.IGNORECASE|re.DOTALL),
- lambda match : '<h3 class="subtitle">%s</h3>'%(match.group(1),)),
- ]
+ ans = [(re.compile('<hr>', re.IGNORECASE),
+ lambda match: '<span style="page-break-after:always"> '
+ '</span>'),
+ # Create header tags
+ (re.compile(r'<h2[^><]*?id=BookTitle[^><]*?(align=)*(?(1)'
+ r'(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
+ lambda match: '<h1 id="BookTitle" align="%s">%s</h1>' %
+ (match.group(2) if match.group(2) else 'center',
+ match.group(3))),
+ (re.compile(r'<h2[^><]*?id=BookAuthor[^><]*?(align=)*(?(1)'
+ r'(\w+))*[^><]*?>[^><]*?</h2>', re.IGNORECASE),
+ lambda match: '<h2 id="BookAuthor" align="%s">%s</h2>' %
+ (match.group(2) if match.group(2) else 'center',
+ match.group(3))),
+ (re.compile('<span[^><]*?id=title[^><]*?>(.*?)</span>',
+ re.IGNORECASE | re.DOTALL),
+ lambda match: '<h2 class="title">%s</h2>' % (match.group(1),)),
+ (re.compile('<span[^><]*?id=subtitle[^><]*?>(.*?)</span>',
+ re.IGNORECASE | re.DOTALL),
+ lambda match: '<h3 class="subtitle">%s</h3>' %
+ (match.group(1),))]
+ book_designer_rules.ans = ans
return ans
@@ -470,7 +505,7 @@ class HTMLPreProcessor(object):
return '<!-- created by calibre\'s pdftohtml -->' in src[:1000]
def __call__(self, html, remove_special_chars=None,
- get_preprocess_html=False):
+ get_preprocess_html=False):
if remove_special_chars is not None:
html = remove_special_chars.sub('', html)
html = html.replace('\0', '')
@@ -487,13 +522,14 @@ class HTMLPreProcessor(object):
start_rules = []
if not getattr(self.extra_opts, 'keep_ligatures', False):
- html = _ligpat.sub(lambda m:LIGATURES[m.group()], html)
+ html = _ligpat.sub(lambda m: LIGATURES[m.group()], html)
user_sr_rules = {}
# Function for processing search and replace
def do_search_replace(search_pattern, replace_txt):
- from ebook_converter.ebooks.conversion.search_replace import compile_regular_expression
+ from ebook_converter.ebooks.conversion.search_replace import \
+ compile_regular_expression
try:
search_re = compile_regular_expression(search_pattern)
if not replace_txt:
@@ -502,11 +538,11 @@ class HTMLPreProcessor(object):
user_sr_rules[(search_re, replace_txt)] = search_pattern
except Exception as e:
self.log.error('Failed to parse %r regexp because %s' %
- (search, as_unicode(e)))
+ (search, e))
# search / replace using the sr?_search / sr?_replace options
for i in range(1, 4):
- search, replace = 'sr%d_search'%i, 'sr%d_replace'%i
+ search, replace = 'sr%d_search' % i, 'sr%d_replace' % i
search_pattern = getattr(self.extra_opts, search, '')
replace_txt = getattr(self.extra_opts, replace, '')
if search_pattern:
@@ -520,31 +556,35 @@ class HTMLPreProcessor(object):
do_search_replace(search_pattern, replace_txt)
end_rules = []
- # delete soft hyphens - moved here so it's executed after header/footer removal
+ # delete soft hyphens - moved here so it's executed after
+ # header/footer removal
if is_pdftohtml:
# unwrap/delete soft hyphens
- end_rules.append((re.compile(
- r'[­](</p>\s*<p>\s*)+\s*(?=[\[a-z\d])'), lambda match: ''))
+ end_rules.append((re.compile(r'[­](</p>\s*<p>\s*)+\s*'
+ r'(?=[\[a-z\d])'), lambda match: ''))
# unwrap/delete soft hyphens with formatting
- end_rules.append((re.compile(
- r'[­]\s*(</(i|u|b)>)+(</p>\s*<p>\s*)+\s*(<(i|u|b)>)+\s*(?=[\[a-z\d])'), lambda match: ''))
+ end_rules.append((re.compile(r'[­]\s*(</(i|u|b)>)+(</p>\s*<p>\s*)+'
+ r'\s*(<(i|u|b)>)+\s*(?=[\[a-z\d])'),
+ lambda match: ''))
length = -1
if getattr(self.extra_opts, 'unwrap_factor', 0.0) > 0.01:
docanalysis = DocAnalysis('pdf', html)
- length = docanalysis.line_length(getattr(self.extra_opts, 'unwrap_factor'))
+ length = docanalysis.line_length(getattr(self.extra_opts,
+ 'unwrap_factor'))
if length:
# print("The pdf line length returned is " + str(length))
# unwrap em/en dashes
- end_rules.append((re.compile(
- r'(?<=.{%i}[–—])\s*<p>\s*(?=[\[a-z\d])' % length), lambda match: ''))
+ end_rules.append((re.compile(r'(?<=.{%i}[–—])\s*<p>\s*'
+ r'(?=[\[a-z\d])' % length),
+ lambda match: ''))
end_rules.append(
# Un wrap using punctuation
- (re.compile((
- r'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçąężıãõñæøþðßěľščťžňďřů,:)\\IAß]'
- r'|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*<p>\s*)+\s*(?=(<(i|b|u)>)?'
- r'\s*[\w\d$(])') % length, re.UNICODE), wrap_lines),
- )
+ (re.compile((r'(?<=.{%i}([a-zäëïöüàèìòùáćéíĺóŕńśúýâêîôûçą'
+ r'ężıãõñæøþðßěľščťžňďřů,:)\\IAß]|(?<!\&\w{4});))\s*(?P<ital></(i|b|u)>)?\s*(</p>\s*'
+ r'<p>\s*)+\s*(?=(<(i|b|u)>)?\s*[\w\d$(])') %
+ length, re.UNICODE), wrap_lines))
for rule in html_preprocess_rules() + start_rules:
html = rule[0].sub(rule[1], html)
@@ -567,7 +607,7 @@ class HTMLPreProcessor(object):
name, i = None, 0
while not name or os.path.exists(os.path.join(odir, name)):
i += 1
- name = '%04d.html'%i
+ name = '%04d.html' % i
with open(os.path.join(odir, name), 'wb') as f:
f.write(raw.encode('utf-8'))
@@ -578,20 +618,20 @@ class HTMLPreProcessor(object):
html = rule[0].sub(rule[1], html)
except Exception as e:
if rule in user_sr_rules:
- self.log.error(
- 'User supplied search & replace rule: %s -> %s '
- 'failed with error: %s, ignoring.'%(
- user_sr_rules[rule], rule[1], e))
+ self.log.error('User supplied search & replace rule: %s '
+ '-> %s failed with error: %s, ignoring.' %
+ (user_sr_rules[rule], rule[1], e))
else:
raise
if is_pdftohtml and length > -1:
# Dehyphenate
dehyphenator = Dehyphenator(self.extra_opts.verbose, self.log)
- html = dehyphenator(html,'html', length)
+ html = dehyphenator(html, 'html', length)
if is_pdftohtml:
- from ebook_converter.ebooks.conversion.utils import HeuristicProcessor
+ from ebook_converter.ebooks.conversion.utils import \
+ HeuristicProcessor
pdf_markup = HeuristicProcessor(self.extra_opts, None)
totalwords = 0
if pdf_markup.get_word_count(html) > 7000:
@@ -613,23 +653,26 @@ class HTMLPreProcessor(object):
from ebook_converter.utils.localization import get_udc
from ebook_converter.utils.mreplace import MReplace
unihandecoder = get_udc()
- mr = MReplace(data={'«':'<'*3, '»':'>'*3})
+ mr = MReplace(data={'«': '<' * 3, '»': '>' * 3})
html = mr.mreplace(html)
html = unihandecoder.decode(html)
if getattr(self.extra_opts, 'enable_heuristics', False):
- from ebook_converter.ebooks.conversion.utils import HeuristicProcessor
+ from ebook_converter.ebooks.conversion.utils import \
+ HeuristicProcessor
preprocessor = HeuristicProcessor(self.extra_opts, self.log)
html = preprocessor(html)
if is_pdftohtml:
- html = html.replace('<!-- created by calibre\'s pdftohtml -->', '')
+ html = html.replace('<!-- created by calibre\'s pdftohtml '
+ '-->', '')
if getattr(self.extra_opts, 'smarten_punctuation', False):
html = smarten_punctuation(html, self.log)
try:
- unsupported_unicode_chars = self.extra_opts.output_profile.unsupported_unicode_chars
+ unsupported_unicode_chars = (self.extra_opts.output_profile
+ .unsupported_unicode_chars)
except AttributeError:
unsupported_unicode_chars = ''
if unsupported_unicode_chars:
diff --git a/ebook_converter/ebooks/html/input.py b/ebook_converter/ebooks/html/input.py
index 3a61e1d..58b69bc 100644
--- a/ebook_converter/ebooks/html/input.py
+++ b/ebook_converter/ebooks/html/input.py
@@ -10,19 +10,13 @@ import urllib.parse
from ebook_converter.ebooks.oeb.base import urlunquote
from ebook_converter.ebooks.chardet import detect_xml_encoding
from ebook_converter.constants_old import iswindows
-from ebook_converter import unicode_path, as_unicode, replace_entities
-
-
-__license__ = 'GPL v3'
-__copyright__ = '2009, Kovid Goyal '
-__docformat__ = 'restructuredtext en'
+from ebook_converter import unicode_path, replace_entities
class Link(object):
-
- '''
+ """
Represents a link in a HTML file.
- '''
+ """
@classmethod
def url_to_local_path(cls, url, base):
@@ -31,7 +25,8 @@ class Link(object):
if iswindows and path.startswith('/'):
path = path[1:]
isabs = True
- path = urllib.parse.urlunparse(('', '', path, url.params, url.query, ''))
+ path = urllib.parse.urlunparse(('', '', path, url.params, url.query,
+ ''))
path = urlunquote(path)
if isabs or os.path.isabs(path):
return path
@@ -39,17 +34,18 @@ class Link(object):
def __init__(self, url, base):
'''
- :param url: The url this link points to. Must be an unquoted unicode string.
- :param base: The base directory that relative URLs are with respect to.
- Must be a unicode string.
+ :param url: The url this link points to. Must be an unquoted unicode
+ string.
+ :param base: The base directory that relative URLs are with respect
+ to. Must be a unicode string.
'''
assert isinstance(url, str) and isinstance(base, str)
- self.url = url
- self.parsed_url = urllib.parse.urlparse(self.url)
- self.is_local = self.parsed_url.scheme in ('', 'file')
+ self.url = url
+ self.parsed_url = urllib.parse.urlparse(self.url)
+ self.is_local = self.parsed_url.scheme in ('', 'file')
self.is_internal = self.is_local and not bool(self.parsed_url.path)
- self.path = None
- self.fragment = urlunquote(self.parsed_url.fragment)
+ self.path = None
+ self.fragment = urlunquote(self.parsed_url.fragment)
if self.is_local and not self.is_internal:
self.path = self.url_to_local_path(self.parsed_url, base)
@@ -62,7 +58,7 @@ class Link(object):
return self.path == getattr(other, 'path', other)
def __str__(self):
- return 'Link: %s --> %s'%(self.url, self.path)
+ return 'Link: %s --> %s' % (self.url, self.path)
class IgnoreFile(Exception):
@@ -84,24 +80,25 @@ class HTMLFile(object):
The encoding of the file is available as :member:`encoding`.
'''
- HTML_PAT = re.compile(r'<\s*html', re.IGNORECASE)
+ HTML_PAT = re.compile(r'<\s*html', re.IGNORECASE)
TITLE_PAT = re.compile('<title>([^<>]+)</title>', re.IGNORECASE)
- LINK_PAT = re.compile(
- r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P<url1>[^"]+)")|(?:\'(?P<url2>[^\']+)\')|(?P<url3>[^\s>]+))',
- re.DOTALL|re.IGNORECASE)
+ LINK_PAT = re.compile(r'<\s*a\s+.*?href\s*=\s*(?:(?:"(?P<url1>[^"]+)")|'
+ r'(?:\'(?P<url2>[^\']+)\')|(?P<url3>[^\s>]+))',
+ re.DOTALL | re.IGNORECASE)
- def __init__(self, path_to_html_file, level, encoding, verbose, referrer=None):
- '''
+ def __init__(self, path_to_html_file, level, encoding, verbose,
+ referrer=None):
+ """
:param level: The level of this file. Should be 0 for the root file.
:param encoding: Use `encoding` to decode HTML.
:param referrer: The :class:`HTMLFile` that first refers to this file.
- '''
- self.path = unicode_path(path_to_html_file, abs=True)
- self.title = os.path.splitext(os.path.basename(self.path))[0]
- self.base = os.path.dirname(self.path)
- self.level = level
+ """
+ self.path = unicode_path(path_to_html_file, abs=True)
+ self.title = os.path.splitext(os.path.basename(self.path))[0]
+ self.base = os.path.dirname(self.path)
+ self.level = level
self.referrer = referrer
- self.links = []
+ self.links = []
try:
with open(self.path, 'rb') as f:
@@ -112,18 +109,21 @@ class HTMLFile(object):
header = header.decode(encoding)
except ValueError:
pass
- self.is_binary = level > 0 and not bool(self.HTML_PAT.search(header))
+ self.is_binary = level > 0 and not bool(self
+ .HTML_PAT
+ .search(header))
if not self.is_binary:
src += f.read()
except IOError as err:
- msg = 'Could not read from file: %s with error: %s'%(self.path, as_unicode(err))
+ msg = ('Could not read from file: %s with error: %s' %
+ (self.path, str(err)))
if level == 0:
raise IOError(msg)
raise IgnoreFile(msg, err.errno)
if not src:
if level == 0:
- raise ValueError('The file %s is empty'%self.path)
+ raise ValueError('The file %s is empty' % self.path)
self.is_binary = True
if not self.is_binary:
@@ -145,7 +145,9 @@ class HTMLFile(object):
return hash(self.path)
def __str__(self):
- return 'HTMLFile:%d:%s:%s'%(self.level, 'b' if self.is_binary else 'a', self.path)
+ return 'HTMLFile:%d:%s:%s' % (self.level,
+ 'b' if self.is_binary else 'a',
+ self.path)
def __repr__(self):
return str(self)
@@ -191,20 +193,22 @@ def depth_first(root, flat, visited=None):
visited.add(hf)
-def traverse(path_to_html_file, max_levels=sys.maxsize, verbose=0, encoding=None):
- '''
+def traverse(path_to_html_file, max_levels=sys.maxsize, verbose=0,
+ encoding=None):
+ """
Recursively traverse all links in the HTML file.
:param max_levels: Maximum levels of recursion. Must be non-negative. 0
- implies that no links in the root HTML file are followed.
- :param encoding: Specify character encoding of HTML files. If `None` it is
- auto-detected.
- :return: A pair of lists (breadth_first, depth_first). Each list contains
- :class:`HTMLFile` objects.
- '''
+ implies that no links in the root HTML file are
+ followed.
+ :param encoding: Specify character encoding of HTML files. If `None` it
+ is auto-detected.
+ :return: A pair of lists (breadth_first, depth_first). Each list
+ contains :class:`HTMLFile` objects.
+ """
assert max_levels >= 0
level = 0
- flat = [HTMLFile(path_to_html_file, level, encoding, verbose)]
+ flat = [HTMLFile(path_to_html_file, level, encoding, verbose)]
next_level = list(flat)
while level < max_levels and len(next_level) > 0:
level += 1
@@ -215,9 +219,10 @@ def traverse(path_to_html_file, max_levels=sys.maxsize, verbose=0, encoding=None
if link.path is None or link.path in flat:
continue
try:
- nf = HTMLFile(link.path, level, encoding, verbose, referrer=hf)
+ nf = HTMLFile(link.path, level, encoding, verbose,
+ referrer=hf)
if nf.is_binary:
- raise IgnoreFile('%s is a binary file'%nf.path, -1)
+ raise IgnoreFile('%s is a binary file' % nf.path, -1)
nl.append(nf)
flat.append(nf)
except IgnoreFile as err:
@@ -244,7 +249,8 @@ def get_filelist(htmlfile, dir, opts, log):
log.info('Building file list...')
filelist = traverse(htmlfile, max_levels=int(opts.max_levels),
verbose=opts.verbose,
- encoding=opts.input_encoding)[0 if opts.breadth_first else 1]
+ encoding=opts
+ .input_encoding)[0 if opts.breadth_first else 1]
if opts.verbose:
log.debug('\tFound files...')
for f in filelist:
diff --git a/ebook_converter/ebooks/oeb/base.py b/ebook_converter/ebooks/oeb/base.py
index bde545c..dce8242 100644
--- a/ebook_converter/ebooks/oeb/base.py
+++ b/ebook_converter/ebooks/oeb/base.py
@@ -21,7 +21,6 @@ from ebook_converter import force_unicode
from ebook_converter.constants_old import filesystem_encoding, __version__
from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.ebooks.conversion.preprocess import CSSPreProcessor
-from ebook_converter import as_unicode
from ebook_converter.ebooks.oeb import parse_utils
from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.utils.short_uuid import uuid4
@@ -419,7 +418,7 @@ def urlnormalize(href):
parts = urllib.parse.urlparse(href)
except ValueError as e:
raise ValueError('Failed to parse the URL: %r with underlying error: '
- '%s' % (href, as_unicode(e)))
+ '%s' % (href, e))
if not parts.scheme or parts.scheme == 'file':
path, frag = urllib.parse.urldefrag(href)
parts = ('', '', path, '', '', frag)
@@ -723,7 +722,7 @@ class Metadata(object):
% (parse_utils.barename(self.term), self.value, self.attrib)
def __str__(self):
- return as_unicode(self.value)
+ return str(self.value)
def to_opf1(self, dcmeta=None, xmeta=None, nsrmap={}):
attrib = {}
diff --git a/ebook_converter/ebooks/oeb/transforms/split.py b/ebook_converter/ebooks/oeb/transforms/split.py
index 66b5b81..753b5c0 100644
--- a/ebook_converter/ebooks/oeb/transforms/split.py
+++ b/ebook_converter/ebooks/oeb/transforms/split.py
@@ -14,7 +14,7 @@ from lxml.etree import XPath as _XPath
from lxml import etree
from ebook_converter import constants as const
-from ebook_converter import as_unicode, force_unicode
+from ebook_converter import force_unicode
from ebook_converter.ebooks.epub import rules
from ebook_converter.ebooks.oeb import base
from ebook_converter.ebooks.oeb.polish.split import do_split
@@ -126,7 +126,7 @@ class Split(object):
except SelectorError as err:
self.log.warn('Ignoring page breaks specified with invalid '
'CSS selector: %r (%s)' %
- (selector, as_unicode(err)))
+ (selector, err))
for i, elem in enumerate(item.data.iter('*')):
try:
diff --git a/ebook_converter/utils/fonts/scanner.py b/ebook_converter/utils/fonts/scanner.py
index 4883acc..d87faf0 100644
--- a/ebook_converter/utils/fonts/scanner.py
+++ b/ebook_converter/utils/fonts/scanner.py
@@ -2,17 +2,13 @@ import os
from collections import defaultdict
from threading import Thread
-from ebook_converter import walk, prints, as_unicode
-from ebook_converter.constants_old import (config_dir, iswindows, isosx, plugins, DEBUG,
- isworker, filesystem_encoding)
+from ebook_converter import walk, prints
+from ebook_converter.constants_old import iswindows, isosx
+from ebook_converter.constants_old import plugins, DEBUG, isworker
+from ebook_converter.constants_old import filesystem_encoding
from ebook_converter.utils.fonts.metadata import FontMetadata, UnsupportedFont
-__license__ = 'GPL v3'
-__copyright__ = '2012, Kovid Goyal '
-__docformat__ = 'restructuredtext en'
-
-
class NoFonts(ValueError):
pass
@@ -38,7 +34,7 @@ def fc_list():
return default_font_dirs()
try:
lib = ctypes.CDLL(lib)
- except:
+ except Exception:
return default_font_dirs()
prototype = ctypes.CFUNCTYPE(ctypes.c_void_p, ctypes.c_void_p)
@@ -97,7 +93,7 @@ def font_dirs():
if iswindows:
winutil, err = plugins['winutil']
if err:
- raise RuntimeError('Failed to load winutil: %s'%err)
+ raise RuntimeError('Failed to load winutil: %s' % err)
try:
return [winutil.special_folder_path(winutil.CSIDL_FONTS)]
except ValueError:
@@ -126,9 +122,10 @@ def font_priority(font):
width_normal = font['font-stretch'] == 'normal'
weight_normal = font['font-weight'] == 'normal'
num_normal = sum(filter(None, (style_normal, width_normal,
- weight_normal)))
+ weight_normal)))
subfamily_name = (font['wws_subfamily_name'] or
- font['preferred_subfamily_name'] or font['subfamily_name'])
+ font['preferred_subfamily_name'] or
+ font['subfamily_name'])
if num_normal == 3 and subfamily_name == 'Regular':
return 0
if num_normal == 3:
@@ -167,7 +164,9 @@ def build_families(cached_fonts, folders, family_attr='font-family'):
if fingerprint in fmap:
opath = fmap[fingerprint]['path']
npath = font['path']
- if path_significance(npath, folders) >= path_significance(opath, folders):
+ if path_significance(npath,
+ folders) >= path_significance(opath,
+ folders):
remove.append(fmap[fingerprint])
fmap[fingerprint] = font
else:
@@ -214,7 +213,7 @@ class FontScanner(Thread):
try:
return self.font_family_map[family.lower()]
except KeyError:
- raise NoFonts('No fonts found for the family: %r'%family)
+ raise NoFonts('No fonts found for the family: %r' % family)
def legacy_fonts_for_family(self, family):
'''
@@ -247,8 +246,11 @@ class FontScanner(Thread):
with open(path, 'rb') as f:
return f.read()
- def find_font_for_text(self, text, allowed_families={'serif', 'sans-serif'},
- preferred_families=('serif', 'sans-serif', 'monospace', 'cursive', 'fantasy')):
+ def find_font_for_text(self, text,
+ allowed_families={'serif', 'sans-serif'},
+ preferred_families=('serif', 'sans-serif',
+ 'monospace', 'cursive',
+ 'fantasy')):
'''
Find a font on the system capable of rendering the given text.
@@ -258,10 +260,11 @@ class FontScanner(Thread):
:return: (family name, faces) or None, None
'''
- from ebook_converter.utils.fonts.utils import (supports_text,
- panose_to_css_generic_family, get_printable_characters)
+ from ebook_converter.utils.fonts.utils import \
+ supports_text, panose_to_css_generic_family, \
+ get_printable_characters
if not isinstance(text, str):
- raise TypeError(u'%r is not unicode'%text)
+ raise TypeError(u'%r is not unicode' % text)
text = get_printable_characters(text)
found = {}
@@ -269,7 +272,7 @@ class FontScanner(Thread):
try:
raw = self.get_font_data(font)
return supports_text(raw, text)
- except:
+ except Exception:
pass
return False
@@ -278,7 +281,8 @@ class FontScanner(Thread):
if not faces:
continue
generic_family = panose_to_css_generic_family(faces[0]['panose'])
- if generic_family in allowed_families or generic_family == preferred_families[0]:
+ if (generic_family in allowed_families or
+ generic_family == preferred_families[0]):
return (family, faces)
elif generic_family not in found:
found[generic_family] = (family, faces)
@@ -321,18 +325,20 @@ class FontScanner(Thread):
files = tuple(walk(folder))
except EnvironmentError as e:
if DEBUG:
- prints('Failed to walk font folder:', folder,
- as_unicode(e))
+ prints('Failed to walk font folder:', folder, str(e))
continue
for candidate in files:
- if (candidate.rpartition('.')[-1].lower() not in self.allowed_extensions or not os.path.isfile(candidate)):
+ if (candidate.rpartition('.')[-1].lower() not in
+ self.allowed_extensions or
+ not os.path.isfile(candidate)):
continue
candidate = os.path.normcase(os.path.abspath(candidate))
try:
s = os.stat(candidate)
except EnvironmentError:
continue
- fileid = '{0}||{1}:{2}'.format(candidate, s.st_size, s.st_mtime)
+ fileid = '{0}||{1}:{2}'.format(candidate, s.st_size,
+ s.st_mtime)
if fileid in cached_fonts:
# Use previously cached metadata, since the file size and
# last modified timestamp have not changed.
@@ -343,7 +349,7 @@ class FontScanner(Thread):
except Exception as e:
if DEBUG:
prints('Failed to read metadata from font file:',
- candidate, as_unicode(e))
+ candidate, str(e))
continue
if frozenset(cached_fonts) != frozenset(self.cached_fonts):
@@ -353,7 +359,8 @@ class FontScanner(Thread):
self.build_families()
def build_families(self):
- self.font_family_map, self.font_families = build_families(self.cached_fonts, self.folders)
+ (self.font_family_map,
+ self.font_families) = build_families(self.cached_fonts, self.folders)
def write_cache(self):
with self.cache:
@@ -380,14 +387,14 @@ class FontScanner(Thread):
for family in self.font_families:
prints(family)
for font in self.fonts_for_family(family):
- prints('\t%s: %s'%(font['full_name'], font['path']))
+ prints('\t%s: %s' % (font['full_name'], font['path']))
prints(end='\t')
for key in ('font-stretch', 'font-weight', 'font-style'):
- prints('%s: %s'%(key, font[key]), end=' ')
+ prints('%s: %s' % (key, font[key]), end=' ')
prints()
prints('\tSub-family:', font['wws_subfamily_name'] or
- font['preferred_subfamily_name'] or
- font['subfamily_name'])
+ font['preferred_subfamily_name'] or
+ font['subfamily_name'])
prints()
prints()
diff --git a/ebook_converter/utils/logging.py b/ebook_converter/utils/logging.py
index c1f2398..c25ab46 100644
--- a/ebook_converter/utils/logging.py
+++ b/ebook_converter/utils/logging.py
@@ -1,20 +1,18 @@
"""
A simplified logging system
"""
-import sys, traceback, io
+import sys
+import traceback
+import io
from functools import partial
from threading import Lock
-from ebook_converter import force_unicode, as_unicode, prints
+from ebook_converter import force_unicode, prints
-__license__ = 'GPL 3'
-__copyright__ = '2009, Kovid Goyal '
-__docformat__ = 'restructuredtext en'
-
DEBUG = 0
-INFO = 1
-WARN = 2
+INFO = 1
+WARN = 2
ERROR = 3
@@ -38,10 +36,10 @@ class ANSIStream(Stream):
def __init__(self, stream=sys.stdout):
Stream.__init__(self, stream)
self.color = {
- DEBUG: u'green',
+ DEBUG: 'green',
INFO: None,
- WARN: u'yellow',
- ERROR: u'red',
+ WARN: 'yellow',
+ ERROR: 'red',
}
def prints(self, level, *args, **kwargs):
@@ -64,12 +62,10 @@ class FileStream(Stream):
class HTMLStream(Stream):
- color = {
- DEBUG: b'<span style="color:green">',
- INFO: b'<span>',
- WARN: b'<span style="color:blue">',
- ERROR: b'<span style="color:red">'
- }
+ color = {DEBUG: b'<span style="color:green">',
+ INFO: b'<span>',
+ WARN: b'<span style="color:blue">',
+ ERROR: b'<span style="color:red">'}
normal = b'</span>'
def __init__(self, stream=sys.stdout):
@@ -104,14 +100,14 @@ class UnicodeHTMLStream(HTMLStream):
self.data.append(col)
self.last_col = col
- sep = kwargs.get(u'sep', u' ')
- end = kwargs.get(u'end', u'\n')
+ sep = kwargs.get('sep', ' ')
+ end = kwargs.get('end', '\n')
for arg in args:
if isinstance(arg, bytes):
arg = force_unicode(arg)
elif not isinstance(arg, str):
- arg = as_unicode(arg)
+ arg = str(arg)
self.data.append(arg+sep)
self.plain_text.append(arg+sep)
self.data.append(end)
@@ -124,8 +120,8 @@ class UnicodeHTMLStream(HTMLStream):
@property
def html(self):
- end = self.normal if self.data else u''
- return u''.join(self.data) + end
+ end = self.normal if self.data else ''
+ return ''.join(self.data) + end
def dump(self):
return [self.data, self.plain_text, self.last_col]
@@ -143,8 +139,8 @@ class UnicodeHTMLStream(HTMLStream):
class Log(object):
DEBUG = DEBUG
- INFO = INFO
- WARN = WARN
+ INFO = INFO
+ WARN = WARN
ERROR = ERROR
def __init__(self, level=INFO):
@@ -153,8 +149,8 @@ class Log(object):
self.outputs = [default_output]
self.debug = partial(self.print_with_flush, DEBUG)
- self.info = partial(self.print_with_flush, INFO)
- self.warn = self.warning = partial(self.print_with_flush, WARN)
+ self.info = partial(self.print_with_flush, INFO)
+ self.warn = self.warning = partial(self.print_with_flush, WARN)
self.error = partial(self.print_with_flush, ERROR)
def prints(self, level, *args, **kwargs):
@@ -222,7 +218,8 @@ class ThreadSafeLog(Log):
limit = kwargs.pop('limit', None)
with self._lock:
Log.print_with_flush(self, ERROR, *args, **kwargs)
- Log.print_with_flush(self, self.exception_traceback_level, traceback.format_exc(limit))
+ Log.print_with_flush(self, self.exception_traceback_level,
+ traceback.format_exc(limit))
class ThreadSafeWrapper(Log):
@@ -242,10 +239,9 @@ class ThreadSafeWrapper(Log):
class GUILog(ThreadSafeLog):
-
- '''
+ """
Logs in HTML and plain text as unicode. Ideal for display in a GUI context.
- '''
+ """
def __init__(self):
ThreadSafeLog.__init__(self, level=self.DEBUG)