From ef7e2b10be78dc45dc5343dd4f4d0116c92f2da9 Mon Sep 17 00:00:00 2001 From: gryf Date: Sun, 19 Apr 2020 21:22:24 +0200 Subject: [PATCH] Removing is_py3 method and duplicated by urllib. --- ebook_converter/__init__.py | 24 +++++--- .../ebooks/conversion/plugins/chm_input.py | 9 +-- .../ebooks/conversion/plugins/epub_output.py | 9 ++- .../ebooks/conversion/plugins/html_input.py | 21 ++++--- .../ebooks/conversion/plugins/html_output.py | 55 ++++++++++--------- .../ebooks/conversion/plugins/oeb_output.py | 11 ++-- ebook_converter/ebooks/docx/writer/links.py | 13 +++-- ebook_converter/ebooks/fb2/fb2ml.py | 8 ++- ebook_converter/ebooks/html/input.py | 22 ++++---- ebook_converter/ebooks/htmlz/oeb2html.py | 4 +- .../ebooks/lrf/html/convert_from.py | 10 ++-- ebook_converter/ebooks/metadata/__init__.py | 13 +++-- ebook_converter/ebooks/metadata/opf2.py | 16 +++++- ebook_converter/ebooks/metadata/toc.py | 18 +++--- ebook_converter/ebooks/mobi/reader/mobi8.py | 32 ++++++----- .../ebooks/mobi/writer2/serializer.py | 20 +++---- ebook_converter/ebooks/oeb/base.py | 29 +++++----- .../ebooks/oeb/polish/container.py | 4 +- ebook_converter/ebooks/oeb/polish/replace.py | 10 ++-- ebook_converter/ebooks/oeb/polish/split.py | 8 +-- ebook_converter/ebooks/oeb/polish/toc.py | 8 +-- ebook_converter/ebooks/oeb/reader.py | 23 ++++---- .../ebooks/oeb/transforms/cover.py | 4 +- .../ebooks/oeb/transforms/filenames.py | 10 ++-- .../ebooks/oeb/transforms/jacket.py | 5 +- .../ebooks/oeb/transforms/rasterize.py | 7 ++- .../ebooks/oeb/transforms/split.py | 11 ++-- .../ebooks/oeb/transforms/structure.py | 7 ++- .../ebooks/oeb/transforms/trimmanifest.py | 5 +- ebook_converter/ebooks/textile/functions.py | 9 +-- ebook_converter/polyglot/functools.py | 8 --- ebook_converter/polyglot/html_entities.py | 10 ---- ebook_converter/polyglot/urllib.py | 53 +++++------------- ebook_converter/utils/cleantext.py | 5 +- ebook_converter/utils/ipc/__init__.py | 20 ++++--- 35 files changed, 267 insertions(+), 254 
deletions(-) delete mode 100644 ebook_converter/polyglot/functools.py delete mode 100644 ebook_converter/polyglot/html_entities.py diff --git a/ebook_converter/__init__.py b/ebook_converter/__init__.py index 0d71742..e77ba15 100644 --- a/ebook_converter/__init__.py +++ b/ebook_converter/__init__.py @@ -3,10 +3,18 @@ __license__ = 'GPL v3' __copyright__ = '2008, Kovid Goyal ' __docformat__ = 'restructuredtext en' -import sys, os, re, time, random, warnings +import math +import os import pkg_resources +import random +import re +import sys +import time +import urllib.parse +import urllib.request +import warnings + from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, hasenv, native_string_type -from math import floor from functools import partial if not hasenv('CALIBRE_SHOW_DEPRECATION_WARNINGS'): @@ -276,8 +284,7 @@ def extract(path, dir): def get_proxies(debug=True): - from polyglot.urllib import getproxies - proxies = getproxies() + proxies = urllib.request.getproxies() for key, proxy in list(proxies.items()): if not proxy or '..' in proxy or key == 'auto': del proxies[key] @@ -338,10 +345,9 @@ def get_proxy_info(proxy_scheme, proxy_string): is not available in the string. If an exception occurs parsing the string this method returns None. 
''' - from polyglot.urllib import urlparse try: proxy_url = '%s://%s'%(proxy_scheme, proxy_string) - urlinfo = urlparse(proxy_url) + urlinfo = urllib.parse.urlparse(proxy_url) ans = { 'scheme': urlinfo.scheme, 'hostname': urlinfo.hostname, @@ -414,13 +420,13 @@ def fit_image(width, height, pwidth, pheight): scaled = height > pheight or width > pwidth if height > pheight: corrf = pheight / float(height) - width, height = floor(corrf*width), pheight + width, height = math.floor(corrf*width), pheight if width > pwidth: corrf = pwidth / float(width) - width, height = pwidth, floor(corrf*height) + width, height = pwidth, math.floor(corrf*height) if height > pheight: corrf = pheight / float(height) - width, height = floor(corrf*width), pheight + width, height = math.floor(corrf*width), pheight return scaled, int(width), int(height) diff --git a/ebook_converter/ebooks/conversion/plugins/chm_input.py b/ebook_converter/ebooks/conversion/plugins/chm_input.py index 0e58156..e65dd70 100644 --- a/ebook_converter/ebooks/conversion/plugins/chm_input.py +++ b/ebook_converter/ebooks/conversion/plugins/chm_input.py @@ -2,7 +2,11 @@ CHM File decoding support """ import os +from lxml import html +from ebook_converter.polyglot.urllib import unquote as _unquote +from ebook_converter.ebooks.oeb.base import urlquote +from ebook_converter.ebooks.chardet import xml_to_unicode from ebook_converter.customize.conversion import InputFormatPlugin from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.constants import filesystem_encoding @@ -109,10 +113,7 @@ class CHMInput(InputFormatPlugin): return oeb def _create_html_root(self, hhcpath, log, encoding): - from lxml import html - from ebook_converter.polyglot.urllib import unquote as _unquote - from ebook_converter.ebooks.oeb.base import urlquote - from ebook_converter.ebooks.chardet import xml_to_unicode + hhcdata = self._read_file(hhcpath) hhcdata = hhcdata.decode(encoding) hhcdata = xml_to_unicode(hhcdata, verbose=True, 
diff --git a/ebook_converter/ebooks/conversion/plugins/epub_output.py b/ebook_converter/ebooks/conversion/plugins/epub_output.py index b9674d8..20a454f 100644 --- a/ebook_converter/ebooks/conversion/plugins/epub_output.py +++ b/ebook_converter/ebooks/conversion/plugins/epub_output.py @@ -1,4 +1,7 @@ -import os, shutil, re +import os +import re +import shutil +import urllib.parse from ebook_converter.customize.conversion import (OutputFormatPlugin, OptionRecommendation) @@ -514,7 +517,7 @@ class EPUBOutput(OutputFormatPlugin): ''' Perform toc link transforms to alleviate slow loading. ''' - from ebook_converter.ebooks.oeb.base import urldefrag, XPath + from ebook_converter.ebooks.oeb.base import XPath from ebook_converter.ebooks.oeb.polish.toc import item_at_top def frag_is_at_top(root, frag): @@ -527,7 +530,7 @@ class EPUBOutput(OutputFormatPlugin): def simplify_toc_entry(toc): if toc.href: - href, frag = urldefrag(toc.href) + href, frag = urllib.parse.urldefrag(toc.href) if frag: for x in self.oeb.spine: if x.href == href: diff --git a/ebook_converter/ebooks/conversion/plugins/html_input.py b/ebook_converter/ebooks/conversion/plugins/html_input.py index cc76395..9dafbd2 100644 --- a/ebook_converter/ebooks/conversion/plugins/html_input.py +++ b/ebook_converter/ebooks/conversion/plugins/html_input.py @@ -1,5 +1,8 @@ -import re, tempfile, os -from functools import partial +import functools +import os +import re +import tempfile +import urllib.parse from ebook_converter.constants import islinux, isbsd from ebook_converter.customize.conversion import (InputFormatPlugin, @@ -97,7 +100,7 @@ class HTMLInput(InputFormatPlugin): import uuid from ebook_converter.ebooks.conversion.plumber import create_oebbook from ebook_converter.ebooks.oeb.base import (DirContainer, - rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, + rewrite_links, urlnormalize, BINARY_MIME, OEB_STYLES, xpath, urlquote) from ebook_converter import guess_type from 
ebook_converter.ebooks.oeb.transforms.metadata import \ @@ -163,7 +166,7 @@ class HTMLInput(InputFormatPlugin): path = path.lower() self.added_resources[path] = href self.urlnormalize, self.DirContainer = urlnormalize, DirContainer - self.urldefrag = urldefrag + self.urldefrag = urllib.parse.urldefrag self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME self.log('Rewriting HTML links') @@ -176,7 +179,8 @@ class HTMLInput(InputFormatPlugin): item = oeb.manifest.hrefs[href] except KeyError: item = oeb.manifest.hrefs[urlnormalize(href)] - rewrite_links(item.data, partial(self.resource_adder, base=dpath)) + rewrite_links(item.data, + functools.partial(self.resource_adder, base=dpath)) for item in oeb.manifest.values(): if item.media_type in self.OEB_STYLES: @@ -186,7 +190,7 @@ class HTMLInput(InputFormatPlugin): dpath = os.path.dirname(path) break css_parser.replaceUrls(item.data, - partial(self.resource_adder, base=dpath)) + functools.partial(self.resource_adder, base=dpath)) toc = self.oeb.toc self.oeb.auto_generated_toc = True @@ -242,7 +246,6 @@ class HTMLInput(InputFormatPlugin): return link, frag def resource_adder(self, link_, base=None): - from ebook_converter.polyglot.urllib import quote link, frag = self.link_to_local_path(link_, base=base) if link is None: return link_ @@ -287,9 +290,9 @@ class HTMLInput(InputFormatPlugin): # file, therefore we quote it here. 
if isinstance(bhref, unicode_type): bhref = bhref.encode('utf-8') - item.html_input_href = as_unicode(quote(bhref)) + item.html_input_href = as_unicode(urllib.parse.quote(bhref)) if guessed in self.OEB_STYLES: - item.override_css_fetch = partial( + item.override_css_fetch = functools.partial( self.css_import_handler, os.path.dirname(link)) item.data self.added_resources[link] = href diff --git a/ebook_converter/ebooks/conversion/plugins/html_output.py b/ebook_converter/ebooks/conversion/plugins/html_output.py index 8f2338a..f34a373 100644 --- a/ebook_converter/ebooks/conversion/plugins/html_output.py +++ b/ebook_converter/ebooks/conversion/plugins/html_output.py @@ -1,12 +1,17 @@ -import os, re, shutil -from os.path import dirname, abspath, relpath as _relpath, exists, basename +import os import pkg_resources +import re +import shutil + +from lxml import etree -from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation from ebook_converter import CurrentDir -from ebook_converter.ptempfile import PersistentTemporaryDirectory +from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation +from ebook_converter.ebooks.oeb.base import element from ebook_converter.polyglot.builtins import unicode_type - +from ebook_converter.polyglot.urllib import unquote +from ebook_converter.ptempfile import PersistentTemporaryDirectory +from ebook_converter.utils.cleantext import clean_xml_chars __license__ = 'GPL 3' __copyright__ = '2010, Fabian Grassl ' @@ -14,7 +19,7 @@ __docformat__ = 'restructuredtext en' def relpath(*args): - return _relpath(*args).replace(os.sep, '/') + return os.path.relpath(*args).replace(os.sep, '/') class HTMLOutput(OutputFormatPlugin): @@ -47,11 +52,7 @@ class HTMLOutput(OutputFormatPlugin): ''' Generate table of contents ''' - from lxml import etree - from ebook_converter.polyglot.urllib import unquote - from ebook_converter.ebooks.oeb.base import element - from ebook_converter.utils.cleantext 
import clean_xml_chars with CurrentDir(output_dir): def build_node(current_node, parent=None): if parent is None: @@ -60,7 +61,8 @@ class HTMLOutput(OutputFormatPlugin): parent = element(parent, ('ul')) for node in current_node.nodes: point = element(parent, 'li') - href = relpath(abspath(unquote(node.href)), dirname(ref_url)) + href = relpath(os.path.abspath(unquote(node.href)), + os.path.dirname(ref_url)) if isinstance(href, bytes): href = href.decode('utf-8') link = element(point, 'a', href=clean_xml_chars(href)) @@ -131,10 +133,10 @@ class HTMLOutput(OutputFormatPlugin): tempdir = os.path.realpath(PersistentTemporaryDirectory()) output_file = os.path.join(tempdir, - basename(re.sub(r'\.zip', '', output_path)+'.html')) + os.path.basename(re.sub(r'\.zip', '', output_path)+'.html')) output_dir = re.sub(r'\.html', '', output_file)+'_files' - if not exists(output_dir): + if not os.path.exists(output_dir): os.makedirs(output_dir) css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css' @@ -145,9 +147,10 @@ class HTMLOutput(OutputFormatPlugin): html_toc = self.generate_html_toc(oeb_book, output_file, output_dir) templite = Templite(template_html_index_data) nextLink = oeb_book.spine[0].href - nextLink = relpath(output_dir+os.sep+nextLink, dirname(output_file)) - cssLink = relpath(abspath(css_path), dirname(output_file)) - tocUrl = relpath(output_file, dirname(output_file)) + nextLink = relpath(output_dir+os.sep+nextLink, + os.path.dirname(output_file)) + cssLink = relpath(os.path.abspath(css_path), os.path.dirname(output_file)) + tocUrl = relpath(output_file, os.path.dirname(output_file)) t = templite.render(has_toc=bool(oeb_book.toc.count()), toc=html_toc, meta=meta, nextLink=nextLink, tocUrl=tocUrl, cssLink=cssLink, @@ -158,9 +161,9 @@ class HTMLOutput(OutputFormatPlugin): with CurrentDir(output_dir): for item in oeb_book.manifest: - path = abspath(unquote(item.href)) - dir = dirname(path) - if not exists(dir): + path = os.path.abspath(unquote(item.href)) + dir = 
os.path.dirname(path) + if not os.path.exists(dir): os.makedirs(dir) if item.spine_position is not None: with open(path, 'wb') as f: @@ -171,8 +174,8 @@ class HTMLOutput(OutputFormatPlugin): item.unload_data_from_memory(memory=path) for item in oeb_book.spine: - path = abspath(unquote(item.href)) - dir = dirname(path) + path = os.path.abspath(unquote(item.href)) + dir = os.path.dirname(path) root = item.data.getroottree() # get & clean HTML -data @@ -191,18 +194,18 @@ class HTMLOutput(OutputFormatPlugin): # generate link to next page if item.spine_position+1 < len(oeb_book.spine): nextLink = oeb_book.spine[item.spine_position+1].href - nextLink = relpath(abspath(nextLink), dir) + nextLink = relpath(os.path.abspath(nextLink), dir) else: nextLink = None # generate link to previous page if item.spine_position > 0: prevLink = oeb_book.spine[item.spine_position-1].href - prevLink = relpath(abspath(prevLink), dir) + prevLink = relpath(os.path.abspath(prevLink), dir) else: prevLink = None - cssLink = relpath(abspath(css_path), dir) + cssLink = relpath(os.path.abspath(css_path), dir) tocUrl = relpath(output_file, dir) firstContentPageLink = oeb_book.spine[0].href @@ -222,8 +225,8 @@ class HTMLOutput(OutputFormatPlugin): item.unload_data_from_memory(memory=path) zfile = zipfile.ZipFile(output_path, "w") - zfile.add_dir(output_dir, basename(output_dir)) - zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED) + zfile.add_dir(output_dir, os.path.basename(output_dir)) + zfile.write(output_file, os.path.basename(output_file), zipfile.ZIP_DEFLATED) if opts.extract_to: if os.path.exists(opts.extract_to): diff --git a/ebook_converter/ebooks/conversion/plugins/oeb_output.py b/ebook_converter/ebooks/conversion/plugins/oeb_output.py index 27ac5d8..ab80d3b 100644 --- a/ebook_converter/ebooks/conversion/plugins/oeb_output.py +++ b/ebook_converter/ebooks/conversion/plugins/oeb_output.py @@ -1,9 +1,14 @@ -import os, re +import os +import re +from lxml import etree from 
ebook_converter.customize.conversion import (OutputFormatPlugin, OptionRecommendation) from ebook_converter import CurrentDir +from ebook_converter.polyglot.urllib import unquote +from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES +from ebook_converter.ebooks.oeb.normalize_css import condense_sheet __license__ = 'GPL 3' @@ -21,14 +26,10 @@ class OEBOutput(OutputFormatPlugin): recommendations = {('pretty_print', True, OptionRecommendation.HIGH)} def convert(self, oeb_book, output_path, input_plugin, opts, log): - from ebook_converter.polyglot.urllib import unquote - from lxml import etree self.log, self.opts = log, opts if not os.path.exists(output_path): os.makedirs(output_path) - from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES - from ebook_converter.ebooks.oeb.normalize_css import condense_sheet with CurrentDir(output_path): results = oeb_book.to_opf2(page_map=True) for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME): diff --git a/ebook_converter/ebooks/docx/writer/links.py b/ebook_converter/ebooks/docx/writer/links.py index 97b27c0..b69f520 100644 --- a/ebook_converter/ebooks/docx/writer/links.py +++ b/ebook_converter/ebooks/docx/writer/links.py @@ -1,9 +1,10 @@ -import posixpath, re -from uuid import uuid4 +import posixpath +import re +import urllib.parse +import uuid from ebook_converter.utils.filenames import ascii_text from ebook_converter.polyglot.builtins import unicode_type -from ebook_converter.polyglot.urllib import urlparse __license__ = 'GPL v3' @@ -67,7 +68,7 @@ class LinksManager(object): self.namespace = namespace self.log = log self.document_relationships = document_relationships - self.top_anchor = unicode_type(uuid4().hex) + self.top_anchor = unicode_type(uuid.uuid4().hex) self.anchor_map = {} self.used_bookmark_names = set() self.bmark_id = 0 @@ -100,7 +101,7 @@ class LinksManager(object): def serialize_hyperlink(self, parent, link): item, url, tooltip = link - purl = 
urlparse(url) + purl = urllib.parse.urlparse(url) href = purl.path def make_link(parent, anchor=None, id=None, tooltip=None): @@ -133,7 +134,7 @@ class LinksManager(object): def process_toc_node(self, toc, level=0): href = toc.href if href: - purl = urlparse(href) + purl = urllib.parse.urlparse(href) href = purl.path if href in self.document_hrefs: key = (href, purl.fragment or self.top_anchor) diff --git a/ebook_converter/ebooks/fb2/fb2ml.py b/ebook_converter/ebooks/fb2/fb2ml.py index 590d13b..8977eaa 100644 --- a/ebook_converter/ebooks/fb2/fb2ml.py +++ b/ebook_converter/ebooks/fb2/fb2ml.py @@ -1,8 +1,11 @@ """ Transform OEB content into FB2 markup """ -import re, textwrap, uuid from datetime import datetime +import re +import textwrap +import urllib.parse +import uuid from lxml import etree @@ -14,7 +17,6 @@ from ebook_converter.utils.img import save_cover_data_to from ebook_converter.ebooks.oeb.base import urlnormalize from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes from ebook_converter.polyglot.binary import as_base64_unicode -from ebook_converter.polyglot.urllib import urlparse __license__ = 'GPL 3' @@ -508,7 +510,7 @@ class FB2MLizer(object): tags.append('p') if tag == 'a' and elem_tree.attrib.get('href', None): # Handle only external links for now - if urlparse(elem_tree.attrib['href']).netloc: + if urllib.parse.urlparse(elem_tree.attrib['href']).netloc: p_txt, p_tag = self.ensure_p() fb2_out += p_txt tags += p_tag diff --git a/ebook_converter/ebooks/html/input.py b/ebook_converter/ebooks/html/input.py index ff51158..0ef1445 100644 --- a/ebook_converter/ebooks/html/input.py +++ b/ebook_converter/ebooks/html/input.py @@ -1,14 +1,17 @@ """ Input plugin for HTML or OPF ebooks. 
""" -import os, re, sys, errno as gerrno +import errno +import os +import re +import sys +import urllib.parse from ebook_converter.ebooks.oeb.base import urlunquote from ebook_converter.ebooks.chardet import detect_xml_encoding from ebook_converter.constants import iswindows from ebook_converter import unicode_path, as_unicode, replace_entities -from ebook_converter.polyglot.builtins import is_py3, unicode_type -from ebook_converter.polyglot.urllib import urlparse, urlunparse +from ebook_converter.polyglot.builtins import unicode_type __license__ = 'GPL v3' @@ -29,7 +32,7 @@ class Link(object): if iswindows and path.startswith('/'): path = path[1:] isabs = True - path = urlunparse(('', '', path, url.params, url.query, '')) + path = urllib.parse.urlunparse(('', '', path, url.params, url.query, '')) path = urlunquote(path) if isabs or os.path.isabs(path): return path @@ -43,7 +46,7 @@ class Link(object): ''' assert isinstance(url, unicode_type) and isinstance(base, unicode_type) self.url = url - self.parsed_url = urlparse(self.url) + self.parsed_url = urllib.parse.urlparse(self.url) self.is_local = self.parsed_url.scheme in ('', 'file') self.is_internal = self.is_local and not bool(self.parsed_url.path) self.path = None @@ -62,16 +65,13 @@ class Link(object): def __str__(self): return 'Link: %s --> %s'%(self.url, self.path) - if not is_py3: - __unicode__ = __str__ - class IgnoreFile(Exception): - def __init__(self, msg, errno): + def __init__(self, msg, err_no): Exception.__init__(self, msg) - self.doesnt_exist = errno == gerrno.ENOENT - self.errno = errno + self.errno = err_no + self.doesnt_exist = err_no == errno.ENOENT class HTMLFile(object): diff --git a/ebook_converter/ebooks/htmlz/oeb2html.py b/ebook_converter/ebooks/htmlz/oeb2html.py index d7ad6dd..5256eeb 100644 --- a/ebook_converter/ebooks/htmlz/oeb2html.py +++ b/ebook_converter/ebooks/htmlz/oeb2html.py @@ -3,6 +3,7 @@ Transform OEB content into a single (more or less) HTML file. 
""" import os import re +import urllib.parse from functools import partial from lxml import html @@ -13,7 +14,6 @@ from ebook_converter.ebooks.oeb.base import ( from ebook_converter.ebooks.oeb.stylizer import Stylizer from ebook_converter.utils.logging import default_log from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes, as_bytes -from ebook_converter.polyglot.urllib import urldefrag __license__ = 'GPL 3' @@ -101,7 +101,7 @@ class OEB2HTML(object): for attr in attribs: if attr in link_attrs: href = item.abshref(attribs[attr]) - href, id = urldefrag(href) + href, id = urllib.parse.urldefrag(href) if href in self.base_hrefs: self.get_link_id(href, id) diff --git a/ebook_converter/ebooks/lrf/html/convert_from.py b/ebook_converter/ebooks/lrf/html/convert_from.py index 89ee036..85c73b6 100644 --- a/ebook_converter/ebooks/lrf/html/convert_from.py +++ b/ebook_converter/ebooks/lrf/html/convert_from.py @@ -12,6 +12,7 @@ import os import re import sys import tempfile +import urllib.parse from collections import deque from functools import partial from itertools import chain @@ -37,7 +38,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import ( ) from ebook_converter.ptempfile import PersistentTemporaryFile from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type -from ebook_converter.polyglot.urllib import unquote, urlparse +from ebook_converter.polyglot.urllib import unquote from PIL import Image as PILImage @@ -51,7 +52,7 @@ def update_css(ncss, ocss): def munge_paths(basepath, url): - purl = urlparse(unquote(url),) + purl = urllib.parse.urlparse(unquote(url),) path, fragment = purl[2], purl[5] if path: path = path.replace('/', os.sep) @@ -1471,7 +1472,8 @@ class HTMLConverter(object): pass elif tagname == 'a' and self.link_levels >= 0: if tag.has_attr('href') and not self.link_exclude.match(tag['href']): - if urlparse(tag['href'])[0] not in ('', 'file'): + if urllib.parse.urlparse(tag['href'])[0] not in ('', 
+ 'file'): self.process_children(tag, tag_css, tag_pseudo_css) else: path = munge_paths(self.target_prefix, tag['href'])[0] @@ -1513,7 +1515,7 @@ class HTMLConverter(object): dropcaps = tag.get('class') in ('libprs500_dropcaps', ['libprs500_dropcaps']) self.process_image(path, tag_css, width, height, dropcaps=dropcaps, rescale=True) - elif not urlparse(tag['src'])[0]: + elif not urllib.parse.urlparse(tag['src'])[0]: self.log.warn('Could not find image: '+tag['src']) else: self.log.debug("Failed to process: %s"%unicode_type(tag)) diff --git a/ebook_converter/ebooks/metadata/__init__.py b/ebook_converter/ebooks/metadata/__init__.py index 5d3648d..6dcb442 100644 --- a/ebook_converter/ebooks/metadata/__init__.py +++ b/ebook_converter/ebooks/metadata/__init__.py @@ -2,12 +2,15 @@ Provides abstraction for metadata reading.writing from a variety of ebook formats. """ -import os, sys, re +import os +import re +import sys +import urllib.parse from ebook_converter import relpath, guess_type, prints, force_unicode from ebook_converter.utils.config_base import tweaks from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, getcwd, iteritems, itervalues, as_unicode -from ebook_converter.polyglot.urllib import quote, unquote, urlparse +from ebook_converter.polyglot.urllib import unquote __license__ = 'GPL v3' @@ -241,7 +244,7 @@ class Resource(object): path = path.decode(sys.getfilesystemencoding()) self.path = path else: - url = urlparse(href_or_path) + url = urllib.parse.urlparse(href_or_path) if url[0] not in ('', 'file'): self._href = href_or_path else: @@ -268,7 +271,7 @@ class Resource(object): if self.path is None: return self._href f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment - frag = '#'+as_unicode(quote(f)) if self.fragment else '' + frag = '#'+as_unicode(urllib.parse.quote(f)) if self.fragment else '' if self.path == basedir: return ''+frag try: @@ -277,7 +280,7 @@ class Resource(object): rpath = 
self.path if isinstance(rpath, unicode_type): rpath = rpath.encode('utf-8') - return as_unicode(quote(rpath.replace(os.sep, '/')))+frag + return as_unicode(urllib.parse.quote(rpath.replace(os.sep, '/')))+frag def set_basedir(self, path): self._basedir = path diff --git a/ebook_converter/ebooks/metadata/opf2.py b/ebook_converter/ebooks/metadata/opf2.py index e3d8f48..fe270e0 100644 --- a/ebook_converter/ebooks/metadata/opf2.py +++ b/ebook_converter/ebooks/metadata/opf2.py @@ -1,7 +1,17 @@ """ lxml based OPF parser. """ -import re, sys, unittest, functools, os, uuid, glob, io, json, copy +import copy +import functools +import glob +import io +import json +import os +import re +import sys +import unittest +import urllib.parse +import uuid from lxml import etree @@ -18,7 +28,7 @@ from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars from ebook_converter.utils.config import tweaks from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.polyglot.builtins import iteritems, unicode_type, getcwd -from ebook_converter.polyglot.urllib import unquote, urlparse +from ebook_converter.polyglot.urllib import unquote __license__ = 'GPL v3' @@ -76,7 +86,7 @@ class Resource(object): # {{{ self.path = path else: href_or_path = href_or_path - url = urlparse(href_or_path) + url = urllib.parse.urlparse(href_or_path) if url[0] not in ('', 'file'): self._href = href_or_path else: diff --git a/ebook_converter/ebooks/metadata/toc.py b/ebook_converter/ebooks/metadata/toc.py index 4e8b82d..f83882a 100644 --- a/ebook_converter/ebooks/metadata/toc.py +++ b/ebook_converter/ebooks/metadata/toc.py @@ -1,5 +1,9 @@ -import os, glob, re, functools -from collections import Counter +import collections +import functools +import glob +import os +import re +import urllib.parse from lxml import etree from lxml.builder import ElementMaker @@ -9,7 +13,7 @@ from ebook_converter.ebooks.chardet import xml_to_unicode from ebook_converter.utils.xml_parse import 
safe_xml_fromstring from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.polyglot.builtins import unicode_type, getcwd -from ebook_converter.polyglot.urllib import unquote, urlparse +from ebook_converter.polyglot.urllib import unquote __license__ = 'GPL v3' @@ -30,7 +34,7 @@ def parse_html_toc(data): data = xml_to_unicode(data, strip_encoding_pats=True, resolve_entities=True)[0] root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False, sanitize_names=True) for a in root.xpath('//*[@href and local-name()="a"]'): - purl = urlparse(unquote(a.get('href'))) + purl = urllib.parse.urlparse(unquote(a.get('href'))) href, fragment = purl[2], purl[5] if not fragment: fragment = None @@ -142,7 +146,7 @@ class TOC(list): if toc is not None: if toc.lower() not in ('ncx', 'ncxtoc'): - toc = urlparse(unquote(toc))[2] + toc = urllib.parse.urlparse(unquote(toc))[2] toc = toc.replace('/', os.sep) if not os.path.isabs(toc): toc = os.path.join(self.base_path, toc) @@ -209,7 +213,7 @@ class TOC(list): if content and text: content = content[0] # if get_attr(content, attr='src'): - purl = urlparse(content.get('src')) + purl = urllib.parse.urlparse(content.get('src')) href, fragment = unquote(purl[2]), unquote(purl[5]) nd = dest.add_item(href, fragment, text) nd.play_order = play_order @@ -253,7 +257,7 @@ class TOC(list): navmap = E.navMap() root.append(navmap) root.set('{http://www.w3.org/XML/1998/namespace}lang', 'en') - c = Counter() + c = collections.Counter() def navpoint(parent, np): text = np.text diff --git a/ebook_converter/ebooks/mobi/reader/mobi8.py b/ebook_converter/ebooks/mobi/reader/mobi8.py index 25f353f..3977af4 100644 --- a/ebook_converter/ebooks/mobi/reader/mobi8.py +++ b/ebook_converter/ebooks/mobi/reader/mobi8.py @@ -1,7 +1,10 @@ -import struct, re, os -from collections import namedtuple -from itertools import repeat -from uuid import uuid4 +import collections +import itertools +import os +import re +import struct +import 
urllib.parse +import uuid from lxml import etree @@ -16,21 +19,20 @@ from ebook_converter.ebooks.mobi.utils import read_font_record from ebook_converter.ebooks.oeb.parse_utils import parse_html from ebook_converter.ebooks.oeb.base import XPath, XHTML, xml2text from ebook_converter.polyglot.builtins import unicode_type, getcwd, as_unicode -from ebook_converter.polyglot.urllib import urldefrag __license__ = 'GPL v3' __copyright__ = '2012, Kovid Goyal ' __docformat__ = 'restructuredtext en' -Part = namedtuple('Part', +Part = collections.namedtuple('Part', 'num type filename start end aid') -Elem = namedtuple('Elem', +Elem = collections.namedtuple('Elem', 'insert_pos toc_text file_number sequence_number start_pos ' 'length') -FlowInfo = namedtuple('FlowInfo', +FlowInfo = collections.namedtuple('FlowInfo', 'type format dir fname') # locate beginning and ending positions of tag with specific aid attribute @@ -81,7 +83,7 @@ class Mobi8Reader(object): def __call__(self): self.mobi6_reader.check_for_drm() - self.aid_anchor_suffix = uuid4().hex.encode('utf-8') + self.aid_anchor_suffix = uuid.uuid4().hex.encode('utf-8') bh = self.mobi6_reader.book_header if self.mobi6_reader.kf8_type == 'joint': offset = self.mobi6_reader.kf8_boundary + 2 @@ -127,7 +129,7 @@ class Mobi8Reader(object): if self.header.skelidx != NULL_INDEX: table = read_index(self.kf8_sections, self.header.skelidx, self.header.codec)[0] - File = namedtuple('File', + File = collections.namedtuple('File', 'file_number name divtbl_count start_position length') for i, text in enumerate(table): @@ -149,7 +151,7 @@ class Mobi8Reader(object): if self.header.othidx != NULL_INDEX: table, cncx = read_index(self.kf8_sections, self.header.othidx, self.header.codec) - Item = namedtuple('Item', + Item = collections.namedtuple('Item', 'type title pos_fid') for i, ref_type in enumerate(table): @@ -222,7 +224,7 @@ class Mobi8Reader(object): self.parts.append(skeleton) if divcnt < 1: # Empty file - aidtext = 
unicode_type(uuid4()) + aidtext = unicode_type(uuid.uuid4()) filename = aidtext + '.html' self.partinfo.append(Part(skelnum, 'text', filename, skelpos, baseptr, aidtext)) @@ -293,7 +295,7 @@ class Mobi8Reader(object): for part in self.partinfo: if pos >= part.start and pos < part.end: return part - return Part(*repeat(None, len(Part._fields))) + return Part(*itertools.repeat(None, len(Part._fields))) def get_id_tag_by_pos_fid(self, posfid, offset): # first convert kindle:pos:fid and offset info to position in file @@ -475,7 +477,7 @@ class Mobi8Reader(object): for ref in guide: if ref.type == 'toc': href = ref.href() - href, frag = urldefrag(href) + href, frag = urllib.parse.urldefrag(href) if os.path.exists(href.replace('/', os.sep)): try: toc = self.read_inline_toc(href, frag) @@ -554,7 +556,7 @@ class Mobi8Reader(object): if reached and elem.tag == XHTML('a') and elem.get('href', False): href = elem.get('href') - href, frag = urldefrag(href) + href, frag = urllib.parse.urldefrag(href) href = base_href + '/' + href text = xml2text(elem).strip() if (text, href, frag) in seen: diff --git a/ebook_converter/ebooks/mobi/writer2/serializer.py b/ebook_converter/ebooks/mobi/writer2/serializer.py index 5548882..4d579bd 100644 --- a/ebook_converter/ebooks/mobi/writer2/serializer.py +++ b/ebook_converter/ebooks/mobi/writer2/serializer.py @@ -1,7 +1,8 @@ +import collections +import io import re import unicodedata -from collections import defaultdict -from io import BytesIO +import urllib.parse from ebook_converter.ebooks.mobi.mobiml import MBP_NS from ebook_converter.ebooks.mobi.utils import is_guide_ref_start @@ -9,7 +10,6 @@ from ebook_converter.ebooks.oeb.base import ( OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize ) from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes -from ebook_converter.polyglot.urllib import urldefrag __license__ = 'GPL v3' @@ -17,12 +17,12 @@ __copyright__ = '2011, Kovid Goyal ' __docformat__ = 
'restructuredtext en' -class Buf(BytesIO): +class Buf(io.BytesIO): def write(self, x): if isinstance(x, unicode_type): x = x.encode('utf-8') - BytesIO.write(self, x) + io.BytesIO.write(self, x) class Serializer(object): @@ -63,7 +63,7 @@ class Serializer(object): # Mapping of hrefs (urlnormalized) to a list of offsets into the buffer # where filepos="..." elements are written corresponding to links that # point to the href. This is used at the end to fill in the correct values. - self.href_offsets = defaultdict(list) + self.href_offsets = collections.defaultdict(list) # List of offsets in the buffer of non linear items in the spine. These # become uncrossable breaks in the MOBI @@ -81,7 +81,7 @@ class Serializer(object): item.is_article_start = item.is_article_end = False def spine_item(tocitem): - href = urldefrag(tocitem.href)[0] + href = urllib.parse.urldefrag(tocitem.href)[0] for item in self.oeb.spine: if item.href == href: return item @@ -157,7 +157,7 @@ class Serializer(object): hrefs = self.oeb.manifest.hrefs buf.write(b'') for ref in self.oeb.guide.values(): - path = urldefrag(ref.href)[0] + path = urllib.parse.urldefrag(ref.href)[0] if path not in hrefs or hrefs[path].media_type not in OEB_DOCS: continue @@ -188,7 +188,7 @@ class Serializer(object): ''' hrefs = self.oeb.manifest.hrefs try: - path, frag = urldefrag(urlnormalize(href)) + path, frag = urllib.parse.urldefrag(urlnormalize(href)) except ValueError: # Unparseable URL return False @@ -382,7 +382,7 @@ class Serializer(object): if href not in id_offsets: self.logger.warn('Hyperlink target %r not found' % href) # Link to the top of the document, better than just ignoring - href, _ = urldefrag(href) + href, _ = urllib.parse.urldefrag(href) if href in self.id_offsets: ioff = self.id_offsets[href] if is_start: diff --git a/ebook_converter/ebooks/oeb/base.py b/ebook_converter/ebooks/oeb/base.py index e89f694..638e096 100644 --- a/ebook_converter/ebooks/oeb/base.py +++ 
b/ebook_converter/ebooks/oeb/base.py @@ -5,6 +5,7 @@ import os, re, logging, sys, numbers from collections import defaultdict from itertools import count from operator import attrgetter +import urllib.parse from lxml import etree, html from ebook_converter import force_unicode @@ -17,7 +18,7 @@ from ebook_converter.ebooks.oeb.parse_utils import barename, XHTML_NS, namespace from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils.short_uuid import uuid4 from ebook_converter.polyglot.builtins import iteritems, unicode_type, string_or_bytes, itervalues, codepoint_to_chr -from ebook_converter.polyglot.urllib import unquote as urlunquote, urldefrag, urljoin, urlparse, urlunparse +from ebook_converter.polyglot.urllib import unquote as urlunquote __license__ = 'GPL v3' @@ -185,13 +186,13 @@ def iterlinks(root, find_links_in_css=True): if attrib in attribs: value = el.get(attrib) if codebase is not None: - value = urljoin(codebase, value) + value = urllib.parse.urljoin(codebase, value) yield (el, attrib, value, 0) if 'archive' in attribs: for match in _archive_re.finditer(el.get('archive')): value = match.group(0) if codebase is not None: - value = urljoin(codebase, value) + value = urllib.parse.urljoin(codebase, value) yield (el, 'archive', value, match.start()) else: for attr in attribs: @@ -217,7 +218,7 @@ def make_links_absolute(root, base_url): came from) ''' def link_repl(href): - return urljoin(base_url, href) + return urllib.parse.urljoin(base_url, href) rewrite_links(root, link_repl) @@ -463,16 +464,16 @@ def urlnormalize(href): characters URL quoted. 
""" try: - parts = urlparse(href) + parts = urllib.parse.urlparse(href) except ValueError as e: raise ValueError('Failed to parse the URL: %r with underlying error: %s' % (href, as_unicode(e))) if not parts.scheme or parts.scheme == 'file': - path, frag = urldefrag(href) + path, frag = urllib.parse.urldefrag(href) parts = ('', '', path, '', '', frag) parts = (part.replace('\\', '/') for part in parts) parts = (urlunquote(part) for part in parts) parts = (urlquote(part) for part in parts) - return urlunparse(parts) + return urllib.parse.urlunparse(parts) def extract(elem): @@ -1135,7 +1136,7 @@ class Manifest(object): relative to this manifest item to a book-absolute reference. """ try: - purl = urlparse(href) + purl = urllib.parse.urlparse(href) except ValueError: return href scheme = purl.scheme @@ -1143,8 +1144,8 @@ class Manifest(object): return href purl = list(purl) purl[0] = '' - href = urlunparse(purl) - path, frag = urldefrag(href) + href = urllib.parse.urlunparse(purl) + path, frag = urllib.parse.urldefrag(href) if not path: if frag: return '#'.join((self.href, frag)) @@ -1423,7 +1424,7 @@ class Guide(object): @property def item(self): """The manifest item associated with this reference.""" - path = urldefrag(self.href)[0] + path = urllib.parse.urldefrag(self.href)[0] hrefs = self.oeb.manifest.hrefs return hrefs.get(path, None) @@ -1596,7 +1597,7 @@ class TOC(object): """ prev = None for node in list(self.nodes): - if prev and urldefrag(prev.href)[0] == urldefrag(node.href)[0]: + if prev and urllib.parse.urldefrag(prev.href)[0] == urllib.parse.urldefrag(node.href)[0]: self.nodes.remove(node) prev.nodes.append(node) else: @@ -1988,7 +1989,7 @@ class OEBBook(object): def rel_href(base_href, href): """Convert the URL provided in :param:`href` to a URL relative to the URL in :param:`base_href` """ - if urlparse(href).scheme: + if urllib.parse.urlparse(href).scheme: return href if '/' not in base_href: return href @@ -2004,7 +2005,7 @@ def rel_href(base_href, 
href): break if not base: return href - target, frag = urldefrag(href) + target, frag = urllib.parse.urldefrag(href) target = target.split('/') index = 0 for index in range(min(len(base), len(target))): diff --git a/ebook_converter/ebooks/oeb/polish/container.py b/ebook_converter/ebooks/oeb/polish/container.py index 0690658..9c08391 100644 --- a/ebook_converter/ebooks/oeb/polish/container.py +++ b/ebook_converter/ebooks/oeb/polish/container.py @@ -11,6 +11,7 @@ import uuid from collections import defaultdict from io import BytesIO from itertools import count +import urllib.parse from css_parser import getUrls, replaceUrls @@ -49,7 +50,6 @@ from ebook_converter.utils.logging import default_log from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.zipfile import ZipFile from ebook_converter.polyglot.builtins import iteritems, unicode_type -from ebook_converter.polyglot.urllib import urlparse exists, join, relpath = os.path.exists, os.path.join, os.path.relpath @@ -107,7 +107,7 @@ def name_to_href(name, root, base=None, quote=urlquote): def href_to_name(href, root, base=None): base = root if base is None else os.path.dirname(name_to_abspath(base, root)) try: - purl = urlparse(href) + purl = urllib.parse.urlparse(href) except ValueError: return None if purl.scheme or not purl.path: diff --git a/ebook_converter/ebooks/oeb/polish/replace.py b/ebook_converter/ebooks/oeb/polish/replace.py index af211e7..be7090f 100644 --- a/ebook_converter/ebooks/oeb/polish/replace.py +++ b/ebook_converter/ebooks/oeb/polish/replace.py @@ -2,13 +2,13 @@ import codecs, shutil, os, posixpath from ebook_converter.polyglot.builtins import iteritems, itervalues from functools import partial from collections import Counter, defaultdict +import urllib.parse from ebook_converter import sanitize_file_name from ebook_converter.ebooks.chardet import strip_encoding_declarations from ebook_converter.ebooks.oeb.base import css_text from 
ebook_converter.ebooks.oeb.polish.css import iter_declarations, remove_property_value from ebook_converter.ebooks.oeb.polish.utils import extract -from ebook_converter.polyglot.urllib import urlparse, urlunparse __license__ = 'GPL v3' @@ -38,7 +38,7 @@ class LinkReplacer(object): nname = self.link_map.get(name, None) if not nname: return url - purl = urlparse(url) + purl = urllib.parse.urlparse(url) href = self.container.name_to_href(nname, self.base) if purl.fragment: nfrag = self.frag_map(name, purl.fragment) @@ -68,12 +68,12 @@ class IdReplacer(object): id_map = self.id_map.get(name) if id_map is None: return url - purl = urlparse(url) + purl = urllib.parse.urlparse(url) nfrag = id_map.get(purl.fragment) if nfrag is None: return url purl = purl._replace(fragment=nfrag) - href = urlunparse(purl) + href = urllib.parse.urlunparse(purl) if href != url: self.replaced = True return href @@ -89,7 +89,7 @@ class LinkRebaser(object): def __call__(self, url): if url and url.startswith('#'): return url - purl = urlparse(url) + purl = urllib.parse.urlparse(url) frag = purl.fragment name = self.container.href_to_name(url, self.old_name) if not name: diff --git a/ebook_converter/ebooks/oeb/polish/split.py b/ebook_converter/ebooks/oeb/polish/split.py index 10fe9cd..e4f7172 100644 --- a/ebook_converter/ebooks/oeb/polish/split.py +++ b/ebook_converter/ebooks/oeb/polish/split.py @@ -1,12 +1,12 @@ import copy, os, re from ebook_converter.polyglot.builtins import string_or_bytes +import urllib.parse from ebook_converter.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML, OEB_DOCS from ebook_converter.ebooks.oeb.polish.errors import MalformedMarkup from ebook_converter.ebooks.oeb.polish.toc import node_from_loc from ebook_converter.ebooks.oeb.polish.replace import LinkRebaser from ebook_converter.polyglot.builtins import iteritems, unicode_type -from ebook_converter.polyglot.urllib import urlparse __license__ = 'GPL v3' @@ -160,7 +160,7 @@ class SplitLinkReplacer(object): 
name = self.container.href_to_name(url, self.base) if name != self.top_name: return url - purl = urlparse(url) + purl = urllib.parse.urlparse(url) if purl.fragment and purl.fragment in self.bottom_anchors: url = self.container.name_to_href(self.bottom_name, self.base) + '#' + purl.fragment self.replaced = True @@ -225,7 +225,7 @@ def split(container, name, loc_or_xpath, before=True, totals=None): else: fname = container.href_to_name(url, name) if fname == name: - purl = urlparse(url) + purl = urllib.parse.urlparse(url) if purl.fragment in anchors_in_top: if r is root2: a.set('href', '%s#%s' % (container.name_to_href(name, bottom_name), purl.fragment)) @@ -310,7 +310,7 @@ class MergeLinkReplacer(object): amap = self.anchor_map.get(name, None) if amap is None: return url - purl = urlparse(url) + purl = urllib.parse.urlparse(url) frag = purl.fragment or '' frag = amap.get(frag, frag) url = self.container.name_to_href(self.master, self.base) + '#' + frag diff --git a/ebook_converter/ebooks/oeb/polish/toc.py b/ebook_converter/ebooks/oeb/polish/toc.py index 966368b..d51dfd5 100644 --- a/ebook_converter/ebooks/oeb/polish/toc.py +++ b/ebook_converter/ebooks/oeb/polish/toc.py @@ -3,6 +3,7 @@ from collections import Counter, OrderedDict from functools import partial from operator import itemgetter import pkg_resources +import urllib.parse from lxml import etree from lxml.builder import ElementMaker @@ -16,7 +17,6 @@ from ebook_converter.ebooks.oeb.polish.opf import set_guide_item, get_book_langu from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree from ebook_converter.utils.localization import get_lang, canonicalize_lang, lang_as_iso639_1 from ebook_converter.polyglot.builtins import iteritems, unicode_type -from ebook_converter.polyglot.urllib import urlparse __license__ = 'GPL v3' @@ -150,7 +150,7 @@ def add_from_navpoint(container, navpoint, parent, ncx_name): href = content.get('src', None) if href: dest = container.href_to_name(href, base=ncx_name) - 
frag = urlparse(href).fragment or None + frag = urllib.parse.urlparse(href).fragment or None return parent.add(text or None, dest or None, frag or None) @@ -183,7 +183,7 @@ def parse_ncx(container, ncx_name): href = pt.xpath('descendant::*[calibre:lower-case(local-name()) = "content"]/@src') if href: dest = container.href_to_name(href[0], base=ncx_name) - frag = urlparse(href[0]).fragment or None + frag = urllib.parse.urlparse(href[0]).fragment or None toc_root.page_list.append({'dest': dest, 'pagenum': pagenum, 'frag': frag}) return toc_root @@ -195,7 +195,7 @@ def add_from_li(container, li, parent, nav_name): href = x.get('href') if href: dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name) - frag = urlparse(href).fragment or None + frag = urllib.parse.urlparse(href).fragment or None break return parent.add(text or None, dest or None, frag or None) diff --git a/ebook_converter/ebooks/oeb/reader.py b/ebook_converter/ebooks/oeb/reader.py index 749efa1..ed6d57d 100644 --- a/ebook_converter/ebooks/oeb/reader.py +++ b/ebook_converter/ebooks/oeb/reader.py @@ -3,6 +3,7 @@ Container-/OPF-based input OEBBook reader. 
""" import sys, os, uuid, copy, re, io from collections import defaultdict +import urllib.parse from lxml import etree @@ -23,7 +24,7 @@ from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.constants import __appname__, __version__ from ebook_converter import guess_type, xml_replace_entities from ebook_converter.polyglot.builtins import unicode_type -from ebook_converter.polyglot.urllib import unquote, urldefrag, urlparse +from ebook_converter.polyglot.urllib import unquote __all__ = ['OEBReader'] @@ -203,12 +204,12 @@ class OEBReader(object): for href in hrefs: if isinstance(href, bytes): href = href.decode('utf-8') - href, _ = urldefrag(href) + href, _ = urllib.parse.urldefrag(href) if not href: continue try: href = item.abshref(urlnormalize(href)) - scheme = urlparse(href).scheme + scheme = urllib.parse.urlparse(href).scheme except: self.oeb.log.exception( 'Skipping invalid href: %r'%href) @@ -221,9 +222,9 @@ class OEBReader(object): except: urls = [] for url in urls: - href, _ = urldefrag(url) + href, _ = urllib.parse.urldefrag(url) href = item.abshref(urlnormalize(href)) - scheme = urlparse(href).scheme + scheme = urllib.parse.urlparse(href).scheme if not scheme and href not in known: new.add(href) unchecked.clear() @@ -294,7 +295,7 @@ class OEBReader(object): # TODO: handle fallback chains continue for href in selector(item.data): - href, _ = urldefrag(href) + href, _ = urllib.parse.urldefrag(href) if not href: continue try: @@ -350,7 +351,7 @@ class OEBReader(object): manifest = self.oeb.manifest for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'): ref_href = elem.get('href') - path = urlnormalize(urldefrag(ref_href)[0]) + path = urlnormalize(urllib.parse.urldefrag(ref_href)[0]) if path not in manifest.hrefs: corrected_href = None for href in manifest.hrefs: @@ -393,7 +394,7 @@ class OEBReader(object): # This node is useless continue href = item.abshref(urlnormalize(href[0])) if href and href[0] else '' - path, _ = 
urldefrag(href) + path, _ = urllib.parse.urldefrag(href) if path and path not in self.oeb.manifest.hrefs: path = urlnormalize(path) if href and path not in self.oeb.manifest.hrefs: @@ -468,7 +469,7 @@ class OEBReader(object): href = site.get('href') if not title or not href: continue - path, _ = urldefrag(urlnormalize(href)) + path, _ = urllib.parse.urldefrag(urlnormalize(href)) if path not in self.oeb.manifest.hrefs: self.logger.warn('TOC reference %r not found' % href) continue @@ -480,7 +481,7 @@ class OEBReader(object): if 'toc' not in self.oeb.guide: return False self.log.debug('Reading TOC from HTML...') - itempath, frag = urldefrag(self.oeb.guide['toc'].href) + itempath, frag = urllib.parse.urldefrag(self.oeb.guide['toc'].href) item = self.oeb.manifest.hrefs[itempath] html = item.data if frag: @@ -496,7 +497,7 @@ class OEBReader(object): for anchor in xpath(html, './/h:a[@href]'): href = anchor.attrib['href'] href = item.abshref(urlnormalize(href)) - path, frag = urldefrag(href) + path, frag = urllib.parse.urldefrag(href) if path not in self.oeb.manifest.hrefs: continue title = xml2text(anchor) diff --git a/ebook_converter/ebooks/oeb/transforms/cover.py b/ebook_converter/ebooks/oeb/transforms/cover.py index de15853..fa46e42 100644 --- a/ebook_converter/ebooks/oeb/transforms/cover.py +++ b/ebook_converter/ebooks/oeb/transforms/cover.py @@ -1,4 +1,5 @@ import textwrap +import urllib.parse from ebook_converter import guess_type from ebook_converter.utils.imghdr import identify @@ -93,7 +94,6 @@ class CoverManager(object): return -1, -1 def insert_cover(self): - from ebook_converter.ebooks.oeb.base import urldefrag g, m = self.oeb.guide, self.oeb.manifest item = None href = None @@ -124,7 +124,7 @@ class CoverManager(object): data=safe_xml_fromstring(tp)) else: item = self.oeb.manifest.hrefs[ - urldefrag(self.oeb.guide['titlepage'].href)[0]] + urllib.parse.urldefrag(self.oeb.guide['titlepage'].href)[0]] if item is not None: self.oeb.spine.insert(0, item, True) 
if 'cover' not in self.oeb.guide.refs: diff --git a/ebook_converter/ebooks/oeb/transforms/filenames.py b/ebook_converter/ebooks/oeb/transforms/filenames.py index b168890..4b975ee 100644 --- a/ebook_converter/ebooks/oeb/transforms/filenames.py +++ b/ebook_converter/ebooks/oeb/transforms/filenames.py @@ -1,9 +1,9 @@ import posixpath +import urllib.parse from lxml import etree from ebook_converter.ebooks.oeb.base import rewrite_links, urlnormalize -from ebook_converter.polyglot.urllib import urldefrag, urlparse __license__ = 'GPL v3' @@ -38,7 +38,7 @@ class RenameFiles(object): # {{{ if self.oeb.guide: for ref in self.oeb.guide.values(): href = urlnormalize(ref.href) - href, frag = urldefrag(href) + href, frag = urllib.parse.urldefrag(href) replacement = self.rename_map.get(href, None) if replacement is not None: nhref = replacement @@ -52,7 +52,7 @@ class RenameFiles(object): # {{{ def fix_toc_entry(self, toc): if toc.href: href = urlnormalize(toc.href) - href, frag = urldefrag(href) + href, frag = urllib.parse.urldefrag(href) replacement = self.rename_map.get(href, None) if replacement is not None: @@ -66,11 +66,11 @@ class RenameFiles(object): # {{{ def url_replacer(self, orig_url): url = urlnormalize(orig_url) - parts = urlparse(url) + parts = urllib.parse.urlparse(url) if parts.scheme: # Only rewrite local URLs return orig_url - path, frag = urldefrag(url) + path, frag = urllib.parse.urldefrag(url) if self.renamed_items_map: orig_item = self.renamed_items_map.get(self.current_item.href, self.current_item) else: diff --git a/ebook_converter/ebooks/oeb/transforms/jacket.py b/ebook_converter/ebooks/oeb/transforms/jacket.py index eb4d0f9..6781ab2 100644 --- a/ebook_converter/ebooks/oeb/transforms/jacket.py +++ b/ebook_converter/ebooks/oeb/transforms/jacket.py @@ -2,10 +2,11 @@ import sys, os, re from xml.sax.saxutils import escape from string import Formatter import pkg_resources +import urllib.parse from ebook_converter import guess_type, strftime from 
ebook_converter.constants import iswindows -from ebook_converter.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urldefrag, urlnormalize +from ebook_converter.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urlnormalize from ebook_converter.library.comments import comments_to_html, markdown from ebook_converter.utils.date import is_date_undefined, as_local_time from ebook_converter.ebooks.chardet import strip_encoding_declarations @@ -73,7 +74,7 @@ class RemoveFirstImage(Base): self.log.warn('Could not find first image to remove') if deleted_item is not None: for item in list(self.oeb.toc): - href = urldefrag(item.href)[0] + href = urllib.parse.urldefrag(item.href)[0] if href == deleted_item.href: self.oeb.toc.remove(item) self.oeb.guide.remove_by_href(deleted_item.href) diff --git a/ebook_converter/ebooks/oeb/transforms/rasterize.py b/ebook_converter/ebooks/oeb/transforms/rasterize.py index f1bc251..60fc8a3 100644 --- a/ebook_converter/ebooks/oeb/transforms/rasterize.py +++ b/ebook_converter/ebooks/oeb/transforms/rasterize.py @@ -1,7 +1,9 @@ """ SVG rasterization transform. 
""" -import os, re +import os +import re +import urllib.parse # from PyQt5.Qt import ( # Qt, QByteArray, QBuffer, QIODevice, QColor, QImage, QPainter, QSvgRenderer) @@ -14,7 +16,6 @@ from ebook_converter.ebooks.oeb.stylizer import Stylizer from ebook_converter.ptempfile import PersistentTemporaryFile from ebook_converter.utils.imghdr import what from ebook_converter.polyglot.builtins import unicode_type -from ebook_converter.polyglot.urllib import urldefrag __license__ = 'GPL v3' @@ -114,7 +115,7 @@ class SVGRasterizer(object): hrefs = self.oeb.manifest.hrefs for elem in xpath(svg, '//svg:*[@xl:href]'): href = urlnormalize(elem.attrib[XLINK('href')]) - path = urldefrag(href)[0] + path = urllib.parse.urldefrag(href)[0] if not path: continue abshref = item.abshref(path) diff --git a/ebook_converter/ebooks/oeb/transforms/split.py b/ebook_converter/ebooks/oeb/transforms/split.py index c7d1d8a..79add5c 100644 --- a/ebook_converter/ebooks/oeb/transforms/split.py +++ b/ebook_converter/ebooks/oeb/transforms/split.py @@ -5,6 +5,7 @@ assumes a prior call to the flatcss transform. 
""" import os, functools, collections, re, copy from collections import OrderedDict +import urllib.parse from lxml.etree import XPath as _XPath from lxml import etree @@ -12,7 +13,7 @@ from lxml import etree from ebook_converter import as_unicode, force_unicode from ebook_converter.ebooks.epub import rules from ebook_converter.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES, - urldefrag, rewrite_links, XHTML, urlnormalize) + rewrite_links, XHTML, urlnormalize) from ebook_converter.ebooks.oeb.polish.split import do_split from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.urllib import unquote @@ -162,7 +163,7 @@ class Split(object): rewrite_links(item.data, self.rewrite_links) def rewrite_links(self, url): - href, frag = urldefrag(url) + href, frag = urllib.parse.urldefrag(url) try: href = self.current_item.abshref(href) except ValueError: @@ -453,7 +454,7 @@ class FlowSplitter(object): if self.oeb.guide: for ref in self.oeb.guide.values(): - href, frag = urldefrag(ref.href) + href, frag = urllib.parse.urldefrag(ref.href) if href == self.item.href: nhref = self.anchor_map[frag if frag else None] if frag: @@ -462,7 +463,7 @@ class FlowSplitter(object): def fix_toc_entry(toc): if toc.href: - href, frag = urldefrag(toc.href) + href, frag = urllib.parse.urldefrag(toc.href) if href == self.item.href: nhref = self.anchor_map[frag if frag else None] if frag: @@ -476,7 +477,7 @@ class FlowSplitter(object): if self.oeb.pages: for page in self.oeb.pages: - href, frag = urldefrag(page.href) + href, frag = urllib.parse.urldefrag(page.href) if href == self.item.href: nhref = self.anchor_map[frag if frag else None] if frag: diff --git a/ebook_converter/ebooks/oeb/transforms/structure.py b/ebook_converter/ebooks/oeb/transforms/structure.py index 8090ce2..9e6910c 100644 --- a/ebook_converter/ebooks/oeb/transforms/structure.py +++ b/ebook_converter/ebooks/oeb/transforms/structure.py @@ -1,4 +1,6 @@ -import re, uuid +import 
re +import uuid +import urllib.parse from lxml import etree from collections import OrderedDict, Counter @@ -6,7 +8,6 @@ from collections import OrderedDict, Counter from ebook_converter.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename from ebook_converter.ebooks import ConversionError from ebook_converter.polyglot.builtins import itervalues, unicode_type -from ebook_converter.polyglot.urllib import urlparse __license__ = 'GPL v3' @@ -209,7 +210,7 @@ class DetectStructure(object): for a in XPath('//h:a[@href]')(item.data): href = a.get('href') try: - purl = urlparse(href) + purl = urllib.parse.urlparse(href) except ValueError: self.log.warning('Ignoring malformed URL:', href) continue diff --git a/ebook_converter/ebooks/oeb/transforms/trimmanifest.py b/ebook_converter/ebooks/oeb/transforms/trimmanifest.py index 1c19dfb..ae72397 100644 --- a/ebook_converter/ebooks/oeb/transforms/trimmanifest.py +++ b/ebook_converter/ebooks/oeb/transforms/trimmanifest.py @@ -1,9 +1,10 @@ """ OPF manifest trimming transform. 
""" +import urllib.parse + from ebook_converter.ebooks.oeb.base import CSS_MIME, OEB_DOCS from ebook_converter.ebooks.oeb.base import urlnormalize, iterlinks -from ebook_converter.polyglot.urllib import urldefrag __license__ = 'GPL v3' @@ -32,7 +33,7 @@ class ManifestTrimmer(object): elif item.value in oeb.manifest.ids: used.add(oeb.manifest.ids[item.value]) for ref in oeb.guide.values(): - path, _ = urldefrag(ref.href) + path, _ = urllib.parse.urldefrag(ref.href) if path in oeb.manifest.hrefs: used.add(oeb.manifest.hrefs[path]) # TOC items are required to be in the spine diff --git a/ebook_converter/ebooks/textile/functions.py b/ebook_converter/ebooks/textile/functions.py index f019815..2ba98ad 100644 --- a/ebook_converter/ebooks/textile/functions.py +++ b/ebook_converter/ebooks/textile/functions.py @@ -4,11 +4,12 @@ PyTextile A Humane Web Text Generator """ import re +import urllib.request +import urllib.parse import uuid from ebook_converter.utils.smartypants import smartyPants from ebook_converter.polyglot.builtins import unicode_type -from ebook_converter.polyglot.urllib import urlopen, urlparse # Last upstream version basis @@ -85,7 +86,7 @@ def getimagesize(url): try: p = ImageFile.Parser() - f = urlopen(url) + f = urllib.request.urlopen(url) while True: s = f.read(1024) if not s: @@ -777,11 +778,11 @@ class Textile(object): True """ - (scheme, netloc) = urlparse(url)[0:2] + (scheme, netloc) = urllib.parse.urlparse(url)[0:2] return not scheme and not netloc def relURL(self, url): - scheme = urlparse(url)[0] + scheme = urllib.parse.urlparse(url)[0] if self.restricted and scheme and scheme not in self.url_schemes: return '#' return url diff --git a/ebook_converter/polyglot/functools.py b/ebook_converter/polyglot/functools.py deleted file mode 100644 index 589999f..0000000 --- a/ebook_converter/polyglot/functools.py +++ /dev/null @@ -1,8 +0,0 @@ -from ebook_converter.polyglot.builtins import is_py3 -if is_py3: - from functools import lru_cache -else: - from 
backports.functools_lru_cache import lru_cache - - -lru_cache diff --git a/ebook_converter/polyglot/html_entities.py b/ebook_converter/polyglot/html_entities.py deleted file mode 100644 index 90464c0..0000000 --- a/ebook_converter/polyglot/html_entities.py +++ /dev/null @@ -1,10 +0,0 @@ -#!/usr/bin/env python2 -# vim:fileencoding=utf-8 -# License: GPL v3 Copyright: 2019, Eli Schwartz - -from ebook_converter.polyglot.builtins import is_py3 - -if is_py3: - from html.entities import name2codepoint -else: - from htmlentitydefs import name2codepoint diff --git a/ebook_converter/polyglot/urllib.py b/ebook_converter/polyglot/urllib.py index 97ca92a..6fb8f81 100644 --- a/ebook_converter/polyglot/urllib.py +++ b/ebook_converter/polyglot/urllib.py @@ -1,44 +1,19 @@ -from ebook_converter.polyglot.builtins import is_py3 +from urllib.request import (build_opener, getproxies, install_opener, + HTTPBasicAuthHandler, HTTPCookieProcessor, HTTPDigestAuthHandler, + url2pathname, urlopen, Request) +from urllib.parse import (parse_qs, quote, unquote as uq, quote_plus, urldefrag, + urlencode, urljoin, urlparse, urlunparse, urlsplit, urlunsplit) +from urllib.error import HTTPError, URLError -if is_py3: - from urllib.request import (build_opener, getproxies, install_opener, # noqa - HTTPBasicAuthHandler, HTTPCookieProcessor, HTTPDigestAuthHandler, # noqa - url2pathname, urlopen, Request) # noqa - from urllib.parse import (parse_qs, quote, unquote as uq, quote_plus, urldefrag, # noqa - urlencode, urljoin, urlparse, urlunparse, urlsplit, urlunsplit) # noqa - from urllib.error import HTTPError, URLError # noqa - - def unquote(x, encoding='utf-8', errors='replace'): - binary = isinstance(x, bytes) - if binary: - x = x.decode(encoding, errors) - ans = uq(x, encoding, errors) - if binary: - ans = ans.encode(encoding, errors) - return ans - -else: - from urllib import (getproxies, quote, unquote as uq, quote_plus, url2pathname, # noqa - urlencode) # noqa - from urllib2 import (build_opener, 
install_opener, HTTPBasicAuthHandler, # noqa - HTTPCookieProcessor, HTTPDigestAuthHandler, HTTPError, URLError, # noqa - urlopen, Request) # noqa - from urlparse import (parse_qs, urldefrag, urljoin, urlparse, urlunparse, # noqa - urlsplit, urlunsplit) # noqa - - def unquote(x, encoding='utf-8', errors='replace'): - # unquote must run on a bytestring and will return a bytestring - # If it runs on a unicode object, it returns a double encoded unicode - # string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8') - # and the latter is correct - binary = isinstance(x, bytes) - if not binary: - x = x.encode(encoding, errors) - ans = uq(x) - if not binary: - ans = ans.decode(encoding, errors) - return ans +def unquote(x, encoding='utf-8', errors='replace'): + binary = isinstance(x, bytes) + if binary: + x = x.decode(encoding, errors) + ans = uq(x, encoding, errors) + if binary: + ans = ans.encode(encoding, errors) + return ans def unquote_plus(x, encoding='utf-8', errors='replace'): diff --git a/ebook_converter/utils/cleantext.py b/ebook_converter/utils/cleantext.py index 101517e..67c8133 100644 --- a/ebook_converter/utils/cleantext.py +++ b/ebook_converter/utils/cleantext.py @@ -1,7 +1,7 @@ import re +import html.entities from ebook_converter.polyglot.builtins import codepoint_to_chr -from ebook_converter.polyglot.html_entities import name2codepoint from ebook_converter.constants import plugins, preferred_encoding @@ -77,7 +77,8 @@ def unescape(text, rm=False, rchar=''): else: # named entity try: - text = codepoint_to_chr(name2codepoint[text[1:-1]]) + text = codepoint_to_chr(html.entities + .name2codepoint[text[1:-1]]) except KeyError: pass if rm: diff --git a/ebook_converter/utils/ipc/__init__.py b/ebook_converter/utils/ipc/__init__.py index a14be7c..ec395a8 100644 --- a/ebook_converter/utils/ipc/__init__.py +++ b/ebook_converter/utils/ipc/__init__.py @@ -1,10 +1,16 @@ -import os, errno, sys -from threading import Thread +import errno +import functools +import 
os +import sys +import threading from ebook_converter import force_unicode -from ebook_converter.constants import iswindows, get_windows_username, islinux, filesystem_encoding, ispy3 +from ebook_converter.constants import filesystem_encoding +from ebook_converter.constants import get_windows_username +from ebook_converter.constants import islinux +from ebook_converter.constants import ispy3 +from ebook_converter.constants import iswindows from ebook_converter.utils.filenames import ascii_filename -from ebook_converter.polyglot.functools import lru_cache __license__ = 'GPL v3' @@ -24,7 +30,7 @@ def eintr_retry_call(func, *args, **kwargs): raise -@lru_cache() +@functools.lru_cache() def socket_address(which): if iswindows: ans = r'\\.\pipe\Calibre' + which @@ -58,12 +64,12 @@ def viewer_socket_address(): return socket_address('Viewer' if iswindows else 'viewer') -class RC(Thread): +class RC(threading.Thread): def __init__(self, print_error=True, socket_address=None): self.print_error = print_error self.socket_address = socket_address or gui_socket_address() - Thread.__init__(self) + threading.Thread.__init__(self) self.conn = None self.daemon = True