1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-24 15:11:30 +02:00

Remove is_py3 usage and the helper functions duplicated by urllib.

This commit is contained in:
2020-04-19 21:22:24 +02:00
parent b66cbd2c1e
commit ef7e2b10be
35 changed files with 267 additions and 254 deletions
+15 -9
View File
@@ -3,10 +3,18 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
import sys, os, re, time, random, warnings import math
import os
import pkg_resources import pkg_resources
import random
import re
import sys
import time
import urllib.parse
import urllib.request
import warnings
from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, hasenv, native_string_type from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, hasenv, native_string_type
from math import floor
from functools import partial from functools import partial
if not hasenv('CALIBRE_SHOW_DEPRECATION_WARNINGS'): if not hasenv('CALIBRE_SHOW_DEPRECATION_WARNINGS'):
@@ -276,8 +284,7 @@ def extract(path, dir):
def get_proxies(debug=True): def get_proxies(debug=True):
from polyglot.urllib import getproxies proxies = urllib.request.getproxies()
proxies = getproxies()
for key, proxy in list(proxies.items()): for key, proxy in list(proxies.items()):
if not proxy or '..' in proxy or key == 'auto': if not proxy or '..' in proxy or key == 'auto':
del proxies[key] del proxies[key]
@@ -338,10 +345,9 @@ def get_proxy_info(proxy_scheme, proxy_string):
is not available in the string. If an exception occurs parsing the string is not available in the string. If an exception occurs parsing the string
this method returns None. this method returns None.
''' '''
from polyglot.urllib import urlparse
try: try:
proxy_url = '%s://%s'%(proxy_scheme, proxy_string) proxy_url = '%s://%s'%(proxy_scheme, proxy_string)
urlinfo = urlparse(proxy_url) urlinfo = urllib.parse.urlparse(proxy_url)
ans = { ans = {
'scheme': urlinfo.scheme, 'scheme': urlinfo.scheme,
'hostname': urlinfo.hostname, 'hostname': urlinfo.hostname,
@@ -414,13 +420,13 @@ def fit_image(width, height, pwidth, pheight):
scaled = height > pheight or width > pwidth scaled = height > pheight or width > pwidth
if height > pheight: if height > pheight:
corrf = pheight / float(height) corrf = pheight / float(height)
width, height = floor(corrf*width), pheight width, height = math.floor(corrf*width), pheight
if width > pwidth: if width > pwidth:
corrf = pwidth / float(width) corrf = pwidth / float(width)
width, height = pwidth, floor(corrf*height) width, height = pwidth, math.floor(corrf*height)
if height > pheight: if height > pheight:
corrf = pheight / float(height) corrf = pheight / float(height)
width, height = floor(corrf*width), pheight width, height = math.floor(corrf*width), pheight
return scaled, int(width), int(height) return scaled, int(width), int(height)
@@ -2,7 +2,11 @@
CHM File decoding support CHM File decoding support
""" """
import os import os
from lxml import html
from ebook_converter.polyglot.urllib import unquote as _unquote
from ebook_converter.ebooks.oeb.base import urlquote
from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.customize.conversion import InputFormatPlugin from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.constants import filesystem_encoding from ebook_converter.constants import filesystem_encoding
@@ -109,10 +113,7 @@ class CHMInput(InputFormatPlugin):
return oeb return oeb
def _create_html_root(self, hhcpath, log, encoding): def _create_html_root(self, hhcpath, log, encoding):
from lxml import html
from ebook_converter.polyglot.urllib import unquote as _unquote
from ebook_converter.ebooks.oeb.base import urlquote
from ebook_converter.ebooks.chardet import xml_to_unicode
hhcdata = self._read_file(hhcpath) hhcdata = self._read_file(hhcpath)
hhcdata = hhcdata.decode(encoding) hhcdata = hhcdata.decode(encoding)
hhcdata = xml_to_unicode(hhcdata, verbose=True, hhcdata = xml_to_unicode(hhcdata, verbose=True,
@@ -1,4 +1,7 @@
import os, shutil, re import os
import re
import shutil
import urllib.parse
from ebook_converter.customize.conversion import (OutputFormatPlugin, from ebook_converter.customize.conversion import (OutputFormatPlugin,
OptionRecommendation) OptionRecommendation)
@@ -514,7 +517,7 @@ class EPUBOutput(OutputFormatPlugin):
''' '''
Perform toc link transforms to alleviate slow loading. Perform toc link transforms to alleviate slow loading.
''' '''
from ebook_converter.ebooks.oeb.base import urldefrag, XPath from ebook_converter.ebooks.oeb.base import XPath
from ebook_converter.ebooks.oeb.polish.toc import item_at_top from ebook_converter.ebooks.oeb.polish.toc import item_at_top
def frag_is_at_top(root, frag): def frag_is_at_top(root, frag):
@@ -527,7 +530,7 @@ class EPUBOutput(OutputFormatPlugin):
def simplify_toc_entry(toc): def simplify_toc_entry(toc):
if toc.href: if toc.href:
href, frag = urldefrag(toc.href) href, frag = urllib.parse.urldefrag(toc.href)
if frag: if frag:
for x in self.oeb.spine: for x in self.oeb.spine:
if x.href == href: if x.href == href:
@@ -1,5 +1,8 @@
import re, tempfile, os import functools
from functools import partial import os
import re
import tempfile
import urllib.parse
from ebook_converter.constants import islinux, isbsd from ebook_converter.constants import islinux, isbsd
from ebook_converter.customize.conversion import (InputFormatPlugin, from ebook_converter.customize.conversion import (InputFormatPlugin,
@@ -97,7 +100,7 @@ class HTMLInput(InputFormatPlugin):
import uuid import uuid
from ebook_converter.ebooks.conversion.plumber import create_oebbook from ebook_converter.ebooks.conversion.plumber import create_oebbook
from ebook_converter.ebooks.oeb.base import (DirContainer, from ebook_converter.ebooks.oeb.base import (DirContainer,
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES, rewrite_links, urlnormalize, BINARY_MIME, OEB_STYLES,
xpath, urlquote) xpath, urlquote)
from ebook_converter import guess_type from ebook_converter import guess_type
from ebook_converter.ebooks.oeb.transforms.metadata import \ from ebook_converter.ebooks.oeb.transforms.metadata import \
@@ -163,7 +166,7 @@ class HTMLInput(InputFormatPlugin):
path = path.lower() path = path.lower()
self.added_resources[path] = href self.added_resources[path] = href
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
self.urldefrag = urldefrag self.urldefrag = urllib.parse.urldefrag
self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
self.log('Rewriting HTML links') self.log('Rewriting HTML links')
@@ -176,7 +179,8 @@ class HTMLInput(InputFormatPlugin):
item = oeb.manifest.hrefs[href] item = oeb.manifest.hrefs[href]
except KeyError: except KeyError:
item = oeb.manifest.hrefs[urlnormalize(href)] item = oeb.manifest.hrefs[urlnormalize(href)]
rewrite_links(item.data, partial(self.resource_adder, base=dpath)) rewrite_links(item.data,
functools.partial(self.resource_adder, base=dpath))
for item in oeb.manifest.values(): for item in oeb.manifest.values():
if item.media_type in self.OEB_STYLES: if item.media_type in self.OEB_STYLES:
@@ -186,7 +190,7 @@ class HTMLInput(InputFormatPlugin):
dpath = os.path.dirname(path) dpath = os.path.dirname(path)
break break
css_parser.replaceUrls(item.data, css_parser.replaceUrls(item.data,
partial(self.resource_adder, base=dpath)) functools.partial(self.resource_adder, base=dpath))
toc = self.oeb.toc toc = self.oeb.toc
self.oeb.auto_generated_toc = True self.oeb.auto_generated_toc = True
@@ -242,7 +246,6 @@ class HTMLInput(InputFormatPlugin):
return link, frag return link, frag
def resource_adder(self, link_, base=None): def resource_adder(self, link_, base=None):
from ebook_converter.polyglot.urllib import quote
link, frag = self.link_to_local_path(link_, base=base) link, frag = self.link_to_local_path(link_, base=base)
if link is None: if link is None:
return link_ return link_
@@ -287,9 +290,9 @@ class HTMLInput(InputFormatPlugin):
# file, therefore we quote it here. # file, therefore we quote it here.
if isinstance(bhref, unicode_type): if isinstance(bhref, unicode_type):
bhref = bhref.encode('utf-8') bhref = bhref.encode('utf-8')
item.html_input_href = as_unicode(quote(bhref)) item.html_input_href = as_unicode(urllib.parse.quote(bhref))
if guessed in self.OEB_STYLES: if guessed in self.OEB_STYLES:
item.override_css_fetch = partial( item.override_css_fetch = functools.partial(
self.css_import_handler, os.path.dirname(link)) self.css_import_handler, os.path.dirname(link))
item.data item.data
self.added_resources[link] = href self.added_resources[link] = href
@@ -1,12 +1,17 @@
import os, re, shutil import os
from os.path import dirname, abspath, relpath as _relpath, exists, basename
import pkg_resources import pkg_resources
import re
import shutil
from lxml import etree
from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
from ebook_converter import CurrentDir from ebook_converter import CurrentDir
from ebook_converter.ptempfile import PersistentTemporaryDirectory from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
from ebook_converter.ebooks.oeb.base import element
from ebook_converter.polyglot.builtins import unicode_type from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import unquote
from ebook_converter.ptempfile import PersistentTemporaryDirectory
from ebook_converter.utils.cleantext import clean_xml_chars
__license__ = 'GPL 3' __license__ = 'GPL 3'
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>' __copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
@@ -14,7 +19,7 @@ __docformat__ = 'restructuredtext en'
def relpath(*args): def relpath(*args):
return _relpath(*args).replace(os.sep, '/') return os.path.relpath(*args).replace(os.sep, '/')
class HTMLOutput(OutputFormatPlugin): class HTMLOutput(OutputFormatPlugin):
@@ -47,11 +52,7 @@ class HTMLOutput(OutputFormatPlugin):
''' '''
Generate table of contents Generate table of contents
''' '''
from lxml import etree
from ebook_converter.polyglot.urllib import unquote
from ebook_converter.ebooks.oeb.base import element
from ebook_converter.utils.cleantext import clean_xml_chars
with CurrentDir(output_dir): with CurrentDir(output_dir):
def build_node(current_node, parent=None): def build_node(current_node, parent=None):
if parent is None: if parent is None:
@@ -60,7 +61,8 @@ class HTMLOutput(OutputFormatPlugin):
parent = element(parent, ('ul')) parent = element(parent, ('ul'))
for node in current_node.nodes: for node in current_node.nodes:
point = element(parent, 'li') point = element(parent, 'li')
href = relpath(abspath(unquote(node.href)), dirname(ref_url)) href = relpath(os.path.abspath(unquote(node.href)),
os.path.dirname(ref_url))
if isinstance(href, bytes): if isinstance(href, bytes):
href = href.decode('utf-8') href = href.decode('utf-8')
link = element(point, 'a', href=clean_xml_chars(href)) link = element(point, 'a', href=clean_xml_chars(href))
@@ -131,10 +133,10 @@ class HTMLOutput(OutputFormatPlugin):
tempdir = os.path.realpath(PersistentTemporaryDirectory()) tempdir = os.path.realpath(PersistentTemporaryDirectory())
output_file = os.path.join(tempdir, output_file = os.path.join(tempdir,
basename(re.sub(r'\.zip', '', output_path)+'.html')) os.path.basename(re.sub(r'\.zip', '', output_path)+'.html'))
output_dir = re.sub(r'\.html', '', output_file)+'_files' output_dir = re.sub(r'\.html', '', output_file)+'_files'
if not exists(output_dir): if not os.path.exists(output_dir):
os.makedirs(output_dir) os.makedirs(output_dir)
css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css' css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css'
@@ -145,9 +147,10 @@ class HTMLOutput(OutputFormatPlugin):
html_toc = self.generate_html_toc(oeb_book, output_file, output_dir) html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
templite = Templite(template_html_index_data) templite = Templite(template_html_index_data)
nextLink = oeb_book.spine[0].href nextLink = oeb_book.spine[0].href
nextLink = relpath(output_dir+os.sep+nextLink, dirname(output_file)) nextLink = relpath(output_dir+os.sep+nextLink,
cssLink = relpath(abspath(css_path), dirname(output_file)) os.path.dirname(output_file))
tocUrl = relpath(output_file, dirname(output_file)) cssLink = relpath(os.path.abspath(css_path), os.path.dirname(output_file))
tocUrl = relpath(output_file, os.path.dirname(output_file))
t = templite.render(has_toc=bool(oeb_book.toc.count()), t = templite.render(has_toc=bool(oeb_book.toc.count()),
toc=html_toc, meta=meta, nextLink=nextLink, toc=html_toc, meta=meta, nextLink=nextLink,
tocUrl=tocUrl, cssLink=cssLink, tocUrl=tocUrl, cssLink=cssLink,
@@ -158,9 +161,9 @@ class HTMLOutput(OutputFormatPlugin):
with CurrentDir(output_dir): with CurrentDir(output_dir):
for item in oeb_book.manifest: for item in oeb_book.manifest:
path = abspath(unquote(item.href)) path = os.path.abspath(unquote(item.href))
dir = dirname(path) dir = os.path.dirname(path)
if not exists(dir): if not os.path.exists(dir):
os.makedirs(dir) os.makedirs(dir)
if item.spine_position is not None: if item.spine_position is not None:
with open(path, 'wb') as f: with open(path, 'wb') as f:
@@ -171,8 +174,8 @@ class HTMLOutput(OutputFormatPlugin):
item.unload_data_from_memory(memory=path) item.unload_data_from_memory(memory=path)
for item in oeb_book.spine: for item in oeb_book.spine:
path = abspath(unquote(item.href)) path = os.path.abspath(unquote(item.href))
dir = dirname(path) dir = os.path.dirname(path)
root = item.data.getroottree() root = item.data.getroottree()
# get & clean HTML <HEAD>-data # get & clean HTML <HEAD>-data
@@ -191,18 +194,18 @@ class HTMLOutput(OutputFormatPlugin):
# generate link to next page # generate link to next page
if item.spine_position+1 < len(oeb_book.spine): if item.spine_position+1 < len(oeb_book.spine):
nextLink = oeb_book.spine[item.spine_position+1].href nextLink = oeb_book.spine[item.spine_position+1].href
nextLink = relpath(abspath(nextLink), dir) nextLink = relpath(os.path.abspath(nextLink), dir)
else: else:
nextLink = None nextLink = None
# generate link to previous page # generate link to previous page
if item.spine_position > 0: if item.spine_position > 0:
prevLink = oeb_book.spine[item.spine_position-1].href prevLink = oeb_book.spine[item.spine_position-1].href
prevLink = relpath(abspath(prevLink), dir) prevLink = relpath(os.path.abspath(prevLink), dir)
else: else:
prevLink = None prevLink = None
cssLink = relpath(abspath(css_path), dir) cssLink = relpath(os.path.abspath(css_path), dir)
tocUrl = relpath(output_file, dir) tocUrl = relpath(output_file, dir)
firstContentPageLink = oeb_book.spine[0].href firstContentPageLink = oeb_book.spine[0].href
@@ -222,8 +225,8 @@ class HTMLOutput(OutputFormatPlugin):
item.unload_data_from_memory(memory=path) item.unload_data_from_memory(memory=path)
zfile = zipfile.ZipFile(output_path, "w") zfile = zipfile.ZipFile(output_path, "w")
zfile.add_dir(output_dir, basename(output_dir)) zfile.add_dir(output_dir, os.path.basename(output_dir))
zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED) zfile.write(output_file, os.path.basename(output_file), zipfile.ZIP_DEFLATED)
if opts.extract_to: if opts.extract_to:
if os.path.exists(opts.extract_to): if os.path.exists(opts.extract_to):
@@ -1,9 +1,14 @@
import os, re import os
import re
from lxml import etree
from ebook_converter.customize.conversion import (OutputFormatPlugin, from ebook_converter.customize.conversion import (OutputFormatPlugin,
OptionRecommendation) OptionRecommendation)
from ebook_converter import CurrentDir from ebook_converter import CurrentDir
from ebook_converter.polyglot.urllib import unquote
from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES
from ebook_converter.ebooks.oeb.normalize_css import condense_sheet
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -21,14 +26,10 @@ class OEBOutput(OutputFormatPlugin):
recommendations = {('pretty_print', True, OptionRecommendation.HIGH)} recommendations = {('pretty_print', True, OptionRecommendation.HIGH)}
def convert(self, oeb_book, output_path, input_plugin, opts, log): def convert(self, oeb_book, output_path, input_plugin, opts, log):
from ebook_converter.polyglot.urllib import unquote
from lxml import etree
self.log, self.opts = log, opts self.log, self.opts = log, opts
if not os.path.exists(output_path): if not os.path.exists(output_path):
os.makedirs(output_path) os.makedirs(output_path)
from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES
from ebook_converter.ebooks.oeb.normalize_css import condense_sheet
with CurrentDir(output_path): with CurrentDir(output_path):
results = oeb_book.to_opf2(page_map=True) results = oeb_book.to_opf2(page_map=True)
for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME): for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
+7 -6
View File
@@ -1,9 +1,10 @@
import posixpath, re import posixpath
from uuid import uuid4 import re
import urllib.parse
import uuid
from ebook_converter.utils.filenames import ascii_text from ebook_converter.utils.filenames import ascii_text
from ebook_converter.polyglot.builtins import unicode_type from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import urlparse
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -67,7 +68,7 @@ class LinksManager(object):
self.namespace = namespace self.namespace = namespace
self.log = log self.log = log
self.document_relationships = document_relationships self.document_relationships = document_relationships
self.top_anchor = unicode_type(uuid4().hex) self.top_anchor = unicode_type(uuid.uuid4().hex)
self.anchor_map = {} self.anchor_map = {}
self.used_bookmark_names = set() self.used_bookmark_names = set()
self.bmark_id = 0 self.bmark_id = 0
@@ -100,7 +101,7 @@ class LinksManager(object):
def serialize_hyperlink(self, parent, link): def serialize_hyperlink(self, parent, link):
item, url, tooltip = link item, url, tooltip = link
purl = urlparse(url) purl = urllib.parse.urlparse(url)
href = purl.path href = purl.path
def make_link(parent, anchor=None, id=None, tooltip=None): def make_link(parent, anchor=None, id=None, tooltip=None):
@@ -133,7 +134,7 @@ class LinksManager(object):
def process_toc_node(self, toc, level=0): def process_toc_node(self, toc, level=0):
href = toc.href href = toc.href
if href: if href:
purl = urlparse(href) purl = urllib.parse.urlparse(href)
href = purl.path href = purl.path
if href in self.document_hrefs: if href in self.document_hrefs:
key = (href, purl.fragment or self.top_anchor) key = (href, purl.fragment or self.top_anchor)
+5 -3
View File
@@ -1,8 +1,11 @@
""" """
Transform OEB content into FB2 markup Transform OEB content into FB2 markup
""" """
import re, textwrap, uuid
from datetime import datetime from datetime import datetime
import re
import textwrap
import urllib.parse
import uuid
from lxml import etree from lxml import etree
@@ -14,7 +17,6 @@ from ebook_converter.utils.img import save_cover_data_to
from ebook_converter.ebooks.oeb.base import urlnormalize from ebook_converter.ebooks.oeb.base import urlnormalize
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
from ebook_converter.polyglot.binary import as_base64_unicode from ebook_converter.polyglot.binary import as_base64_unicode
from ebook_converter.polyglot.urllib import urlparse
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -508,7 +510,7 @@ class FB2MLizer(object):
tags.append('p') tags.append('p')
if tag == 'a' and elem_tree.attrib.get('href', None): if tag == 'a' and elem_tree.attrib.get('href', None):
# Handle only external links for now # Handle only external links for now
if urlparse(elem_tree.attrib['href']).netloc: if urllib.parse.urlparse(elem_tree.attrib['href']).netloc:
p_txt, p_tag = self.ensure_p() p_txt, p_tag = self.ensure_p()
fb2_out += p_txt fb2_out += p_txt
tags += p_tag tags += p_tag
+11 -11
View File
@@ -1,14 +1,17 @@
""" """
Input plugin for HTML or OPF ebooks. Input plugin for HTML or OPF ebooks.
""" """
import os, re, sys, errno as gerrno import errno
import os
import re
import sys
import urllib.parse
from ebook_converter.ebooks.oeb.base import urlunquote from ebook_converter.ebooks.oeb.base import urlunquote
from ebook_converter.ebooks.chardet import detect_xml_encoding from ebook_converter.ebooks.chardet import detect_xml_encoding
from ebook_converter.constants import iswindows from ebook_converter.constants import iswindows
from ebook_converter import unicode_path, as_unicode, replace_entities from ebook_converter import unicode_path, as_unicode, replace_entities
from ebook_converter.polyglot.builtins import is_py3, unicode_type from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import urlparse, urlunparse
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -29,7 +32,7 @@ class Link(object):
if iswindows and path.startswith('/'): if iswindows and path.startswith('/'):
path = path[1:] path = path[1:]
isabs = True isabs = True
path = urlunparse(('', '', path, url.params, url.query, '')) path = urllib.parse.urlunparse(('', '', path, url.params, url.query, ''))
path = urlunquote(path) path = urlunquote(path)
if isabs or os.path.isabs(path): if isabs or os.path.isabs(path):
return path return path
@@ -43,7 +46,7 @@ class Link(object):
''' '''
assert isinstance(url, unicode_type) and isinstance(base, unicode_type) assert isinstance(url, unicode_type) and isinstance(base, unicode_type)
self.url = url self.url = url
self.parsed_url = urlparse(self.url) self.parsed_url = urllib.parse.urlparse(self.url)
self.is_local = self.parsed_url.scheme in ('', 'file') self.is_local = self.parsed_url.scheme in ('', 'file')
self.is_internal = self.is_local and not bool(self.parsed_url.path) self.is_internal = self.is_local and not bool(self.parsed_url.path)
self.path = None self.path = None
@@ -62,16 +65,13 @@ class Link(object):
def __str__(self): def __str__(self):
return 'Link: %s --> %s'%(self.url, self.path) return 'Link: %s --> %s'%(self.url, self.path)
if not is_py3:
__unicode__ = __str__
class IgnoreFile(Exception): class IgnoreFile(Exception):
def __init__(self, msg, errno): def __init__(self, msg, err_no):
Exception.__init__(self, msg) Exception.__init__(self, msg)
self.doesnt_exist = errno == gerrno.ENOENT self.errno = err_no
self.errno = errno self.doesnt_exist = err_no == errno.ENOENT
class HTMLFile(object): class HTMLFile(object):
+2 -2
View File
@@ -3,6 +3,7 @@ Transform OEB content into a single (more or less) HTML file.
""" """
import os import os
import re import re
import urllib.parse
from functools import partial from functools import partial
from lxml import html from lxml import html
@@ -13,7 +14,6 @@ from ebook_converter.ebooks.oeb.base import (
from ebook_converter.ebooks.oeb.stylizer import Stylizer from ebook_converter.ebooks.oeb.stylizer import Stylizer
from ebook_converter.utils.logging import default_log from ebook_converter.utils.logging import default_log
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes, as_bytes from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes, as_bytes
from ebook_converter.polyglot.urllib import urldefrag
__license__ = 'GPL 3' __license__ = 'GPL 3'
@@ -101,7 +101,7 @@ class OEB2HTML(object):
for attr in attribs: for attr in attribs:
if attr in link_attrs: if attr in link_attrs:
href = item.abshref(attribs[attr]) href = item.abshref(attribs[attr])
href, id = urldefrag(href) href, id = urllib.parse.urldefrag(href)
if href in self.base_hrefs: if href in self.base_hrefs:
self.get_link_id(href, id) self.get_link_id(href, id)
@@ -12,6 +12,7 @@ import os
import re import re
import sys import sys
import tempfile import tempfile
import urllib.parse
from collections import deque from collections import deque
from functools import partial from functools import partial
from itertools import chain from itertools import chain
@@ -37,7 +38,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import (
) )
from ebook_converter.ptempfile import PersistentTemporaryFile from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type
from ebook_converter.polyglot.urllib import unquote, urlparse from ebook_converter.polyglot.urllib import unquote
from PIL import Image as PILImage from PIL import Image as PILImage
@@ -51,7 +52,7 @@ def update_css(ncss, ocss):
def munge_paths(basepath, url): def munge_paths(basepath, url):
purl = urlparse(unquote(url),) purl = urllib.parse.urlparse(unquote(url),)
path, fragment = purl[2], purl[5] path, fragment = purl[2], purl[5]
if path: if path:
path = path.replace('/', os.sep) path = path.replace('/', os.sep)
@@ -1471,7 +1472,8 @@ class HTMLConverter(object):
pass pass
elif tagname == 'a' and self.link_levels >= 0: elif tagname == 'a' and self.link_levels >= 0:
if tag.has_attr('href') and not self.link_exclude.match(tag['href']): if tag.has_attr('href') and not self.link_exclude.match(tag['href']):
if urlparse(tag['href'])[0] not in ('', 'file'): if urllib.parse.urlparse(tag['href'])[0] not in ('',
'file'):
self.process_children(tag, tag_css, tag_pseudo_css) self.process_children(tag, tag_css, tag_pseudo_css)
else: else:
path = munge_paths(self.target_prefix, tag['href'])[0] path = munge_paths(self.target_prefix, tag['href'])[0]
@@ -1513,7 +1515,7 @@ class HTMLConverter(object):
dropcaps = tag.get('class') in ('libprs500_dropcaps', ['libprs500_dropcaps']) dropcaps = tag.get('class') in ('libprs500_dropcaps', ['libprs500_dropcaps'])
self.process_image(path, tag_css, width, height, self.process_image(path, tag_css, width, height,
dropcaps=dropcaps, rescale=True) dropcaps=dropcaps, rescale=True)
elif not urlparse(tag['src'])[0]: elif not urllib.parse.urlparse(tag['src'])[0]:
self.log.warn('Could not find image: '+tag['src']) self.log.warn('Could not find image: '+tag['src'])
else: else:
self.log.debug("Failed to process: %s"%unicode_type(tag)) self.log.debug("Failed to process: %s"%unicode_type(tag))
+8 -5
View File
@@ -2,12 +2,15 @@
Provides abstraction for metadata reading.writing from a variety of ebook Provides abstraction for metadata reading.writing from a variety of ebook
formats. formats.
""" """
import os, sys, re import os
import re
import sys
import urllib.parse
from ebook_converter import relpath, guess_type, prints, force_unicode from ebook_converter import relpath, guess_type, prints, force_unicode
from ebook_converter.utils.config_base import tweaks from ebook_converter.utils.config_base import tweaks
from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, getcwd, iteritems, itervalues, as_unicode from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, getcwd, iteritems, itervalues, as_unicode
from ebook_converter.polyglot.urllib import quote, unquote, urlparse from ebook_converter.polyglot.urllib import unquote
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -241,7 +244,7 @@ class Resource(object):
path = path.decode(sys.getfilesystemencoding()) path = path.decode(sys.getfilesystemencoding())
self.path = path self.path = path
else: else:
url = urlparse(href_or_path) url = urllib.parse.urlparse(href_or_path)
if url[0] not in ('', 'file'): if url[0] not in ('', 'file'):
self._href = href_or_path self._href = href_or_path
else: else:
@@ -268,7 +271,7 @@ class Resource(object):
if self.path is None: if self.path is None:
return self._href return self._href
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
frag = '#'+as_unicode(quote(f)) if self.fragment else '' frag = '#'+as_unicode(urllib.parse.quote(f)) if self.fragment else ''
if self.path == basedir: if self.path == basedir:
return ''+frag return ''+frag
try: try:
@@ -277,7 +280,7 @@ class Resource(object):
rpath = self.path rpath = self.path
if isinstance(rpath, unicode_type): if isinstance(rpath, unicode_type):
rpath = rpath.encode('utf-8') rpath = rpath.encode('utf-8')
return as_unicode(quote(rpath.replace(os.sep, '/')))+frag return as_unicode(urllib.parse.quote(rpath.replace(os.sep, '/')))+frag
def set_basedir(self, path): def set_basedir(self, path):
self._basedir = path self._basedir = path
+13 -3
View File
@@ -1,7 +1,17 @@
""" """
lxml based OPF parser. lxml based OPF parser.
""" """
import re, sys, unittest, functools, os, uuid, glob, io, json, copy import copy
import functools
import glob
import io
import json
import os
import re
import sys
import unittest
import urllib.parse
import uuid
from lxml import etree from lxml import etree
@@ -18,7 +28,7 @@ from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
from ebook_converter.utils.config import tweaks from ebook_converter.utils.config import tweaks
from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.polyglot.builtins import iteritems, unicode_type, getcwd from ebook_converter.polyglot.builtins import iteritems, unicode_type, getcwd
from ebook_converter.polyglot.urllib import unquote, urlparse from ebook_converter.polyglot.urllib import unquote
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -76,7 +86,7 @@ class Resource(object): # {{{
self.path = path self.path = path
else: else:
href_or_path = href_or_path href_or_path = href_or_path
url = urlparse(href_or_path) url = urllib.parse.urlparse(href_or_path)
if url[0] not in ('', 'file'): if url[0] not in ('', 'file'):
self._href = href_or_path self._href = href_or_path
else: else:
+11 -7
View File
@@ -1,5 +1,9 @@
import os, glob, re, functools import collections
from collections import Counter import functools
import glob
import os
import re
import urllib.parse
from lxml import etree from lxml import etree
from lxml.builder import ElementMaker from lxml.builder import ElementMaker
@@ -9,7 +13,7 @@ from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.polyglot.builtins import unicode_type, getcwd from ebook_converter.polyglot.builtins import unicode_type, getcwd
from ebook_converter.polyglot.urllib import unquote, urlparse from ebook_converter.polyglot.urllib import unquote
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -30,7 +34,7 @@ def parse_html_toc(data):
data = xml_to_unicode(data, strip_encoding_pats=True, resolve_entities=True)[0] data = xml_to_unicode(data, strip_encoding_pats=True, resolve_entities=True)[0]
root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False, sanitize_names=True) root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False, sanitize_names=True)
for a in root.xpath('//*[@href and local-name()="a"]'): for a in root.xpath('//*[@href and local-name()="a"]'):
purl = urlparse(unquote(a.get('href'))) purl = urllib.parse.urlparse(unquote(a.get('href')))
href, fragment = purl[2], purl[5] href, fragment = purl[2], purl[5]
if not fragment: if not fragment:
fragment = None fragment = None
@@ -142,7 +146,7 @@ class TOC(list):
if toc is not None: if toc is not None:
if toc.lower() not in ('ncx', 'ncxtoc'): if toc.lower() not in ('ncx', 'ncxtoc'):
toc = urlparse(unquote(toc))[2] toc = urllib.parse.urlparse(unquote(toc))[2]
toc = toc.replace('/', os.sep) toc = toc.replace('/', os.sep)
if not os.path.isabs(toc): if not os.path.isabs(toc):
toc = os.path.join(self.base_path, toc) toc = os.path.join(self.base_path, toc)
@@ -209,7 +213,7 @@ class TOC(list):
if content and text: if content and text:
content = content[0] content = content[0]
# if get_attr(content, attr='src'): # if get_attr(content, attr='src'):
purl = urlparse(content.get('src')) purl = urllib.parse.urlparse(content.get('src'))
href, fragment = unquote(purl[2]), unquote(purl[5]) href, fragment = unquote(purl[2]), unquote(purl[5])
nd = dest.add_item(href, fragment, text) nd = dest.add_item(href, fragment, text)
nd.play_order = play_order nd.play_order = play_order
@@ -253,7 +257,7 @@ class TOC(list):
navmap = E.navMap() navmap = E.navMap()
root.append(navmap) root.append(navmap)
root.set('{http://www.w3.org/XML/1998/namespace}lang', 'en') root.set('{http://www.w3.org/XML/1998/namespace}lang', 'en')
c = Counter() c = collections.Counter()
def navpoint(parent, np): def navpoint(parent, np):
text = np.text text = np.text
+17 -15
View File
@@ -1,7 +1,10 @@
import struct, re, os import collections
from collections import namedtuple import itertools
from itertools import repeat import os
from uuid import uuid4 import re
import struct
import urllib.parse
import uuid
from lxml import etree from lxml import etree
@@ -16,21 +19,20 @@ from ebook_converter.ebooks.mobi.utils import read_font_record
from ebook_converter.ebooks.oeb.parse_utils import parse_html from ebook_converter.ebooks.oeb.parse_utils import parse_html
from ebook_converter.ebooks.oeb.base import XPath, XHTML, xml2text from ebook_converter.ebooks.oeb.base import XPath, XHTML, xml2text
from ebook_converter.polyglot.builtins import unicode_type, getcwd, as_unicode from ebook_converter.polyglot.builtins import unicode_type, getcwd, as_unicode
from ebook_converter.polyglot.urllib import urldefrag
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>' __copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
Part = namedtuple('Part', Part = collections.namedtuple('Part',
'num type filename start end aid') 'num type filename start end aid')
Elem = namedtuple('Elem', Elem = collections.namedtuple('Elem',
'insert_pos toc_text file_number sequence_number start_pos ' 'insert_pos toc_text file_number sequence_number start_pos '
'length') 'length')
FlowInfo = namedtuple('FlowInfo', FlowInfo = collections.namedtuple('FlowInfo',
'type format dir fname') 'type format dir fname')
# locate beginning and ending positions of tag with specific aid attribute # locate beginning and ending positions of tag with specific aid attribute
@@ -81,7 +83,7 @@ class Mobi8Reader(object):
def __call__(self): def __call__(self):
self.mobi6_reader.check_for_drm() self.mobi6_reader.check_for_drm()
self.aid_anchor_suffix = uuid4().hex.encode('utf-8') self.aid_anchor_suffix = uuid.uuid4().hex.encode('utf-8')
bh = self.mobi6_reader.book_header bh = self.mobi6_reader.book_header
if self.mobi6_reader.kf8_type == 'joint': if self.mobi6_reader.kf8_type == 'joint':
offset = self.mobi6_reader.kf8_boundary + 2 offset = self.mobi6_reader.kf8_boundary + 2
@@ -127,7 +129,7 @@ class Mobi8Reader(object):
if self.header.skelidx != NULL_INDEX: if self.header.skelidx != NULL_INDEX:
table = read_index(self.kf8_sections, self.header.skelidx, table = read_index(self.kf8_sections, self.header.skelidx,
self.header.codec)[0] self.header.codec)[0]
File = namedtuple('File', File = collections.namedtuple('File',
'file_number name divtbl_count start_position length') 'file_number name divtbl_count start_position length')
for i, text in enumerate(table): for i, text in enumerate(table):
@@ -149,7 +151,7 @@ class Mobi8Reader(object):
if self.header.othidx != NULL_INDEX: if self.header.othidx != NULL_INDEX:
table, cncx = read_index(self.kf8_sections, self.header.othidx, table, cncx = read_index(self.kf8_sections, self.header.othidx,
self.header.codec) self.header.codec)
Item = namedtuple('Item', Item = collections.namedtuple('Item',
'type title pos_fid') 'type title pos_fid')
for i, ref_type in enumerate(table): for i, ref_type in enumerate(table):
@@ -222,7 +224,7 @@ class Mobi8Reader(object):
self.parts.append(skeleton) self.parts.append(skeleton)
if divcnt < 1: if divcnt < 1:
# Empty file # Empty file
aidtext = unicode_type(uuid4()) aidtext = unicode_type(uuid.uuid4())
filename = aidtext + '.html' filename = aidtext + '.html'
self.partinfo.append(Part(skelnum, 'text', filename, skelpos, self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
baseptr, aidtext)) baseptr, aidtext))
@@ -293,7 +295,7 @@ class Mobi8Reader(object):
for part in self.partinfo: for part in self.partinfo:
if pos >= part.start and pos < part.end: if pos >= part.start and pos < part.end:
return part return part
return Part(*repeat(None, len(Part._fields))) return Part(*itertools.repeat(None, len(Part._fields)))
def get_id_tag_by_pos_fid(self, posfid, offset): def get_id_tag_by_pos_fid(self, posfid, offset):
# first convert kindle:pos:fid and offset info to position in file # first convert kindle:pos:fid and offset info to position in file
@@ -475,7 +477,7 @@ class Mobi8Reader(object):
for ref in guide: for ref in guide:
if ref.type == 'toc': if ref.type == 'toc':
href = ref.href() href = ref.href()
href, frag = urldefrag(href) href, frag = urllib.parse.urldefrag(href)
if os.path.exists(href.replace('/', os.sep)): if os.path.exists(href.replace('/', os.sep)):
try: try:
toc = self.read_inline_toc(href, frag) toc = self.read_inline_toc(href, frag)
@@ -554,7 +556,7 @@ class Mobi8Reader(object):
if reached and elem.tag == XHTML('a') and elem.get('href', if reached and elem.tag == XHTML('a') and elem.get('href',
False): False):
href = elem.get('href') href = elem.get('href')
href, frag = urldefrag(href) href, frag = urllib.parse.urldefrag(href)
href = base_href + '/' + href href = base_href + '/' + href
text = xml2text(elem).strip() text = xml2text(elem).strip()
if (text, href, frag) in seen: if (text, href, frag) in seen:
@@ -1,7 +1,8 @@
import collections
import io
import re import re
import unicodedata import unicodedata
from collections import defaultdict import urllib.parse
from io import BytesIO
from ebook_converter.ebooks.mobi.mobiml import MBP_NS from ebook_converter.ebooks.mobi.mobiml import MBP_NS
from ebook_converter.ebooks.mobi.utils import is_guide_ref_start from ebook_converter.ebooks.mobi.utils import is_guide_ref_start
@@ -9,7 +10,6 @@ from ebook_converter.ebooks.oeb.base import (
OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize
) )
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
from ebook_converter.polyglot.urllib import urldefrag
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -17,12 +17,12 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en' __docformat__ = 'restructuredtext en'
class Buf(BytesIO): class Buf(io.BytesIO):
def write(self, x): def write(self, x):
if isinstance(x, unicode_type): if isinstance(x, unicode_type):
x = x.encode('utf-8') x = x.encode('utf-8')
BytesIO.write(self, x) io.BytesIO.write(self, x)
class Serializer(object): class Serializer(object):
@@ -63,7 +63,7 @@ class Serializer(object):
# Mapping of hrefs (urlnormalized) to a list of offsets into the buffer # Mapping of hrefs (urlnormalized) to a list of offsets into the buffer
# where filepos="..." elements are written corresponding to links that # where filepos="..." elements are written corresponding to links that
# point to the href. This is used at the end to fill in the correct values. # point to the href. This is used at the end to fill in the correct values.
self.href_offsets = defaultdict(list) self.href_offsets = collections.defaultdict(list)
# List of offsets in the buffer of non linear items in the spine. These # List of offsets in the buffer of non linear items in the spine. These
# become uncrossable breaks in the MOBI # become uncrossable breaks in the MOBI
@@ -81,7 +81,7 @@ class Serializer(object):
item.is_article_start = item.is_article_end = False item.is_article_start = item.is_article_end = False
def spine_item(tocitem): def spine_item(tocitem):
href = urldefrag(tocitem.href)[0] href = urllib.parse.urldefrag(tocitem.href)[0]
for item in self.oeb.spine: for item in self.oeb.spine:
if item.href == href: if item.href == href:
return item return item
@@ -157,7 +157,7 @@ class Serializer(object):
hrefs = self.oeb.manifest.hrefs hrefs = self.oeb.manifest.hrefs
buf.write(b'<guide>') buf.write(b'<guide>')
for ref in self.oeb.guide.values(): for ref in self.oeb.guide.values():
path = urldefrag(ref.href)[0] path = urllib.parse.urldefrag(ref.href)[0]
if path not in hrefs or hrefs[path].media_type not in OEB_DOCS: if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
continue continue
@@ -188,7 +188,7 @@ class Serializer(object):
''' '''
hrefs = self.oeb.manifest.hrefs hrefs = self.oeb.manifest.hrefs
try: try:
path, frag = urldefrag(urlnormalize(href)) path, frag = urllib.parse.urldefrag(urlnormalize(href))
except ValueError: except ValueError:
# Unparseable URL # Unparseable URL
return False return False
@@ -382,7 +382,7 @@ class Serializer(object):
if href not in id_offsets: if href not in id_offsets:
self.logger.warn('Hyperlink target %r not found' % href) self.logger.warn('Hyperlink target %r not found' % href)
# Link to the top of the document, better than just ignoring # Link to the top of the document, better than just ignoring
href, _ = urldefrag(href) href, _ = urllib.parse.urldefrag(href)
if href in self.id_offsets: if href in self.id_offsets:
ioff = self.id_offsets[href] ioff = self.id_offsets[href]
if is_start: if is_start:
+15 -14
View File
@@ -5,6 +5,7 @@ import os, re, logging, sys, numbers
from collections import defaultdict from collections import defaultdict
from itertools import count from itertools import count
from operator import attrgetter from operator import attrgetter
import urllib.parse
from lxml import etree, html from lxml import etree, html
from ebook_converter import force_unicode from ebook_converter import force_unicode
@@ -17,7 +18,7 @@ from ebook_converter.ebooks.oeb.parse_utils import barename, XHTML_NS, namespace
from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.utils.short_uuid import uuid4 from ebook_converter.utils.short_uuid import uuid4
from ebook_converter.polyglot.builtins import iteritems, unicode_type, string_or_bytes, itervalues, codepoint_to_chr from ebook_converter.polyglot.builtins import iteritems, unicode_type, string_or_bytes, itervalues, codepoint_to_chr
from ebook_converter.polyglot.urllib import unquote as urlunquote, urldefrag, urljoin, urlparse, urlunparse from ebook_converter.polyglot.urllib import unquote as urlunquote
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -185,13 +186,13 @@ def iterlinks(root, find_links_in_css=True):
if attrib in attribs: if attrib in attribs:
value = el.get(attrib) value = el.get(attrib)
if codebase is not None: if codebase is not None:
value = urljoin(codebase, value) value = urllib.parse.urljoin(codebase, value)
yield (el, attrib, value, 0) yield (el, attrib, value, 0)
if 'archive' in attribs: if 'archive' in attribs:
for match in _archive_re.finditer(el.get('archive')): for match in _archive_re.finditer(el.get('archive')):
value = match.group(0) value = match.group(0)
if codebase is not None: if codebase is not None:
value = urljoin(codebase, value) value = urllib.parse.urljoin(codebase, value)
yield (el, 'archive', value, match.start()) yield (el, 'archive', value, match.start())
else: else:
for attr in attribs: for attr in attribs:
@@ -217,7 +218,7 @@ def make_links_absolute(root, base_url):
came from) came from)
''' '''
def link_repl(href): def link_repl(href):
return urljoin(base_url, href) return urllib.parse.urljoin(base_url, href)
rewrite_links(root, link_repl) rewrite_links(root, link_repl)
@@ -463,16 +464,16 @@ def urlnormalize(href):
characters URL quoted. characters URL quoted.
""" """
try: try:
parts = urlparse(href) parts = urllib.parse.urlparse(href)
except ValueError as e: except ValueError as e:
raise ValueError('Failed to parse the URL: %r with underlying error: %s' % (href, as_unicode(e))) raise ValueError('Failed to parse the URL: %r with underlying error: %s' % (href, as_unicode(e)))
if not parts.scheme or parts.scheme == 'file': if not parts.scheme or parts.scheme == 'file':
path, frag = urldefrag(href) path, frag = urllib.parse.urldefrag(href)
parts = ('', '', path, '', '', frag) parts = ('', '', path, '', '', frag)
parts = (part.replace('\\', '/') for part in parts) parts = (part.replace('\\', '/') for part in parts)
parts = (urlunquote(part) for part in parts) parts = (urlunquote(part) for part in parts)
parts = (urlquote(part) for part in parts) parts = (urlquote(part) for part in parts)
return urlunparse(parts) return urllib.parse.urlunparse(parts)
def extract(elem): def extract(elem):
@@ -1135,7 +1136,7 @@ class Manifest(object):
relative to this manifest item to a book-absolute reference. relative to this manifest item to a book-absolute reference.
""" """
try: try:
purl = urlparse(href) purl = urllib.parse.urlparse(href)
except ValueError: except ValueError:
return href return href
scheme = purl.scheme scheme = purl.scheme
@@ -1143,8 +1144,8 @@ class Manifest(object):
return href return href
purl = list(purl) purl = list(purl)
purl[0] = '' purl[0] = ''
href = urlunparse(purl) href = urllib.parse.urlunparse(purl)
path, frag = urldefrag(href) path, frag = urllib.parse.urldefrag(href)
if not path: if not path:
if frag: if frag:
return '#'.join((self.href, frag)) return '#'.join((self.href, frag))
@@ -1423,7 +1424,7 @@ class Guide(object):
@property @property
def item(self): def item(self):
"""The manifest item associated with this reference.""" """The manifest item associated with this reference."""
path = urldefrag(self.href)[0] path = urllib.parse.urldefrag(self.href)[0]
hrefs = self.oeb.manifest.hrefs hrefs = self.oeb.manifest.hrefs
return hrefs.get(path, None) return hrefs.get(path, None)
@@ -1596,7 +1597,7 @@ class TOC(object):
""" """
prev = None prev = None
for node in list(self.nodes): for node in list(self.nodes):
if prev and urldefrag(prev.href)[0] == urldefrag(node.href)[0]: if prev and urllib.parse.urldefrag(prev.href)[0] == urllib.parse.urldefrag(node.href)[0]:
self.nodes.remove(node) self.nodes.remove(node)
prev.nodes.append(node) prev.nodes.append(node)
else: else:
@@ -1988,7 +1989,7 @@ class OEBBook(object):
def rel_href(base_href, href): def rel_href(base_href, href):
"""Convert the URL provided in :param:`href` to a URL relative to the URL """Convert the URL provided in :param:`href` to a URL relative to the URL
in :param:`base_href` """ in :param:`base_href` """
if urlparse(href).scheme: if urllib.parse.urlparse(href).scheme:
return href return href
if '/' not in base_href: if '/' not in base_href:
return href return href
@@ -2004,7 +2005,7 @@ def rel_href(base_href, href):
break break
if not base: if not base:
return href return href
target, frag = urldefrag(href) target, frag = urllib.parse.urldefrag(href)
target = target.split('/') target = target.split('/')
index = 0 index = 0
for index in range(min(len(base), len(target))): for index in range(min(len(base), len(target))):
@@ -11,6 +11,7 @@ import uuid
from collections import defaultdict from collections import defaultdict
from io import BytesIO from io import BytesIO
from itertools import count from itertools import count
import urllib.parse
from css_parser import getUrls, replaceUrls from css_parser import getUrls, replaceUrls
@@ -49,7 +50,6 @@ from ebook_converter.utils.logging import default_log
from ebook_converter.utils.xml_parse import safe_xml_fromstring from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.utils.zipfile import ZipFile from ebook_converter.utils.zipfile import ZipFile
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.urllib import urlparse
exists, join, relpath = os.path.exists, os.path.join, os.path.relpath exists, join, relpath = os.path.exists, os.path.join, os.path.relpath
@@ -107,7 +107,7 @@ def name_to_href(name, root, base=None, quote=urlquote):
def href_to_name(href, root, base=None): def href_to_name(href, root, base=None):
base = root if base is None else os.path.dirname(name_to_abspath(base, root)) base = root if base is None else os.path.dirname(name_to_abspath(base, root))
try: try:
purl = urlparse(href) purl = urllib.parse.urlparse(href)
except ValueError: except ValueError:
return None return None
if purl.scheme or not purl.path: if purl.scheme or not purl.path:
+5 -5
View File
@@ -2,13 +2,13 @@ import codecs, shutil, os, posixpath
from ebook_converter.polyglot.builtins import iteritems, itervalues from ebook_converter.polyglot.builtins import iteritems, itervalues
from functools import partial from functools import partial
from collections import Counter, defaultdict from collections import Counter, defaultdict
import urllib.parse
from ebook_converter import sanitize_file_name from ebook_converter import sanitize_file_name
from ebook_converter.ebooks.chardet import strip_encoding_declarations from ebook_converter.ebooks.chardet import strip_encoding_declarations
from ebook_converter.ebooks.oeb.base import css_text from ebook_converter.ebooks.oeb.base import css_text
from ebook_converter.ebooks.oeb.polish.css import iter_declarations, remove_property_value from ebook_converter.ebooks.oeb.polish.css import iter_declarations, remove_property_value
from ebook_converter.ebooks.oeb.polish.utils import extract from ebook_converter.ebooks.oeb.polish.utils import extract
from ebook_converter.polyglot.urllib import urlparse, urlunparse
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -38,7 +38,7 @@ class LinkReplacer(object):
nname = self.link_map.get(name, None) nname = self.link_map.get(name, None)
if not nname: if not nname:
return url return url
purl = urlparse(url) purl = urllib.parse.urlparse(url)
href = self.container.name_to_href(nname, self.base) href = self.container.name_to_href(nname, self.base)
if purl.fragment: if purl.fragment:
nfrag = self.frag_map(name, purl.fragment) nfrag = self.frag_map(name, purl.fragment)
@@ -68,12 +68,12 @@ class IdReplacer(object):
id_map = self.id_map.get(name) id_map = self.id_map.get(name)
if id_map is None: if id_map is None:
return url return url
purl = urlparse(url) purl = urllib.parse.urlparse(url)
nfrag = id_map.get(purl.fragment) nfrag = id_map.get(purl.fragment)
if nfrag is None: if nfrag is None:
return url return url
purl = purl._replace(fragment=nfrag) purl = purl._replace(fragment=nfrag)
href = urlunparse(purl) href = urllib.parse.urlunparse(purl)
if href != url: if href != url:
self.replaced = True self.replaced = True
return href return href
@@ -89,7 +89,7 @@ class LinkRebaser(object):
def __call__(self, url): def __call__(self, url):
if url and url.startswith('#'): if url and url.startswith('#'):
return url return url
purl = urlparse(url) purl = urllib.parse.urlparse(url)
frag = purl.fragment frag = purl.fragment
name = self.container.href_to_name(url, self.old_name) name = self.container.href_to_name(url, self.old_name)
if not name: if not name:
+4 -4
View File
@@ -1,12 +1,12 @@
import copy, os, re import copy, os, re
from ebook_converter.polyglot.builtins import string_or_bytes from ebook_converter.polyglot.builtins import string_or_bytes
import urllib.parse
from ebook_converter.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML, OEB_DOCS from ebook_converter.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML, OEB_DOCS
from ebook_converter.ebooks.oeb.polish.errors import MalformedMarkup from ebook_converter.ebooks.oeb.polish.errors import MalformedMarkup
from ebook_converter.ebooks.oeb.polish.toc import node_from_loc from ebook_converter.ebooks.oeb.polish.toc import node_from_loc
from ebook_converter.ebooks.oeb.polish.replace import LinkRebaser from ebook_converter.ebooks.oeb.polish.replace import LinkRebaser
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.urllib import urlparse
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -160,7 +160,7 @@ class SplitLinkReplacer(object):
name = self.container.href_to_name(url, self.base) name = self.container.href_to_name(url, self.base)
if name != self.top_name: if name != self.top_name:
return url return url
purl = urlparse(url) purl = urllib.parse.urlparse(url)
if purl.fragment and purl.fragment in self.bottom_anchors: if purl.fragment and purl.fragment in self.bottom_anchors:
url = self.container.name_to_href(self.bottom_name, self.base) + '#' + purl.fragment url = self.container.name_to_href(self.bottom_name, self.base) + '#' + purl.fragment
self.replaced = True self.replaced = True
@@ -225,7 +225,7 @@ def split(container, name, loc_or_xpath, before=True, totals=None):
else: else:
fname = container.href_to_name(url, name) fname = container.href_to_name(url, name)
if fname == name: if fname == name:
purl = urlparse(url) purl = urllib.parse.urlparse(url)
if purl.fragment in anchors_in_top: if purl.fragment in anchors_in_top:
if r is root2: if r is root2:
a.set('href', '%s#%s' % (container.name_to_href(name, bottom_name), purl.fragment)) a.set('href', '%s#%s' % (container.name_to_href(name, bottom_name), purl.fragment))
@@ -310,7 +310,7 @@ class MergeLinkReplacer(object):
amap = self.anchor_map.get(name, None) amap = self.anchor_map.get(name, None)
if amap is None: if amap is None:
return url return url
purl = urlparse(url) purl = urllib.parse.urlparse(url)
frag = purl.fragment or '' frag = purl.fragment or ''
frag = amap.get(frag, frag) frag = amap.get(frag, frag)
url = self.container.name_to_href(self.master, self.base) + '#' + frag url = self.container.name_to_href(self.master, self.base) + '#' + frag
+4 -4
View File
@@ -3,6 +3,7 @@ from collections import Counter, OrderedDict
from functools import partial from functools import partial
from operator import itemgetter from operator import itemgetter
import pkg_resources import pkg_resources
import urllib.parse
from lxml import etree from lxml import etree
from lxml.builder import ElementMaker from lxml.builder import ElementMaker
@@ -16,7 +17,6 @@ from ebook_converter.ebooks.oeb.polish.opf import set_guide_item, get_book_langu
from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree
from ebook_converter.utils.localization import get_lang, canonicalize_lang, lang_as_iso639_1 from ebook_converter.utils.localization import get_lang, canonicalize_lang, lang_as_iso639_1
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.urllib import urlparse
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -150,7 +150,7 @@ def add_from_navpoint(container, navpoint, parent, ncx_name):
href = content.get('src', None) href = content.get('src', None)
if href: if href:
dest = container.href_to_name(href, base=ncx_name) dest = container.href_to_name(href, base=ncx_name)
frag = urlparse(href).fragment or None frag = urllib.parse.urlparse(href).fragment or None
return parent.add(text or None, dest or None, frag or None) return parent.add(text or None, dest or None, frag or None)
@@ -183,7 +183,7 @@ def parse_ncx(container, ncx_name):
href = pt.xpath('descendant::*[calibre:lower-case(local-name()) = "content"]/@src') href = pt.xpath('descendant::*[calibre:lower-case(local-name()) = "content"]/@src')
if href: if href:
dest = container.href_to_name(href[0], base=ncx_name) dest = container.href_to_name(href[0], base=ncx_name)
frag = urlparse(href[0]).fragment or None frag = urllib.parse.urlparse(href[0]).fragment or None
toc_root.page_list.append({'dest': dest, 'pagenum': pagenum, 'frag': frag}) toc_root.page_list.append({'dest': dest, 'pagenum': pagenum, 'frag': frag})
return toc_root return toc_root
@@ -195,7 +195,7 @@ def add_from_li(container, li, parent, nav_name):
href = x.get('href') href = x.get('href')
if href: if href:
dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name) dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name)
frag = urlparse(href).fragment or None frag = urllib.parse.urlparse(href).fragment or None
break break
return parent.add(text or None, dest or None, frag or None) return parent.add(text or None, dest or None, frag or None)
+12 -11
View File
@@ -3,6 +3,7 @@ Container-/OPF-based input OEBBook reader.
""" """
import sys, os, uuid, copy, re, io import sys, os, uuid, copy, re, io
from collections import defaultdict from collections import defaultdict
import urllib.parse
from lxml import etree from lxml import etree
@@ -23,7 +24,7 @@ from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.constants import __appname__, __version__ from ebook_converter.constants import __appname__, __version__
from ebook_converter import guess_type, xml_replace_entities from ebook_converter import guess_type, xml_replace_entities
from ebook_converter.polyglot.builtins import unicode_type from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import unquote, urldefrag, urlparse from ebook_converter.polyglot.urllib import unquote
__all__ = ['OEBReader'] __all__ = ['OEBReader']
@@ -203,12 +204,12 @@ class OEBReader(object):
for href in hrefs: for href in hrefs:
if isinstance(href, bytes): if isinstance(href, bytes):
href = href.decode('utf-8') href = href.decode('utf-8')
href, _ = urldefrag(href) href, _ = urllib.parse.urldefrag(href)
if not href: if not href:
continue continue
try: try:
href = item.abshref(urlnormalize(href)) href = item.abshref(urlnormalize(href))
scheme = urlparse(href).scheme scheme = urllib.parse.urlparse(href).scheme
except: except:
self.oeb.log.exception( self.oeb.log.exception(
'Skipping invalid href: %r'%href) 'Skipping invalid href: %r'%href)
@@ -221,9 +222,9 @@ class OEBReader(object):
except: except:
urls = [] urls = []
for url in urls: for url in urls:
href, _ = urldefrag(url) href, _ = urllib.parse.urldefrag(url)
href = item.abshref(urlnormalize(href)) href = item.abshref(urlnormalize(href))
scheme = urlparse(href).scheme scheme = urllib.parse.urlparse(href).scheme
if not scheme and href not in known: if not scheme and href not in known:
new.add(href) new.add(href)
unchecked.clear() unchecked.clear()
@@ -294,7 +295,7 @@ class OEBReader(object):
# TODO: handle fallback chains # TODO: handle fallback chains
continue continue
for href in selector(item.data): for href in selector(item.data):
href, _ = urldefrag(href) href, _ = urllib.parse.urldefrag(href)
if not href: if not href:
continue continue
try: try:
@@ -350,7 +351,7 @@ class OEBReader(object):
manifest = self.oeb.manifest manifest = self.oeb.manifest
for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'): for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
ref_href = elem.get('href') ref_href = elem.get('href')
path = urlnormalize(urldefrag(ref_href)[0]) path = urlnormalize(urllib.parse.urldefrag(ref_href)[0])
if path not in manifest.hrefs: if path not in manifest.hrefs:
corrected_href = None corrected_href = None
for href in manifest.hrefs: for href in manifest.hrefs:
@@ -393,7 +394,7 @@ class OEBReader(object):
# This node is useless # This node is useless
continue continue
href = item.abshref(urlnormalize(href[0])) if href and href[0] else '' href = item.abshref(urlnormalize(href[0])) if href and href[0] else ''
path, _ = urldefrag(href) path, _ = urllib.parse.urldefrag(href)
if path and path not in self.oeb.manifest.hrefs: if path and path not in self.oeb.manifest.hrefs:
path = urlnormalize(path) path = urlnormalize(path)
if href and path not in self.oeb.manifest.hrefs: if href and path not in self.oeb.manifest.hrefs:
@@ -468,7 +469,7 @@ class OEBReader(object):
href = site.get('href') href = site.get('href')
if not title or not href: if not title or not href:
continue continue
path, _ = urldefrag(urlnormalize(href)) path, _ = urllib.parse.urldefrag(urlnormalize(href))
if path not in self.oeb.manifest.hrefs: if path not in self.oeb.manifest.hrefs:
self.logger.warn('TOC reference %r not found' % href) self.logger.warn('TOC reference %r not found' % href)
continue continue
@@ -480,7 +481,7 @@ class OEBReader(object):
if 'toc' not in self.oeb.guide: if 'toc' not in self.oeb.guide:
return False return False
self.log.debug('Reading TOC from HTML...') self.log.debug('Reading TOC from HTML...')
itempath, frag = urldefrag(self.oeb.guide['toc'].href) itempath, frag = urllib.parse.urldefrag(self.oeb.guide['toc'].href)
item = self.oeb.manifest.hrefs[itempath] item = self.oeb.manifest.hrefs[itempath]
html = item.data html = item.data
if frag: if frag:
@@ -496,7 +497,7 @@ class OEBReader(object):
for anchor in xpath(html, './/h:a[@href]'): for anchor in xpath(html, './/h:a[@href]'):
href = anchor.attrib['href'] href = anchor.attrib['href']
href = item.abshref(urlnormalize(href)) href = item.abshref(urlnormalize(href))
path, frag = urldefrag(href) path, frag = urllib.parse.urldefrag(href)
if path not in self.oeb.manifest.hrefs: if path not in self.oeb.manifest.hrefs:
continue continue
title = xml2text(anchor) title = xml2text(anchor)
@@ -1,4 +1,5 @@
import textwrap import textwrap
import urllib.parse
from ebook_converter import guess_type from ebook_converter import guess_type
from ebook_converter.utils.imghdr import identify from ebook_converter.utils.imghdr import identify
@@ -93,7 +94,6 @@ class CoverManager(object):
return -1, -1 return -1, -1
def insert_cover(self): def insert_cover(self):
from ebook_converter.ebooks.oeb.base import urldefrag
g, m = self.oeb.guide, self.oeb.manifest g, m = self.oeb.guide, self.oeb.manifest
item = None item = None
href = None href = None
@@ -124,7 +124,7 @@ class CoverManager(object):
data=safe_xml_fromstring(tp)) data=safe_xml_fromstring(tp))
else: else:
item = self.oeb.manifest.hrefs[ item = self.oeb.manifest.hrefs[
urldefrag(self.oeb.guide['titlepage'].href)[0]] urllib.parse.urldefrag(self.oeb.guide['titlepage'].href)[0]]
if item is not None: if item is not None:
self.oeb.spine.insert(0, item, True) self.oeb.spine.insert(0, item, True)
if 'cover' not in self.oeb.guide.refs: if 'cover' not in self.oeb.guide.refs:
@@ -1,9 +1,9 @@
import posixpath import posixpath
import urllib.parse
from lxml import etree from lxml import etree
from ebook_converter.ebooks.oeb.base import rewrite_links, urlnormalize from ebook_converter.ebooks.oeb.base import rewrite_links, urlnormalize
from ebook_converter.polyglot.urllib import urldefrag, urlparse
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -38,7 +38,7 @@ class RenameFiles(object): # {{{
if self.oeb.guide: if self.oeb.guide:
for ref in self.oeb.guide.values(): for ref in self.oeb.guide.values():
href = urlnormalize(ref.href) href = urlnormalize(ref.href)
href, frag = urldefrag(href) href, frag = urllib.parse.urldefrag(href)
replacement = self.rename_map.get(href, None) replacement = self.rename_map.get(href, None)
if replacement is not None: if replacement is not None:
nhref = replacement nhref = replacement
@@ -52,7 +52,7 @@ class RenameFiles(object): # {{{
def fix_toc_entry(self, toc): def fix_toc_entry(self, toc):
if toc.href: if toc.href:
href = urlnormalize(toc.href) href = urlnormalize(toc.href)
href, frag = urldefrag(href) href, frag = urllib.parse.urldefrag(href)
replacement = self.rename_map.get(href, None) replacement = self.rename_map.get(href, None)
if replacement is not None: if replacement is not None:
@@ -66,11 +66,11 @@ class RenameFiles(object): # {{{
def url_replacer(self, orig_url): def url_replacer(self, orig_url):
url = urlnormalize(orig_url) url = urlnormalize(orig_url)
parts = urlparse(url) parts = urllib.parse.urlparse(url)
if parts.scheme: if parts.scheme:
# Only rewrite local URLs # Only rewrite local URLs
return orig_url return orig_url
path, frag = urldefrag(url) path, frag = urllib.parse.urldefrag(url)
if self.renamed_items_map: if self.renamed_items_map:
orig_item = self.renamed_items_map.get(self.current_item.href, self.current_item) orig_item = self.renamed_items_map.get(self.current_item.href, self.current_item)
else: else:
@@ -2,10 +2,11 @@ import sys, os, re
from xml.sax.saxutils import escape from xml.sax.saxutils import escape
from string import Formatter from string import Formatter
import pkg_resources import pkg_resources
import urllib.parse
from ebook_converter import guess_type, strftime from ebook_converter import guess_type, strftime
from ebook_converter.constants import iswindows from ebook_converter.constants import iswindows
from ebook_converter.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urldefrag, urlnormalize from ebook_converter.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urlnormalize
from ebook_converter.library.comments import comments_to_html, markdown from ebook_converter.library.comments import comments_to_html, markdown
from ebook_converter.utils.date import is_date_undefined, as_local_time from ebook_converter.utils.date import is_date_undefined, as_local_time
from ebook_converter.ebooks.chardet import strip_encoding_declarations from ebook_converter.ebooks.chardet import strip_encoding_declarations
@@ -73,7 +74,7 @@ class RemoveFirstImage(Base):
self.log.warn('Could not find first image to remove') self.log.warn('Could not find first image to remove')
if deleted_item is not None: if deleted_item is not None:
for item in list(self.oeb.toc): for item in list(self.oeb.toc):
href = urldefrag(item.href)[0] href = urllib.parse.urldefrag(item.href)[0]
if href == deleted_item.href: if href == deleted_item.href:
self.oeb.toc.remove(item) self.oeb.toc.remove(item)
self.oeb.guide.remove_by_href(deleted_item.href) self.oeb.guide.remove_by_href(deleted_item.href)
@@ -1,7 +1,9 @@
""" """
SVG rasterization transform. SVG rasterization transform.
""" """
import os, re import os
import re
import urllib.parse
# from PyQt5.Qt import ( # from PyQt5.Qt import (
# Qt, QByteArray, QBuffer, QIODevice, QColor, QImage, QPainter, QSvgRenderer) # Qt, QByteArray, QBuffer, QIODevice, QColor, QImage, QPainter, QSvgRenderer)
@@ -14,7 +16,6 @@ from ebook_converter.ebooks.oeb.stylizer import Stylizer
from ebook_converter.ptempfile import PersistentTemporaryFile from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.utils.imghdr import what from ebook_converter.utils.imghdr import what
from ebook_converter.polyglot.builtins import unicode_type from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import urldefrag
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -114,7 +115,7 @@ class SVGRasterizer(object):
hrefs = self.oeb.manifest.hrefs hrefs = self.oeb.manifest.hrefs
for elem in xpath(svg, '//svg:*[@xl:href]'): for elem in xpath(svg, '//svg:*[@xl:href]'):
href = urlnormalize(elem.attrib[XLINK('href')]) href = urlnormalize(elem.attrib[XLINK('href')])
path = urldefrag(href)[0] path = urllib.parse.urldefrag(href)[0]
if not path: if not path:
continue continue
abshref = item.abshref(path) abshref = item.abshref(path)
@@ -5,6 +5,7 @@ assumes a prior call to the flatcss transform.
""" """
import os, functools, collections, re, copy import os, functools, collections, re, copy
from collections import OrderedDict from collections import OrderedDict
import urllib.parse
from lxml.etree import XPath as _XPath from lxml.etree import XPath as _XPath
from lxml import etree from lxml import etree
@@ -12,7 +13,7 @@ from lxml import etree
from ebook_converter import as_unicode, force_unicode from ebook_converter import as_unicode, force_unicode
from ebook_converter.ebooks.epub import rules from ebook_converter.ebooks.epub import rules
from ebook_converter.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES, from ebook_converter.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
urldefrag, rewrite_links, XHTML, urlnormalize) rewrite_links, XHTML, urlnormalize)
from ebook_converter.ebooks.oeb.polish.split import do_split from ebook_converter.ebooks.oeb.polish.split import do_split
from ebook_converter.polyglot.builtins import iteritems, unicode_type from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.urllib import unquote from ebook_converter.polyglot.urllib import unquote
@@ -162,7 +163,7 @@ class Split(object):
rewrite_links(item.data, self.rewrite_links) rewrite_links(item.data, self.rewrite_links)
def rewrite_links(self, url): def rewrite_links(self, url):
href, frag = urldefrag(url) href, frag = urllib.parse.urldefrag(url)
try: try:
href = self.current_item.abshref(href) href = self.current_item.abshref(href)
except ValueError: except ValueError:
@@ -453,7 +454,7 @@ class FlowSplitter(object):
if self.oeb.guide: if self.oeb.guide:
for ref in self.oeb.guide.values(): for ref in self.oeb.guide.values():
href, frag = urldefrag(ref.href) href, frag = urllib.parse.urldefrag(ref.href)
if href == self.item.href: if href == self.item.href:
nhref = self.anchor_map[frag if frag else None] nhref = self.anchor_map[frag if frag else None]
if frag: if frag:
@@ -462,7 +463,7 @@ class FlowSplitter(object):
def fix_toc_entry(toc): def fix_toc_entry(toc):
if toc.href: if toc.href:
href, frag = urldefrag(toc.href) href, frag = urllib.parse.urldefrag(toc.href)
if href == self.item.href: if href == self.item.href:
nhref = self.anchor_map[frag if frag else None] nhref = self.anchor_map[frag if frag else None]
if frag: if frag:
@@ -476,7 +477,7 @@ class FlowSplitter(object):
if self.oeb.pages: if self.oeb.pages:
for page in self.oeb.pages: for page in self.oeb.pages:
href, frag = urldefrag(page.href) href, frag = urllib.parse.urldefrag(page.href)
if href == self.item.href: if href == self.item.href:
nhref = self.anchor_map[frag if frag else None] nhref = self.anchor_map[frag if frag else None]
if frag: if frag:
@@ -1,4 +1,6 @@
import re, uuid import re
import uuid
import urllib.parse
from lxml import etree from lxml import etree
from collections import OrderedDict, Counter from collections import OrderedDict, Counter
@@ -6,7 +8,6 @@ from collections import OrderedDict, Counter
from ebook_converter.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename from ebook_converter.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename
from ebook_converter.ebooks import ConversionError from ebook_converter.ebooks import ConversionError
from ebook_converter.polyglot.builtins import itervalues, unicode_type from ebook_converter.polyglot.builtins import itervalues, unicode_type
from ebook_converter.polyglot.urllib import urlparse
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -209,7 +210,7 @@ class DetectStructure(object):
for a in XPath('//h:a[@href]')(item.data): for a in XPath('//h:a[@href]')(item.data):
href = a.get('href') href = a.get('href')
try: try:
purl = urlparse(href) purl = urllib.parse.urlparse(href)
except ValueError: except ValueError:
self.log.warning('Ignoring malformed URL:', href) self.log.warning('Ignoring malformed URL:', href)
continue continue
@@ -1,9 +1,10 @@
""" """
OPF manifest trimming transform. OPF manifest trimming transform.
""" """
import urllib.parse
from ebook_converter.ebooks.oeb.base import CSS_MIME, OEB_DOCS from ebook_converter.ebooks.oeb.base import CSS_MIME, OEB_DOCS
from ebook_converter.ebooks.oeb.base import urlnormalize, iterlinks from ebook_converter.ebooks.oeb.base import urlnormalize, iterlinks
from ebook_converter.polyglot.urllib import urldefrag
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -32,7 +33,7 @@ class ManifestTrimmer(object):
elif item.value in oeb.manifest.ids: elif item.value in oeb.manifest.ids:
used.add(oeb.manifest.ids[item.value]) used.add(oeb.manifest.ids[item.value])
for ref in oeb.guide.values(): for ref in oeb.guide.values():
path, _ = urldefrag(ref.href) path, _ = urllib.parse.urldefrag(ref.href)
if path in oeb.manifest.hrefs: if path in oeb.manifest.hrefs:
used.add(oeb.manifest.hrefs[path]) used.add(oeb.manifest.hrefs[path])
# TOC items are required to be in the spine # TOC items are required to be in the spine
+5 -4
View File
@@ -4,11 +4,12 @@ PyTextile
A Humane Web Text Generator A Humane Web Text Generator
""" """
import re import re
import urllib.request
import urllib.parse
import uuid import uuid
from ebook_converter.utils.smartypants import smartyPants from ebook_converter.utils.smartypants import smartyPants
from ebook_converter.polyglot.builtins import unicode_type from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import urlopen, urlparse
# Last upstream version basis # Last upstream version basis
@@ -85,7 +86,7 @@ def getimagesize(url):
try: try:
p = ImageFile.Parser() p = ImageFile.Parser()
f = urlopen(url) f = urllib.request.urlopen(url)
while True: while True:
s = f.read(1024) s = f.read(1024)
if not s: if not s:
@@ -777,11 +778,11 @@ class Textile(object):
True True
""" """
(scheme, netloc) = urlparse(url)[0:2] (scheme, netloc) = urllib.parse.urlparse(url)[0:2]
return not scheme and not netloc return not scheme and not netloc
def relURL(self, url): def relURL(self, url):
scheme = urlparse(url)[0] scheme = urllib.parse.urlparse(url)[0]
if self.restricted and scheme and scheme not in self.url_schemes: if self.restricted and scheme and scheme not in self.url_schemes:
return '#' return '#'
return url return url
-8
View File
@@ -1,8 +0,0 @@
from ebook_converter.polyglot.builtins import is_py3
if is_py3:
from functools import lru_cache
else:
from backports.functools_lru_cache import lru_cache
lru_cache
-10
View File
@@ -1,10 +0,0 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2019, Eli Schwartz <eschwartz@archlinux.org>
from ebook_converter.polyglot.builtins import is_py3
if is_py3:
from html.entities import name2codepoint
else:
from htmlentitydefs import name2codepoint
+7 -32
View File
@@ -1,15 +1,12 @@
from ebook_converter.polyglot.builtins import is_py3 from urllib.request import (build_opener, getproxies, install_opener,
HTTPBasicAuthHandler, HTTPCookieProcessor, HTTPDigestAuthHandler,
url2pathname, urlopen, Request)
from urllib.parse import (parse_qs, quote, unquote as uq, quote_plus, urldefrag,
urlencode, urljoin, urlparse, urlunparse, urlsplit, urlunsplit)
from urllib.error import HTTPError, URLError
if is_py3: def unquote(x, encoding='utf-8', errors='replace'):
from urllib.request import (build_opener, getproxies, install_opener, # noqa
HTTPBasicAuthHandler, HTTPCookieProcessor, HTTPDigestAuthHandler, # noqa
url2pathname, urlopen, Request) # noqa
from urllib.parse import (parse_qs, quote, unquote as uq, quote_plus, urldefrag, # noqa
urlencode, urljoin, urlparse, urlunparse, urlsplit, urlunsplit) # noqa
from urllib.error import HTTPError, URLError # noqa
def unquote(x, encoding='utf-8', errors='replace'):
binary = isinstance(x, bytes) binary = isinstance(x, bytes)
if binary: if binary:
x = x.decode(encoding, errors) x = x.decode(encoding, errors)
@@ -18,28 +15,6 @@ if is_py3:
ans = ans.encode(encoding, errors) ans = ans.encode(encoding, errors)
return ans return ans
else:
from urllib import (getproxies, quote, unquote as uq, quote_plus, url2pathname, # noqa
urlencode) # noqa
from urllib2 import (build_opener, install_opener, HTTPBasicAuthHandler, # noqa
HTTPCookieProcessor, HTTPDigestAuthHandler, HTTPError, URLError, # noqa
urlopen, Request) # noqa
from urlparse import (parse_qs, urldefrag, urljoin, urlparse, urlunparse, # noqa
urlsplit, urlunsplit) # noqa
def unquote(x, encoding='utf-8', errors='replace'):
# unquote must run on a bytestring and will return a bytestring
# If it runs on a unicode object, it returns a double encoded unicode
# string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
# and the latter is correct
binary = isinstance(x, bytes)
if not binary:
x = x.encode(encoding, errors)
ans = uq(x)
if not binary:
ans = ans.decode(encoding, errors)
return ans
def unquote_plus(x, encoding='utf-8', errors='replace'): def unquote_plus(x, encoding='utf-8', errors='replace'):
q, repl = (b'+', b' ') if isinstance(x, bytes) else ('+', ' ') q, repl = (b'+', b' ') if isinstance(x, bytes) else ('+', ' ')
+3 -2
View File
@@ -1,7 +1,7 @@
import re import re
import html.entities
from ebook_converter.polyglot.builtins import codepoint_to_chr from ebook_converter.polyglot.builtins import codepoint_to_chr
from ebook_converter.polyglot.html_entities import name2codepoint
from ebook_converter.constants import plugins, preferred_encoding from ebook_converter.constants import plugins, preferred_encoding
@@ -77,7 +77,8 @@ def unescape(text, rm=False, rchar=''):
else: else:
# named entity # named entity
try: try:
text = codepoint_to_chr(name2codepoint[text[1:-1]]) text = codepoint_to_chr(html.entities
.name2codepoint[text[1:-1]])
except KeyError: except KeyError:
pass pass
if rm: if rm:
+13 -7
View File
@@ -1,10 +1,16 @@
import os, errno, sys import errno
from threading import Thread import functools
import os
import sys
import threading
from ebook_converter import force_unicode from ebook_converter import force_unicode
from ebook_converter.constants import iswindows, get_windows_username, islinux, filesystem_encoding, ispy3 from ebook_converter.constants import filesystem_encoding
from ebook_converter.constants import get_windows_username
from ebook_converter.constants import islinux
from ebook_converter.constants import ispy3
from ebook_converter.constants import iswindows
from ebook_converter.utils.filenames import ascii_filename from ebook_converter.utils.filenames import ascii_filename
from ebook_converter.polyglot.functools import lru_cache
__license__ = 'GPL v3' __license__ = 'GPL v3'
@@ -24,7 +30,7 @@ def eintr_retry_call(func, *args, **kwargs):
raise raise
@lru_cache() @functools.lru_cache()
def socket_address(which): def socket_address(which):
if iswindows: if iswindows:
ans = r'\\.\pipe\Calibre' + which ans = r'\\.\pipe\Calibre' + which
@@ -58,12 +64,12 @@ def viewer_socket_address():
return socket_address('Viewer' if iswindows else 'viewer') return socket_address('Viewer' if iswindows else 'viewer')
class RC(Thread): class RC(threading.Thread):
def __init__(self, print_error=True, socket_address=None): def __init__(self, print_error=True, socket_address=None):
self.print_error = print_error self.print_error = print_error
self.socket_address = socket_address or gui_socket_address() self.socket_address = socket_address or gui_socket_address()
Thread.__init__(self) threading.Thread.__init__(self)
self.conn = None self.conn = None
self.daemon = True self.daemon = True