1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-04-20 13:11:27 +02:00

Removing the is_py3 method and code duplicated by urllib.

This commit is contained in:
2020-04-19 21:22:24 +02:00
parent b66cbd2c1e
commit ef7e2b10be
35 changed files with 267 additions and 254 deletions
+15 -9
View File
@@ -3,10 +3,18 @@ __license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
import sys, os, re, time, random, warnings
import math
import os
import pkg_resources
import random
import re
import sys
import time
import urllib.parse
import urllib.request
import warnings
from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, hasenv, native_string_type
from math import floor
from functools import partial
if not hasenv('CALIBRE_SHOW_DEPRECATION_WARNINGS'):
@@ -276,8 +284,7 @@ def extract(path, dir):
def get_proxies(debug=True):
from polyglot.urllib import getproxies
proxies = getproxies()
proxies = urllib.request.getproxies()
for key, proxy in list(proxies.items()):
if not proxy or '..' in proxy or key == 'auto':
del proxies[key]
@@ -338,10 +345,9 @@ def get_proxy_info(proxy_scheme, proxy_string):
is not available in the string. If an exception occurs parsing the string
this method returns None.
'''
from polyglot.urllib import urlparse
try:
proxy_url = '%s://%s'%(proxy_scheme, proxy_string)
urlinfo = urlparse(proxy_url)
urlinfo = urllib.parse.urlparse(proxy_url)
ans = {
'scheme': urlinfo.scheme,
'hostname': urlinfo.hostname,
@@ -414,13 +420,13 @@ def fit_image(width, height, pwidth, pheight):
scaled = height > pheight or width > pwidth
if height > pheight:
corrf = pheight / float(height)
width, height = floor(corrf*width), pheight
width, height = math.floor(corrf*width), pheight
if width > pwidth:
corrf = pwidth / float(width)
width, height = pwidth, floor(corrf*height)
width, height = pwidth, math.floor(corrf*height)
if height > pheight:
corrf = pheight / float(height)
width, height = floor(corrf*width), pheight
width, height = math.floor(corrf*width), pheight
return scaled, int(width), int(height)
@@ -2,7 +2,11 @@
CHM File decoding support
"""
import os
from lxml import html
from ebook_converter.polyglot.urllib import unquote as _unquote
from ebook_converter.ebooks.oeb.base import urlquote
from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.constants import filesystem_encoding
@@ -109,10 +113,7 @@ class CHMInput(InputFormatPlugin):
return oeb
def _create_html_root(self, hhcpath, log, encoding):
from lxml import html
from ebook_converter.polyglot.urllib import unquote as _unquote
from ebook_converter.ebooks.oeb.base import urlquote
from ebook_converter.ebooks.chardet import xml_to_unicode
hhcdata = self._read_file(hhcpath)
hhcdata = hhcdata.decode(encoding)
hhcdata = xml_to_unicode(hhcdata, verbose=True,
@@ -1,4 +1,7 @@
import os, shutil, re
import os
import re
import shutil
import urllib.parse
from ebook_converter.customize.conversion import (OutputFormatPlugin,
OptionRecommendation)
@@ -514,7 +517,7 @@ class EPUBOutput(OutputFormatPlugin):
'''
Perform toc link transforms to alleviate slow loading.
'''
from ebook_converter.ebooks.oeb.base import urldefrag, XPath
from ebook_converter.ebooks.oeb.base import XPath
from ebook_converter.ebooks.oeb.polish.toc import item_at_top
def frag_is_at_top(root, frag):
@@ -527,7 +530,7 @@ class EPUBOutput(OutputFormatPlugin):
def simplify_toc_entry(toc):
if toc.href:
href, frag = urldefrag(toc.href)
href, frag = urllib.parse.urldefrag(toc.href)
if frag:
for x in self.oeb.spine:
if x.href == href:
@@ -1,5 +1,8 @@
import re, tempfile, os
from functools import partial
import functools
import os
import re
import tempfile
import urllib.parse
from ebook_converter.constants import islinux, isbsd
from ebook_converter.customize.conversion import (InputFormatPlugin,
@@ -97,7 +100,7 @@ class HTMLInput(InputFormatPlugin):
import uuid
from ebook_converter.ebooks.conversion.plumber import create_oebbook
from ebook_converter.ebooks.oeb.base import (DirContainer,
rewrite_links, urlnormalize, urldefrag, BINARY_MIME, OEB_STYLES,
rewrite_links, urlnormalize, BINARY_MIME, OEB_STYLES,
xpath, urlquote)
from ebook_converter import guess_type
from ebook_converter.ebooks.oeb.transforms.metadata import \
@@ -163,7 +166,7 @@ class HTMLInput(InputFormatPlugin):
path = path.lower()
self.added_resources[path] = href
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
self.urldefrag = urldefrag
self.urldefrag = urllib.parse.urldefrag
self.guess_type, self.BINARY_MIME = guess_type, BINARY_MIME
self.log('Rewriting HTML links')
@@ -176,7 +179,8 @@ class HTMLInput(InputFormatPlugin):
item = oeb.manifest.hrefs[href]
except KeyError:
item = oeb.manifest.hrefs[urlnormalize(href)]
rewrite_links(item.data, partial(self.resource_adder, base=dpath))
rewrite_links(item.data,
functools.partial(self.resource_adder, base=dpath))
for item in oeb.manifest.values():
if item.media_type in self.OEB_STYLES:
@@ -186,7 +190,7 @@ class HTMLInput(InputFormatPlugin):
dpath = os.path.dirname(path)
break
css_parser.replaceUrls(item.data,
partial(self.resource_adder, base=dpath))
functools.partial(self.resource_adder, base=dpath))
toc = self.oeb.toc
self.oeb.auto_generated_toc = True
@@ -242,7 +246,6 @@ class HTMLInput(InputFormatPlugin):
return link, frag
def resource_adder(self, link_, base=None):
from ebook_converter.polyglot.urllib import quote
link, frag = self.link_to_local_path(link_, base=base)
if link is None:
return link_
@@ -287,9 +290,9 @@ class HTMLInput(InputFormatPlugin):
# file, therefore we quote it here.
if isinstance(bhref, unicode_type):
bhref = bhref.encode('utf-8')
item.html_input_href = as_unicode(quote(bhref))
item.html_input_href = as_unicode(urllib.parse.quote(bhref))
if guessed in self.OEB_STYLES:
item.override_css_fetch = partial(
item.override_css_fetch = functools.partial(
self.css_import_handler, os.path.dirname(link))
item.data
self.added_resources[link] = href
@@ -1,12 +1,17 @@
import os, re, shutil
from os.path import dirname, abspath, relpath as _relpath, exists, basename
import os
import pkg_resources
import re
import shutil
from lxml import etree
from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
from ebook_converter import CurrentDir
from ebook_converter.ptempfile import PersistentTemporaryDirectory
from ebook_converter.customize.conversion import OutputFormatPlugin, OptionRecommendation
from ebook_converter.ebooks.oeb.base import element
from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import unquote
from ebook_converter.ptempfile import PersistentTemporaryDirectory
from ebook_converter.utils.cleantext import clean_xml_chars
__license__ = 'GPL 3'
__copyright__ = '2010, Fabian Grassl <fg@jusmeum.de>'
@@ -14,7 +19,7 @@ __docformat__ = 'restructuredtext en'
def relpath(*args):
return _relpath(*args).replace(os.sep, '/')
return os.path.relpath(*args).replace(os.sep, '/')
class HTMLOutput(OutputFormatPlugin):
@@ -47,11 +52,7 @@ class HTMLOutput(OutputFormatPlugin):
'''
Generate table of contents
'''
from lxml import etree
from ebook_converter.polyglot.urllib import unquote
from ebook_converter.ebooks.oeb.base import element
from ebook_converter.utils.cleantext import clean_xml_chars
with CurrentDir(output_dir):
def build_node(current_node, parent=None):
if parent is None:
@@ -60,7 +61,8 @@ class HTMLOutput(OutputFormatPlugin):
parent = element(parent, ('ul'))
for node in current_node.nodes:
point = element(parent, 'li')
href = relpath(abspath(unquote(node.href)), dirname(ref_url))
href = relpath(os.path.abspath(unquote(node.href)),
os.path.dirname(ref_url))
if isinstance(href, bytes):
href = href.decode('utf-8')
link = element(point, 'a', href=clean_xml_chars(href))
@@ -131,10 +133,10 @@ class HTMLOutput(OutputFormatPlugin):
tempdir = os.path.realpath(PersistentTemporaryDirectory())
output_file = os.path.join(tempdir,
basename(re.sub(r'\.zip', '', output_path)+'.html'))
os.path.basename(re.sub(r'\.zip', '', output_path)+'.html'))
output_dir = re.sub(r'\.html', '', output_file)+'_files'
if not exists(output_dir):
if not os.path.exists(output_dir):
os.makedirs(output_dir)
css_path = output_dir+os.sep+'calibreHtmlOutBasicCss.css'
@@ -145,9 +147,10 @@ class HTMLOutput(OutputFormatPlugin):
html_toc = self.generate_html_toc(oeb_book, output_file, output_dir)
templite = Templite(template_html_index_data)
nextLink = oeb_book.spine[0].href
nextLink = relpath(output_dir+os.sep+nextLink, dirname(output_file))
cssLink = relpath(abspath(css_path), dirname(output_file))
tocUrl = relpath(output_file, dirname(output_file))
nextLink = relpath(output_dir+os.sep+nextLink,
os.path.dirname(output_file))
cssLink = relpath(os.path.abspath(css_path), os.path.dirname(output_file))
tocUrl = relpath(output_file, os.path.dirname(output_file))
t = templite.render(has_toc=bool(oeb_book.toc.count()),
toc=html_toc, meta=meta, nextLink=nextLink,
tocUrl=tocUrl, cssLink=cssLink,
@@ -158,9 +161,9 @@ class HTMLOutput(OutputFormatPlugin):
with CurrentDir(output_dir):
for item in oeb_book.manifest:
path = abspath(unquote(item.href))
dir = dirname(path)
if not exists(dir):
path = os.path.abspath(unquote(item.href))
dir = os.path.dirname(path)
if not os.path.exists(dir):
os.makedirs(dir)
if item.spine_position is not None:
with open(path, 'wb') as f:
@@ -171,8 +174,8 @@ class HTMLOutput(OutputFormatPlugin):
item.unload_data_from_memory(memory=path)
for item in oeb_book.spine:
path = abspath(unquote(item.href))
dir = dirname(path)
path = os.path.abspath(unquote(item.href))
dir = os.path.dirname(path)
root = item.data.getroottree()
# get & clean HTML <HEAD>-data
@@ -191,18 +194,18 @@ class HTMLOutput(OutputFormatPlugin):
# generate link to next page
if item.spine_position+1 < len(oeb_book.spine):
nextLink = oeb_book.spine[item.spine_position+1].href
nextLink = relpath(abspath(nextLink), dir)
nextLink = relpath(os.path.abspath(nextLink), dir)
else:
nextLink = None
# generate link to previous page
if item.spine_position > 0:
prevLink = oeb_book.spine[item.spine_position-1].href
prevLink = relpath(abspath(prevLink), dir)
prevLink = relpath(os.path.abspath(prevLink), dir)
else:
prevLink = None
cssLink = relpath(abspath(css_path), dir)
cssLink = relpath(os.path.abspath(css_path), dir)
tocUrl = relpath(output_file, dir)
firstContentPageLink = oeb_book.spine[0].href
@@ -222,8 +225,8 @@ class HTMLOutput(OutputFormatPlugin):
item.unload_data_from_memory(memory=path)
zfile = zipfile.ZipFile(output_path, "w")
zfile.add_dir(output_dir, basename(output_dir))
zfile.write(output_file, basename(output_file), zipfile.ZIP_DEFLATED)
zfile.add_dir(output_dir, os.path.basename(output_dir))
zfile.write(output_file, os.path.basename(output_file), zipfile.ZIP_DEFLATED)
if opts.extract_to:
if os.path.exists(opts.extract_to):
@@ -1,9 +1,14 @@
import os, re
import os
import re
from lxml import etree
from ebook_converter.customize.conversion import (OutputFormatPlugin,
OptionRecommendation)
from ebook_converter import CurrentDir
from ebook_converter.polyglot.urllib import unquote
from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES
from ebook_converter.ebooks.oeb.normalize_css import condense_sheet
__license__ = 'GPL 3'
@@ -21,14 +26,10 @@ class OEBOutput(OutputFormatPlugin):
recommendations = {('pretty_print', True, OptionRecommendation.HIGH)}
def convert(self, oeb_book, output_path, input_plugin, opts, log):
from ebook_converter.polyglot.urllib import unquote
from lxml import etree
self.log, self.opts = log, opts
if not os.path.exists(output_path):
os.makedirs(output_path)
from ebook_converter.ebooks.oeb.base import OPF_MIME, NCX_MIME, PAGE_MAP_MIME, OEB_STYLES
from ebook_converter.ebooks.oeb.normalize_css import condense_sheet
with CurrentDir(output_path):
results = oeb_book.to_opf2(page_map=True)
for key in (OPF_MIME, NCX_MIME, PAGE_MAP_MIME):
+7 -6
View File
@@ -1,9 +1,10 @@
import posixpath, re
from uuid import uuid4
import posixpath
import re
import urllib.parse
import uuid
from ebook_converter.utils.filenames import ascii_text
from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import urlparse
__license__ = 'GPL v3'
@@ -67,7 +68,7 @@ class LinksManager(object):
self.namespace = namespace
self.log = log
self.document_relationships = document_relationships
self.top_anchor = unicode_type(uuid4().hex)
self.top_anchor = unicode_type(uuid.uuid4().hex)
self.anchor_map = {}
self.used_bookmark_names = set()
self.bmark_id = 0
@@ -100,7 +101,7 @@ class LinksManager(object):
def serialize_hyperlink(self, parent, link):
item, url, tooltip = link
purl = urlparse(url)
purl = urllib.parse.urlparse(url)
href = purl.path
def make_link(parent, anchor=None, id=None, tooltip=None):
@@ -133,7 +134,7 @@ class LinksManager(object):
def process_toc_node(self, toc, level=0):
href = toc.href
if href:
purl = urlparse(href)
purl = urllib.parse.urlparse(href)
href = purl.path
if href in self.document_hrefs:
key = (href, purl.fragment or self.top_anchor)
+5 -3
View File
@@ -1,8 +1,11 @@
"""
Transform OEB content into FB2 markup
"""
import re, textwrap, uuid
from datetime import datetime
import re
import textwrap
import urllib.parse
import uuid
from lxml import etree
@@ -14,7 +17,6 @@ from ebook_converter.utils.img import save_cover_data_to
from ebook_converter.ebooks.oeb.base import urlnormalize
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
from ebook_converter.polyglot.binary import as_base64_unicode
from ebook_converter.polyglot.urllib import urlparse
__license__ = 'GPL 3'
@@ -508,7 +510,7 @@ class FB2MLizer(object):
tags.append('p')
if tag == 'a' and elem_tree.attrib.get('href', None):
# Handle only external links for now
if urlparse(elem_tree.attrib['href']).netloc:
if urllib.parse.urlparse(elem_tree.attrib['href']).netloc:
p_txt, p_tag = self.ensure_p()
fb2_out += p_txt
tags += p_tag
+11 -11
View File
@@ -1,14 +1,17 @@
"""
Input plugin for HTML or OPF ebooks.
"""
import os, re, sys, errno as gerrno
import errno
import os
import re
import sys
import urllib.parse
from ebook_converter.ebooks.oeb.base import urlunquote
from ebook_converter.ebooks.chardet import detect_xml_encoding
from ebook_converter.constants import iswindows
from ebook_converter import unicode_path, as_unicode, replace_entities
from ebook_converter.polyglot.builtins import is_py3, unicode_type
from ebook_converter.polyglot.urllib import urlparse, urlunparse
from ebook_converter.polyglot.builtins import unicode_type
__license__ = 'GPL v3'
@@ -29,7 +32,7 @@ class Link(object):
if iswindows and path.startswith('/'):
path = path[1:]
isabs = True
path = urlunparse(('', '', path, url.params, url.query, ''))
path = urllib.parse.urlunparse(('', '', path, url.params, url.query, ''))
path = urlunquote(path)
if isabs or os.path.isabs(path):
return path
@@ -43,7 +46,7 @@ class Link(object):
'''
assert isinstance(url, unicode_type) and isinstance(base, unicode_type)
self.url = url
self.parsed_url = urlparse(self.url)
self.parsed_url = urllib.parse.urlparse(self.url)
self.is_local = self.parsed_url.scheme in ('', 'file')
self.is_internal = self.is_local and not bool(self.parsed_url.path)
self.path = None
@@ -62,16 +65,13 @@ class Link(object):
def __str__(self):
return 'Link: %s --> %s'%(self.url, self.path)
if not is_py3:
__unicode__ = __str__
class IgnoreFile(Exception):
def __init__(self, msg, errno):
def __init__(self, msg, err_no):
Exception.__init__(self, msg)
self.doesnt_exist = errno == gerrno.ENOENT
self.errno = errno
self.errno = err_no
self.doesnt_exist = err_no == errno.ENOENT
class HTMLFile(object):
+2 -2
View File
@@ -3,6 +3,7 @@ Transform OEB content into a single (more or less) HTML file.
"""
import os
import re
import urllib.parse
from functools import partial
from lxml import html
@@ -13,7 +14,6 @@ from ebook_converter.ebooks.oeb.base import (
from ebook_converter.ebooks.oeb.stylizer import Stylizer
from ebook_converter.utils.logging import default_log
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes, as_bytes
from ebook_converter.polyglot.urllib import urldefrag
__license__ = 'GPL 3'
@@ -101,7 +101,7 @@ class OEB2HTML(object):
for attr in attribs:
if attr in link_attrs:
href = item.abshref(attribs[attr])
href, id = urldefrag(href)
href, id = urllib.parse.urldefrag(href)
if href in self.base_hrefs:
self.get_link_id(href, id)
@@ -12,6 +12,7 @@ import os
import re
import sys
import tempfile
import urllib.parse
from collections import deque
from functools import partial
from itertools import chain
@@ -37,7 +38,7 @@ from ebook_converter.ebooks.lrf.pylrs.pylrs import (
)
from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.polyglot.builtins import getcwd, itervalues, string_or_bytes, unicode_type
from ebook_converter.polyglot.urllib import unquote, urlparse
from ebook_converter.polyglot.urllib import unquote
from PIL import Image as PILImage
@@ -51,7 +52,7 @@ def update_css(ncss, ocss):
def munge_paths(basepath, url):
purl = urlparse(unquote(url),)
purl = urllib.parse.urlparse(unquote(url),)
path, fragment = purl[2], purl[5]
if path:
path = path.replace('/', os.sep)
@@ -1471,7 +1472,8 @@ class HTMLConverter(object):
pass
elif tagname == 'a' and self.link_levels >= 0:
if tag.has_attr('href') and not self.link_exclude.match(tag['href']):
if urlparse(tag['href'])[0] not in ('', 'file'):
if urllib.parse.urlparse(tag['href'])[0] not in ('',
'file'):
self.process_children(tag, tag_css, tag_pseudo_css)
else:
path = munge_paths(self.target_prefix, tag['href'])[0]
@@ -1513,7 +1515,7 @@ class HTMLConverter(object):
dropcaps = tag.get('class') in ('libprs500_dropcaps', ['libprs500_dropcaps'])
self.process_image(path, tag_css, width, height,
dropcaps=dropcaps, rescale=True)
elif not urlparse(tag['src'])[0]:
elif not urllib.parse.urlparse(tag['src'])[0]:
self.log.warn('Could not find image: '+tag['src'])
else:
self.log.debug("Failed to process: %s"%unicode_type(tag))
+8 -5
View File
@@ -2,12 +2,15 @@
Provides abstraction for metadata reading.writing from a variety of ebook
formats.
"""
import os, sys, re
import os
import re
import sys
import urllib.parse
from ebook_converter import relpath, guess_type, prints, force_unicode
from ebook_converter.utils.config_base import tweaks
from ebook_converter.polyglot.builtins import codepoint_to_chr, unicode_type, getcwd, iteritems, itervalues, as_unicode
from ebook_converter.polyglot.urllib import quote, unquote, urlparse
from ebook_converter.polyglot.urllib import unquote
__license__ = 'GPL v3'
@@ -241,7 +244,7 @@ class Resource(object):
path = path.decode(sys.getfilesystemencoding())
self.path = path
else:
url = urlparse(href_or_path)
url = urllib.parse.urlparse(href_or_path)
if url[0] not in ('', 'file'):
self._href = href_or_path
else:
@@ -268,7 +271,7 @@ class Resource(object):
if self.path is None:
return self._href
f = self.fragment.encode('utf-8') if isinstance(self.fragment, unicode_type) else self.fragment
frag = '#'+as_unicode(quote(f)) if self.fragment else ''
frag = '#'+as_unicode(urllib.parse.quote(f)) if self.fragment else ''
if self.path == basedir:
return ''+frag
try:
@@ -277,7 +280,7 @@ class Resource(object):
rpath = self.path
if isinstance(rpath, unicode_type):
rpath = rpath.encode('utf-8')
return as_unicode(quote(rpath.replace(os.sep, '/')))+frag
return as_unicode(urllib.parse.quote(rpath.replace(os.sep, '/')))+frag
def set_basedir(self, path):
self._basedir = path
+13 -3
View File
@@ -1,7 +1,17 @@
"""
lxml based OPF parser.
"""
import re, sys, unittest, functools, os, uuid, glob, io, json, copy
import copy
import functools
import glob
import io
import json
import os
import re
import sys
import unittest
import urllib.parse
import uuid
from lxml import etree
@@ -18,7 +28,7 @@ from ebook_converter.utils.cleantext import clean_ascii_chars, clean_xml_chars
from ebook_converter.utils.config import tweaks
from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.polyglot.builtins import iteritems, unicode_type, getcwd
from ebook_converter.polyglot.urllib import unquote, urlparse
from ebook_converter.polyglot.urllib import unquote
__license__ = 'GPL v3'
@@ -76,7 +86,7 @@ class Resource(object): # {{{
self.path = path
else:
href_or_path = href_or_path
url = urlparse(href_or_path)
url = urllib.parse.urlparse(href_or_path)
if url[0] not in ('', 'file'):
self._href = href_or_path
else:
+11 -7
View File
@@ -1,5 +1,9 @@
import os, glob, re, functools
from collections import Counter
import collections
import functools
import glob
import os
import re
import urllib.parse
from lxml import etree
from lxml.builder import ElementMaker
@@ -9,7 +13,7 @@ from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.polyglot.builtins import unicode_type, getcwd
from ebook_converter.polyglot.urllib import unquote, urlparse
from ebook_converter.polyglot.urllib import unquote
__license__ = 'GPL v3'
@@ -30,7 +34,7 @@ def parse_html_toc(data):
data = xml_to_unicode(data, strip_encoding_pats=True, resolve_entities=True)[0]
root = parse(clean_xml_chars(data), maybe_xhtml=True, keep_doctype=False, sanitize_names=True)
for a in root.xpath('//*[@href and local-name()="a"]'):
purl = urlparse(unquote(a.get('href')))
purl = urllib.parse.urlparse(unquote(a.get('href')))
href, fragment = purl[2], purl[5]
if not fragment:
fragment = None
@@ -142,7 +146,7 @@ class TOC(list):
if toc is not None:
if toc.lower() not in ('ncx', 'ncxtoc'):
toc = urlparse(unquote(toc))[2]
toc = urllib.parse.urlparse(unquote(toc))[2]
toc = toc.replace('/', os.sep)
if not os.path.isabs(toc):
toc = os.path.join(self.base_path, toc)
@@ -209,7 +213,7 @@ class TOC(list):
if content and text:
content = content[0]
# if get_attr(content, attr='src'):
purl = urlparse(content.get('src'))
purl = urllib.parse.urlparse(content.get('src'))
href, fragment = unquote(purl[2]), unquote(purl[5])
nd = dest.add_item(href, fragment, text)
nd.play_order = play_order
@@ -253,7 +257,7 @@ class TOC(list):
navmap = E.navMap()
root.append(navmap)
root.set('{http://www.w3.org/XML/1998/namespace}lang', 'en')
c = Counter()
c = collections.Counter()
def navpoint(parent, np):
text = np.text
+17 -15
View File
@@ -1,7 +1,10 @@
import struct, re, os
from collections import namedtuple
from itertools import repeat
from uuid import uuid4
import collections
import itertools
import os
import re
import struct
import urllib.parse
import uuid
from lxml import etree
@@ -16,21 +19,20 @@ from ebook_converter.ebooks.mobi.utils import read_font_record
from ebook_converter.ebooks.oeb.parse_utils import parse_html
from ebook_converter.ebooks.oeb.base import XPath, XHTML, xml2text
from ebook_converter.polyglot.builtins import unicode_type, getcwd, as_unicode
from ebook_converter.polyglot.urllib import urldefrag
__license__ = 'GPL v3'
__copyright__ = '2012, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
Part = namedtuple('Part',
Part = collections.namedtuple('Part',
'num type filename start end aid')
Elem = namedtuple('Elem',
Elem = collections.namedtuple('Elem',
'insert_pos toc_text file_number sequence_number start_pos '
'length')
FlowInfo = namedtuple('FlowInfo',
FlowInfo = collections.namedtuple('FlowInfo',
'type format dir fname')
# locate beginning and ending positions of tag with specific aid attribute
@@ -81,7 +83,7 @@ class Mobi8Reader(object):
def __call__(self):
self.mobi6_reader.check_for_drm()
self.aid_anchor_suffix = uuid4().hex.encode('utf-8')
self.aid_anchor_suffix = uuid.uuid4().hex.encode('utf-8')
bh = self.mobi6_reader.book_header
if self.mobi6_reader.kf8_type == 'joint':
offset = self.mobi6_reader.kf8_boundary + 2
@@ -127,7 +129,7 @@ class Mobi8Reader(object):
if self.header.skelidx != NULL_INDEX:
table = read_index(self.kf8_sections, self.header.skelidx,
self.header.codec)[0]
File = namedtuple('File',
File = collections.namedtuple('File',
'file_number name divtbl_count start_position length')
for i, text in enumerate(table):
@@ -149,7 +151,7 @@ class Mobi8Reader(object):
if self.header.othidx != NULL_INDEX:
table, cncx = read_index(self.kf8_sections, self.header.othidx,
self.header.codec)
Item = namedtuple('Item',
Item = collections.namedtuple('Item',
'type title pos_fid')
for i, ref_type in enumerate(table):
@@ -222,7 +224,7 @@ class Mobi8Reader(object):
self.parts.append(skeleton)
if divcnt < 1:
# Empty file
aidtext = unicode_type(uuid4())
aidtext = unicode_type(uuid.uuid4())
filename = aidtext + '.html'
self.partinfo.append(Part(skelnum, 'text', filename, skelpos,
baseptr, aidtext))
@@ -293,7 +295,7 @@ class Mobi8Reader(object):
for part in self.partinfo:
if pos >= part.start and pos < part.end:
return part
return Part(*repeat(None, len(Part._fields)))
return Part(*itertools.repeat(None, len(Part._fields)))
def get_id_tag_by_pos_fid(self, posfid, offset):
# first convert kindle:pos:fid and offset info to position in file
@@ -475,7 +477,7 @@ class Mobi8Reader(object):
for ref in guide:
if ref.type == 'toc':
href = ref.href()
href, frag = urldefrag(href)
href, frag = urllib.parse.urldefrag(href)
if os.path.exists(href.replace('/', os.sep)):
try:
toc = self.read_inline_toc(href, frag)
@@ -554,7 +556,7 @@ class Mobi8Reader(object):
if reached and elem.tag == XHTML('a') and elem.get('href',
False):
href = elem.get('href')
href, frag = urldefrag(href)
href, frag = urllib.parse.urldefrag(href)
href = base_href + '/' + href
text = xml2text(elem).strip()
if (text, href, frag) in seen:
@@ -1,7 +1,8 @@
import collections
import io
import re
import unicodedata
from collections import defaultdict
from io import BytesIO
import urllib.parse
from ebook_converter.ebooks.mobi.mobiml import MBP_NS
from ebook_converter.ebooks.mobi.utils import is_guide_ref_start
@@ -9,7 +10,6 @@ from ebook_converter.ebooks.oeb.base import (
OEB_DOCS, XHTML, XHTML_NS, XML_NS, namespace, prefixname, urlnormalize
)
from ebook_converter.polyglot.builtins import unicode_type, string_or_bytes
from ebook_converter.polyglot.urllib import urldefrag
__license__ = 'GPL v3'
@@ -17,12 +17,12 @@ __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
__docformat__ = 'restructuredtext en'
class Buf(BytesIO):
class Buf(io.BytesIO):
def write(self, x):
if isinstance(x, unicode_type):
x = x.encode('utf-8')
BytesIO.write(self, x)
io.BytesIO.write(self, x)
class Serializer(object):
@@ -63,7 +63,7 @@ class Serializer(object):
# Mapping of hrefs (urlnormalized) to a list of offsets into the buffer
# where filepos="..." elements are written corresponding to links that
# point to the href. This is used at the end to fill in the correct values.
self.href_offsets = defaultdict(list)
self.href_offsets = collections.defaultdict(list)
# List of offsets in the buffer of non linear items in the spine. These
# become uncrossable breaks in the MOBI
@@ -81,7 +81,7 @@ class Serializer(object):
item.is_article_start = item.is_article_end = False
def spine_item(tocitem):
href = urldefrag(tocitem.href)[0]
href = urllib.parse.urldefrag(tocitem.href)[0]
for item in self.oeb.spine:
if item.href == href:
return item
@@ -157,7 +157,7 @@ class Serializer(object):
hrefs = self.oeb.manifest.hrefs
buf.write(b'<guide>')
for ref in self.oeb.guide.values():
path = urldefrag(ref.href)[0]
path = urllib.parse.urldefrag(ref.href)[0]
if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
continue
@@ -188,7 +188,7 @@ class Serializer(object):
'''
hrefs = self.oeb.manifest.hrefs
try:
path, frag = urldefrag(urlnormalize(href))
path, frag = urllib.parse.urldefrag(urlnormalize(href))
except ValueError:
# Unparseable URL
return False
@@ -382,7 +382,7 @@ class Serializer(object):
if href not in id_offsets:
self.logger.warn('Hyperlink target %r not found' % href)
# Link to the top of the document, better than just ignoring
href, _ = urldefrag(href)
href, _ = urllib.parse.urldefrag(href)
if href in self.id_offsets:
ioff = self.id_offsets[href]
if is_start:
+15 -14
View File
@@ -5,6 +5,7 @@ import os, re, logging, sys, numbers
from collections import defaultdict
from itertools import count
from operator import attrgetter
import urllib.parse
from lxml import etree, html
from ebook_converter import force_unicode
@@ -17,7 +18,7 @@ from ebook_converter.ebooks.oeb.parse_utils import barename, XHTML_NS, namespace
from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.utils.short_uuid import uuid4
from ebook_converter.polyglot.builtins import iteritems, unicode_type, string_or_bytes, itervalues, codepoint_to_chr
from ebook_converter.polyglot.urllib import unquote as urlunquote, urldefrag, urljoin, urlparse, urlunparse
from ebook_converter.polyglot.urllib import unquote as urlunquote
__license__ = 'GPL v3'
@@ -185,13 +186,13 @@ def iterlinks(root, find_links_in_css=True):
if attrib in attribs:
value = el.get(attrib)
if codebase is not None:
value = urljoin(codebase, value)
value = urllib.parse.urljoin(codebase, value)
yield (el, attrib, value, 0)
if 'archive' in attribs:
for match in _archive_re.finditer(el.get('archive')):
value = match.group(0)
if codebase is not None:
value = urljoin(codebase, value)
value = urllib.parse.urljoin(codebase, value)
yield (el, 'archive', value, match.start())
else:
for attr in attribs:
@@ -217,7 +218,7 @@ def make_links_absolute(root, base_url):
came from)
'''
def link_repl(href):
return urljoin(base_url, href)
return urllib.parse.urljoin(base_url, href)
rewrite_links(root, link_repl)
@@ -463,16 +464,16 @@ def urlnormalize(href):
characters URL quoted.
"""
try:
parts = urlparse(href)
parts = urllib.parse.urlparse(href)
except ValueError as e:
raise ValueError('Failed to parse the URL: %r with underlying error: %s' % (href, as_unicode(e)))
if not parts.scheme or parts.scheme == 'file':
path, frag = urldefrag(href)
path, frag = urllib.parse.urldefrag(href)
parts = ('', '', path, '', '', frag)
parts = (part.replace('\\', '/') for part in parts)
parts = (urlunquote(part) for part in parts)
parts = (urlquote(part) for part in parts)
return urlunparse(parts)
return urllib.parse.urlunparse(parts)
def extract(elem):
@@ -1135,7 +1136,7 @@ class Manifest(object):
relative to this manifest item to a book-absolute reference.
"""
try:
purl = urlparse(href)
purl = urllib.parse.urlparse(href)
except ValueError:
return href
scheme = purl.scheme
@@ -1143,8 +1144,8 @@ class Manifest(object):
return href
purl = list(purl)
purl[0] = ''
href = urlunparse(purl)
path, frag = urldefrag(href)
href = urllib.parse.urlunparse(purl)
path, frag = urllib.parse.urldefrag(href)
if not path:
if frag:
return '#'.join((self.href, frag))
@@ -1423,7 +1424,7 @@ class Guide(object):
@property
def item(self):
"""The manifest item associated with this reference."""
path = urldefrag(self.href)[0]
path = urllib.parse.urldefrag(self.href)[0]
hrefs = self.oeb.manifest.hrefs
return hrefs.get(path, None)
@@ -1596,7 +1597,7 @@ class TOC(object):
"""
prev = None
for node in list(self.nodes):
if prev and urldefrag(prev.href)[0] == urldefrag(node.href)[0]:
if prev and urllib.parse.urldefrag(prev.href)[0] == urllib.parse.urldefrag(node.href)[0]:
self.nodes.remove(node)
prev.nodes.append(node)
else:
@@ -1988,7 +1989,7 @@ class OEBBook(object):
def rel_href(base_href, href):
"""Convert the URL provided in :param:`href` to a URL relative to the URL
in :param:`base_href` """
if urlparse(href).scheme:
if urllib.parse.urlparse(href).scheme:
return href
if '/' not in base_href:
return href
@@ -2004,7 +2005,7 @@ def rel_href(base_href, href):
break
if not base:
return href
target, frag = urldefrag(href)
target, frag = urllib.parse.urldefrag(href)
target = target.split('/')
index = 0
for index in range(min(len(base), len(target))):
@@ -11,6 +11,7 @@ import uuid
from collections import defaultdict
from io import BytesIO
from itertools import count
import urllib.parse
from css_parser import getUrls, replaceUrls
@@ -49,7 +50,6 @@ from ebook_converter.utils.logging import default_log
from ebook_converter.utils.xml_parse import safe_xml_fromstring
from ebook_converter.utils.zipfile import ZipFile
from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.urllib import urlparse
exists, join, relpath = os.path.exists, os.path.join, os.path.relpath
@@ -107,7 +107,7 @@ def name_to_href(name, root, base=None, quote=urlquote):
def href_to_name(href, root, base=None):
base = root if base is None else os.path.dirname(name_to_abspath(base, root))
try:
purl = urlparse(href)
purl = urllib.parse.urlparse(href)
except ValueError:
return None
if purl.scheme or not purl.path:
+5 -5
View File
@@ -2,13 +2,13 @@ import codecs, shutil, os, posixpath
from ebook_converter.polyglot.builtins import iteritems, itervalues
from functools import partial
from collections import Counter, defaultdict
import urllib.parse
from ebook_converter import sanitize_file_name
from ebook_converter.ebooks.chardet import strip_encoding_declarations
from ebook_converter.ebooks.oeb.base import css_text
from ebook_converter.ebooks.oeb.polish.css import iter_declarations, remove_property_value
from ebook_converter.ebooks.oeb.polish.utils import extract
from ebook_converter.polyglot.urllib import urlparse, urlunparse
__license__ = 'GPL v3'
@@ -38,7 +38,7 @@ class LinkReplacer(object):
nname = self.link_map.get(name, None)
if not nname:
return url
purl = urlparse(url)
purl = urllib.parse.urlparse(url)
href = self.container.name_to_href(nname, self.base)
if purl.fragment:
nfrag = self.frag_map(name, purl.fragment)
@@ -68,12 +68,12 @@ class IdReplacer(object):
id_map = self.id_map.get(name)
if id_map is None:
return url
purl = urlparse(url)
purl = urllib.parse.urlparse(url)
nfrag = id_map.get(purl.fragment)
if nfrag is None:
return url
purl = purl._replace(fragment=nfrag)
href = urlunparse(purl)
href = urllib.parse.urlunparse(purl)
if href != url:
self.replaced = True
return href
@@ -89,7 +89,7 @@ class LinkRebaser(object):
def __call__(self, url):
if url and url.startswith('#'):
return url
purl = urlparse(url)
purl = urllib.parse.urlparse(url)
frag = purl.fragment
name = self.container.href_to_name(url, self.old_name)
if not name:
+4 -4
View File
@@ -1,12 +1,12 @@
import copy, os, re
from ebook_converter.polyglot.builtins import string_or_bytes
import urllib.parse
from ebook_converter.ebooks.oeb.base import barename, XPNSMAP, XPath, OPF, XHTML, OEB_DOCS
from ebook_converter.ebooks.oeb.polish.errors import MalformedMarkup
from ebook_converter.ebooks.oeb.polish.toc import node_from_loc
from ebook_converter.ebooks.oeb.polish.replace import LinkRebaser
from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.urllib import urlparse
__license__ = 'GPL v3'
@@ -160,7 +160,7 @@ class SplitLinkReplacer(object):
name = self.container.href_to_name(url, self.base)
if name != self.top_name:
return url
purl = urlparse(url)
purl = urllib.parse.urlparse(url)
if purl.fragment and purl.fragment in self.bottom_anchors:
url = self.container.name_to_href(self.bottom_name, self.base) + '#' + purl.fragment
self.replaced = True
@@ -225,7 +225,7 @@ def split(container, name, loc_or_xpath, before=True, totals=None):
else:
fname = container.href_to_name(url, name)
if fname == name:
purl = urlparse(url)
purl = urllib.parse.urlparse(url)
if purl.fragment in anchors_in_top:
if r is root2:
a.set('href', '%s#%s' % (container.name_to_href(name, bottom_name), purl.fragment))
@@ -310,7 +310,7 @@ class MergeLinkReplacer(object):
amap = self.anchor_map.get(name, None)
if amap is None:
return url
purl = urlparse(url)
purl = urllib.parse.urlparse(url)
frag = purl.fragment or ''
frag = amap.get(frag, frag)
url = self.container.name_to_href(self.master, self.base) + '#' + frag
+4 -4
View File
@@ -3,6 +3,7 @@ from collections import Counter, OrderedDict
from functools import partial
from operator import itemgetter
import pkg_resources
import urllib.parse
from lxml import etree
from lxml.builder import ElementMaker
@@ -16,7 +17,6 @@ from ebook_converter.ebooks.oeb.polish.opf import set_guide_item, get_book_langu
from ebook_converter.ebooks.oeb.polish.pretty import pretty_html_tree
from ebook_converter.utils.localization import get_lang, canonicalize_lang, lang_as_iso639_1
from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.urllib import urlparse
__license__ = 'GPL v3'
@@ -150,7 +150,7 @@ def add_from_navpoint(container, navpoint, parent, ncx_name):
href = content.get('src', None)
if href:
dest = container.href_to_name(href, base=ncx_name)
frag = urlparse(href).fragment or None
frag = urllib.parse.urlparse(href).fragment or None
return parent.add(text or None, dest or None, frag or None)
@@ -183,7 +183,7 @@ def parse_ncx(container, ncx_name):
href = pt.xpath('descendant::*[calibre:lower-case(local-name()) = "content"]/@src')
if href:
dest = container.href_to_name(href[0], base=ncx_name)
frag = urlparse(href[0]).fragment or None
frag = urllib.parse.urlparse(href[0]).fragment or None
toc_root.page_list.append({'dest': dest, 'pagenum': pagenum, 'frag': frag})
return toc_root
@@ -195,7 +195,7 @@ def add_from_li(container, li, parent, nav_name):
href = x.get('href')
if href:
dest = nav_name if href.startswith('#') else container.href_to_name(href, base=nav_name)
frag = urlparse(href).fragment or None
frag = urllib.parse.urlparse(href).fragment or None
break
return parent.add(text or None, dest or None, frag or None)
+12 -11
View File
@@ -3,6 +3,7 @@ Container-/OPF-based input OEBBook reader.
"""
import sys, os, uuid, copy, re, io
from collections import defaultdict
import urllib.parse
from lxml import etree
@@ -23,7 +24,7 @@ from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.constants import __appname__, __version__
from ebook_converter import guess_type, xml_replace_entities
from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import unquote, urldefrag, urlparse
from ebook_converter.polyglot.urllib import unquote
__all__ = ['OEBReader']
@@ -203,12 +204,12 @@ class OEBReader(object):
for href in hrefs:
if isinstance(href, bytes):
href = href.decode('utf-8')
href, _ = urldefrag(href)
href, _ = urllib.parse.urldefrag(href)
if not href:
continue
try:
href = item.abshref(urlnormalize(href))
scheme = urlparse(href).scheme
scheme = urllib.parse.urlparse(href).scheme
except:
self.oeb.log.exception(
'Skipping invalid href: %r'%href)
@@ -221,9 +222,9 @@ class OEBReader(object):
except:
urls = []
for url in urls:
href, _ = urldefrag(url)
href, _ = urllib.parse.urldefrag(url)
href = item.abshref(urlnormalize(href))
scheme = urlparse(href).scheme
scheme = urllib.parse.urlparse(href).scheme
if not scheme and href not in known:
new.add(href)
unchecked.clear()
@@ -294,7 +295,7 @@ class OEBReader(object):
# TODO: handle fallback chains
continue
for href in selector(item.data):
href, _ = urldefrag(href)
href, _ = urllib.parse.urldefrag(href)
if not href:
continue
try:
@@ -350,7 +351,7 @@ class OEBReader(object):
manifest = self.oeb.manifest
for elem in xpath(opf, '/o2:package/o2:guide/o2:reference'):
ref_href = elem.get('href')
path = urlnormalize(urldefrag(ref_href)[0])
path = urlnormalize(urllib.parse.urldefrag(ref_href)[0])
if path not in manifest.hrefs:
corrected_href = None
for href in manifest.hrefs:
@@ -393,7 +394,7 @@ class OEBReader(object):
# This node is useless
continue
href = item.abshref(urlnormalize(href[0])) if href and href[0] else ''
path, _ = urldefrag(href)
path, _ = urllib.parse.urldefrag(href)
if path and path not in self.oeb.manifest.hrefs:
path = urlnormalize(path)
if href and path not in self.oeb.manifest.hrefs:
@@ -468,7 +469,7 @@ class OEBReader(object):
href = site.get('href')
if not title or not href:
continue
path, _ = urldefrag(urlnormalize(href))
path, _ = urllib.parse.urldefrag(urlnormalize(href))
if path not in self.oeb.manifest.hrefs:
self.logger.warn('TOC reference %r not found' % href)
continue
@@ -480,7 +481,7 @@ class OEBReader(object):
if 'toc' not in self.oeb.guide:
return False
self.log.debug('Reading TOC from HTML...')
itempath, frag = urldefrag(self.oeb.guide['toc'].href)
itempath, frag = urllib.parse.urldefrag(self.oeb.guide['toc'].href)
item = self.oeb.manifest.hrefs[itempath]
html = item.data
if frag:
@@ -496,7 +497,7 @@ class OEBReader(object):
for anchor in xpath(html, './/h:a[@href]'):
href = anchor.attrib['href']
href = item.abshref(urlnormalize(href))
path, frag = urldefrag(href)
path, frag = urllib.parse.urldefrag(href)
if path not in self.oeb.manifest.hrefs:
continue
title = xml2text(anchor)
@@ -1,4 +1,5 @@
import textwrap
import urllib.parse
from ebook_converter import guess_type
from ebook_converter.utils.imghdr import identify
@@ -93,7 +94,6 @@ class CoverManager(object):
return -1, -1
def insert_cover(self):
from ebook_converter.ebooks.oeb.base import urldefrag
g, m = self.oeb.guide, self.oeb.manifest
item = None
href = None
@@ -124,7 +124,7 @@ class CoverManager(object):
data=safe_xml_fromstring(tp))
else:
item = self.oeb.manifest.hrefs[
urldefrag(self.oeb.guide['titlepage'].href)[0]]
urllib.parse.urldefrag(self.oeb.guide['titlepage'].href)[0]]
if item is not None:
self.oeb.spine.insert(0, item, True)
if 'cover' not in self.oeb.guide.refs:
@@ -1,9 +1,9 @@
import posixpath
import urllib.parse
from lxml import etree
from ebook_converter.ebooks.oeb.base import rewrite_links, urlnormalize
from ebook_converter.polyglot.urllib import urldefrag, urlparse
__license__ = 'GPL v3'
@@ -38,7 +38,7 @@ class RenameFiles(object): # {{{
if self.oeb.guide:
for ref in self.oeb.guide.values():
href = urlnormalize(ref.href)
href, frag = urldefrag(href)
href, frag = urllib.parse.urldefrag(href)
replacement = self.rename_map.get(href, None)
if replacement is not None:
nhref = replacement
@@ -52,7 +52,7 @@ class RenameFiles(object): # {{{
def fix_toc_entry(self, toc):
if toc.href:
href = urlnormalize(toc.href)
href, frag = urldefrag(href)
href, frag = urllib.parse.urldefrag(href)
replacement = self.rename_map.get(href, None)
if replacement is not None:
@@ -66,11 +66,11 @@ class RenameFiles(object): # {{{
def url_replacer(self, orig_url):
url = urlnormalize(orig_url)
parts = urlparse(url)
parts = urllib.parse.urlparse(url)
if parts.scheme:
# Only rewrite local URLs
return orig_url
path, frag = urldefrag(url)
path, frag = urllib.parse.urldefrag(url)
if self.renamed_items_map:
orig_item = self.renamed_items_map.get(self.current_item.href, self.current_item)
else:
@@ -2,10 +2,11 @@ import sys, os, re
from xml.sax.saxutils import escape
from string import Formatter
import pkg_resources
import urllib.parse
from ebook_converter import guess_type, strftime
from ebook_converter.constants import iswindows
from ebook_converter.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urldefrag, urlnormalize
from ebook_converter.ebooks.oeb.base import XPath, XHTML_NS, XHTML, xml2text, urlnormalize
from ebook_converter.library.comments import comments_to_html, markdown
from ebook_converter.utils.date import is_date_undefined, as_local_time
from ebook_converter.ebooks.chardet import strip_encoding_declarations
@@ -73,7 +74,7 @@ class RemoveFirstImage(Base):
self.log.warn('Could not find first image to remove')
if deleted_item is not None:
for item in list(self.oeb.toc):
href = urldefrag(item.href)[0]
href = urllib.parse.urldefrag(item.href)[0]
if href == deleted_item.href:
self.oeb.toc.remove(item)
self.oeb.guide.remove_by_href(deleted_item.href)
@@ -1,7 +1,9 @@
"""
SVG rasterization transform.
"""
import os, re
import os
import re
import urllib.parse
# from PyQt5.Qt import (
# Qt, QByteArray, QBuffer, QIODevice, QColor, QImage, QPainter, QSvgRenderer)
@@ -14,7 +16,6 @@ from ebook_converter.ebooks.oeb.stylizer import Stylizer
from ebook_converter.ptempfile import PersistentTemporaryFile
from ebook_converter.utils.imghdr import what
from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import urldefrag
__license__ = 'GPL v3'
@@ -114,7 +115,7 @@ class SVGRasterizer(object):
hrefs = self.oeb.manifest.hrefs
for elem in xpath(svg, '//svg:*[@xl:href]'):
href = urlnormalize(elem.attrib[XLINK('href')])
path = urldefrag(href)[0]
path = urllib.parse.urldefrag(href)[0]
if not path:
continue
abshref = item.abshref(path)
@@ -5,6 +5,7 @@ assumes a prior call to the flatcss transform.
"""
import os, functools, collections, re, copy
from collections import OrderedDict
import urllib.parse
from lxml.etree import XPath as _XPath
from lxml import etree
@@ -12,7 +13,7 @@ from lxml import etree
from ebook_converter import as_unicode, force_unicode
from ebook_converter.ebooks.epub import rules
from ebook_converter.ebooks.oeb.base import (OEB_STYLES, XPNSMAP as NAMESPACES,
urldefrag, rewrite_links, XHTML, urlnormalize)
rewrite_links, XHTML, urlnormalize)
from ebook_converter.ebooks.oeb.polish.split import do_split
from ebook_converter.polyglot.builtins import iteritems, unicode_type
from ebook_converter.polyglot.urllib import unquote
@@ -162,7 +163,7 @@ class Split(object):
rewrite_links(item.data, self.rewrite_links)
def rewrite_links(self, url):
href, frag = urldefrag(url)
href, frag = urllib.parse.urldefrag(url)
try:
href = self.current_item.abshref(href)
except ValueError:
@@ -453,7 +454,7 @@ class FlowSplitter(object):
if self.oeb.guide:
for ref in self.oeb.guide.values():
href, frag = urldefrag(ref.href)
href, frag = urllib.parse.urldefrag(ref.href)
if href == self.item.href:
nhref = self.anchor_map[frag if frag else None]
if frag:
@@ -462,7 +463,7 @@ class FlowSplitter(object):
def fix_toc_entry(toc):
if toc.href:
href, frag = urldefrag(toc.href)
href, frag = urllib.parse.urldefrag(toc.href)
if href == self.item.href:
nhref = self.anchor_map[frag if frag else None]
if frag:
@@ -476,7 +477,7 @@ class FlowSplitter(object):
if self.oeb.pages:
for page in self.oeb.pages:
href, frag = urldefrag(page.href)
href, frag = urllib.parse.urldefrag(page.href)
if href == self.item.href:
nhref = self.anchor_map[frag if frag else None]
if frag:
@@ -1,4 +1,6 @@
import re, uuid
import re
import uuid
import urllib.parse
from lxml import etree
from collections import OrderedDict, Counter
@@ -6,7 +8,6 @@ from collections import OrderedDict, Counter
from ebook_converter.ebooks.oeb.base import XPNSMAP, TOC, XHTML, xml2text, barename
from ebook_converter.ebooks import ConversionError
from ebook_converter.polyglot.builtins import itervalues, unicode_type
from ebook_converter.polyglot.urllib import urlparse
__license__ = 'GPL v3'
@@ -209,7 +210,7 @@ class DetectStructure(object):
for a in XPath('//h:a[@href]')(item.data):
href = a.get('href')
try:
purl = urlparse(href)
purl = urllib.parse.urlparse(href)
except ValueError:
self.log.warning('Ignoring malformed URL:', href)
continue
@@ -1,9 +1,10 @@
"""
OPF manifest trimming transform.
"""
import urllib.parse
from ebook_converter.ebooks.oeb.base import CSS_MIME, OEB_DOCS
from ebook_converter.ebooks.oeb.base import urlnormalize, iterlinks
from ebook_converter.polyglot.urllib import urldefrag
__license__ = 'GPL v3'
@@ -32,7 +33,7 @@ class ManifestTrimmer(object):
elif item.value in oeb.manifest.ids:
used.add(oeb.manifest.ids[item.value])
for ref in oeb.guide.values():
path, _ = urldefrag(ref.href)
path, _ = urllib.parse.urldefrag(ref.href)
if path in oeb.manifest.hrefs:
used.add(oeb.manifest.hrefs[path])
# TOC items are required to be in the spine
+5 -4
View File
@@ -4,11 +4,12 @@ PyTextile
A Humane Web Text Generator
"""
import re
import urllib.request
import urllib.parse
import uuid
from ebook_converter.utils.smartypants import smartyPants
from ebook_converter.polyglot.builtins import unicode_type
from ebook_converter.polyglot.urllib import urlopen, urlparse
# Last upstream version basis
@@ -85,7 +86,7 @@ def getimagesize(url):
try:
p = ImageFile.Parser()
f = urlopen(url)
f = urllib.request.urlopen(url)
while True:
s = f.read(1024)
if not s:
@@ -777,11 +778,11 @@ class Textile(object):
True
"""
(scheme, netloc) = urlparse(url)[0:2]
(scheme, netloc) = urllib.parse.urlparse(url)[0:2]
return not scheme and not netloc
def relURL(self, url):
scheme = urlparse(url)[0]
scheme = urllib.parse.urlparse(url)[0]
if self.restricted and scheme and scheme not in self.url_schemes:
return '#'
return url
-8
View File
@@ -1,8 +0,0 @@
from ebook_converter.polyglot.builtins import is_py3
if is_py3:
from functools import lru_cache
else:
from backports.functools_lru_cache import lru_cache
lru_cache
-10
View File
@@ -1,10 +0,0 @@
#!/usr/bin/env python2
# vim:fileencoding=utf-8
# License: GPL v3 Copyright: 2019, Eli Schwartz <eschwartz@archlinux.org>
from ebook_converter.polyglot.builtins import is_py3
if is_py3:
from html.entities import name2codepoint
else:
from htmlentitydefs import name2codepoint
+6 -31
View File
@@ -1,14 +1,11 @@
from ebook_converter.polyglot.builtins import is_py3
from urllib.request import (build_opener, getproxies, install_opener,
HTTPBasicAuthHandler, HTTPCookieProcessor, HTTPDigestAuthHandler,
url2pathname, urlopen, Request)
from urllib.parse import (parse_qs, quote, unquote as uq, quote_plus, urldefrag,
urlencode, urljoin, urlparse, urlunparse, urlsplit, urlunsplit)
from urllib.error import HTTPError, URLError
if is_py3:
from urllib.request import (build_opener, getproxies, install_opener, # noqa
HTTPBasicAuthHandler, HTTPCookieProcessor, HTTPDigestAuthHandler, # noqa
url2pathname, urlopen, Request) # noqa
from urllib.parse import (parse_qs, quote, unquote as uq, quote_plus, urldefrag, # noqa
urlencode, urljoin, urlparse, urlunparse, urlsplit, urlunsplit) # noqa
from urllib.error import HTTPError, URLError # noqa
def unquote(x, encoding='utf-8', errors='replace'):
binary = isinstance(x, bytes)
if binary:
@@ -18,28 +15,6 @@ if is_py3:
ans = ans.encode(encoding, errors)
return ans
else:
from urllib import (getproxies, quote, unquote as uq, quote_plus, url2pathname, # noqa
urlencode) # noqa
from urllib2 import (build_opener, install_opener, HTTPBasicAuthHandler, # noqa
HTTPCookieProcessor, HTTPDigestAuthHandler, HTTPError, URLError, # noqa
urlopen, Request) # noqa
from urlparse import (parse_qs, urldefrag, urljoin, urlparse, urlunparse, # noqa
urlsplit, urlunsplit) # noqa
def unquote(x, encoding='utf-8', errors='replace'):
# unquote must run on a bytestring and will return a bytestring
# If it runs on a unicode object, it returns a double encoded unicode
# string: unquote(u'%C3%A4') != unquote(b'%C3%A4').decode('utf-8')
# and the latter is correct
binary = isinstance(x, bytes)
if not binary:
x = x.encode(encoding, errors)
ans = uq(x)
if not binary:
ans = ans.decode(encoding, errors)
return ans
def unquote_plus(x, encoding='utf-8', errors='replace'):
q, repl = (b'+', b' ') if isinstance(x, bytes) else ('+', ' ')
+3 -2
View File
@@ -1,7 +1,7 @@
import re
import html.entities
from ebook_converter.polyglot.builtins import codepoint_to_chr
from ebook_converter.polyglot.html_entities import name2codepoint
from ebook_converter.constants import plugins, preferred_encoding
@@ -77,7 +77,8 @@ def unescape(text, rm=False, rchar=''):
else:
# named entity
try:
text = codepoint_to_chr(name2codepoint[text[1:-1]])
text = codepoint_to_chr(html.entities
.name2codepoint[text[1:-1]])
except KeyError:
pass
if rm:
+13 -7
View File
@@ -1,10 +1,16 @@
import os, errno, sys
from threading import Thread
import errno
import functools
import os
import sys
import threading
from ebook_converter import force_unicode
from ebook_converter.constants import iswindows, get_windows_username, islinux, filesystem_encoding, ispy3
from ebook_converter.constants import filesystem_encoding
from ebook_converter.constants import get_windows_username
from ebook_converter.constants import islinux
from ebook_converter.constants import ispy3
from ebook_converter.constants import iswindows
from ebook_converter.utils.filenames import ascii_filename
from ebook_converter.polyglot.functools import lru_cache
__license__ = 'GPL v3'
@@ -24,7 +30,7 @@ def eintr_retry_call(func, *args, **kwargs):
raise
@lru_cache()
@functools.lru_cache()
def socket_address(which):
if iswindows:
ans = r'\\.\pipe\Calibre' + which
@@ -58,12 +64,12 @@ def viewer_socket_address():
return socket_address('Viewer' if iswindows else 'viewer')
class RC(Thread):
class RC(threading.Thread):
def __init__(self, print_error=True, socket_address=None):
self.print_error = print_error
self.socket_address = socket_address or gui_socket_address()
Thread.__init__(self)
threading.Thread.__init__(self)
self.conn = None
self.daemon = True