mirror of
https://github.com/gryf/ebook-converter.git
synced 2026-02-01 19:45:45 +01:00
Cleanup, cleanup
This commit is contained in:
@@ -18,6 +18,8 @@ To build and run ebook converter, you'll need:
|
||||
- Python 3.6 or newer
|
||||
- `Liberation fonts`_
|
||||
- setuptools
|
||||
- ``pdftohtml``, ``pdfinfo`` and ``pdftoppm`` from the `poppler`_ project,
|
||||
available in ``$PATH``, for conversion from PDF
|
||||
|
||||
No Python 2 support. Even though Calibre can probably still run on Python 2, I
|
||||
do not intend to support it.
|
||||
@@ -101,7 +103,7 @@ managers), i.e:
|
||||
|
||||
.. code:: shell-session
|
||||
|
||||
$ virtualenv venv
|
||||
$ python -m venv venv
|
||||
$ . venv/bin/activate
|
||||
(venv) $ git clone https://github.com/gryf/ebook-converter
|
||||
(venv) $ cd ebook-converter
|
||||
@@ -128,3 +130,4 @@ for details.
|
||||
.. _pypi: https://pypi.python.org
|
||||
.. _Liberation fonts: https://github.com/liberationfonts/liberation-fonts
|
||||
.. _Kindle periodical: https://sellercentral.amazon.com/gp/help/external/help.html?itemID=202047960&language=en-US
|
||||
.. _poppler: https://poppler.freedesktop.org/
|
||||
|
||||
@@ -10,17 +10,15 @@ import sys
|
||||
from functools import partial
|
||||
|
||||
from ebook_converter import constants_old
|
||||
from ebook_converter.constants_old import islinux, isfrozen, \
|
||||
isbsd, __appname__, __version__, __author__, \
|
||||
from ebook_converter.constants_old import isfrozen, \
|
||||
__appname__, __version__, __author__, \
|
||||
config_dir
|
||||
from ebook_converter.ebooks.html_entities import html5_entities
|
||||
|
||||
|
||||
if False:
|
||||
# Prevent pyflakes from complaining
|
||||
__appname__, islinux, __version__
|
||||
isfrozen, __author__
|
||||
isbsd, config_dir
|
||||
__appname__, __version__, isfrozen, __author__, config_dir
|
||||
|
||||
|
||||
def init_mimetypes():
|
||||
|
||||
@@ -6,11 +6,10 @@ import os
|
||||
import sys
|
||||
|
||||
|
||||
__appname__ = 'calibre'
|
||||
__appname__ = 'ebook-converter'
|
||||
numeric_version = (4, 12, 0)
|
||||
__version__ = '.'.join(map(str, numeric_version))
|
||||
git_version = None
|
||||
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
|
||||
__version__ = '.'.join([str(x) for x in numeric_version])
|
||||
__author__ = "foobar"
|
||||
|
||||
'''
|
||||
Various run time constants.
|
||||
@@ -18,16 +17,14 @@ Various run time constants.
|
||||
|
||||
|
||||
_plat = sys.platform.lower()
|
||||
isosx = 'darwin' in _plat
|
||||
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
|
||||
isosx = 'darwin' in _plat
|
||||
isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
|
||||
isfreebsd = 'freebsd' in _plat
|
||||
isnetbsd = 'netbsd' in _plat
|
||||
isdragonflybsd = 'dragonfly' in _plat
|
||||
isbsd = isfreebsd or isnetbsd or isdragonflybsd
|
||||
ishaiku = 'haiku1' in _plat
|
||||
islinux = True
|
||||
isfrozen = hasattr(sys, 'frozen')
|
||||
isunix = isosx or islinux or ishaiku
|
||||
isfrozen = hasattr(sys, 'frozen')
|
||||
isunix = True
|
||||
isportable = os.getenv('CALIBRE_PORTABLE_BUILD') is not None
|
||||
isxp = isoldvista = False
|
||||
is64bit = sys.maxsize > (1 << 32)
|
||||
@@ -40,7 +37,7 @@ TOC_DIALOG_APP_UID = 'com.calibre-ebook.toc-editor'
|
||||
try:
|
||||
preferred_encoding = locale.getpreferredencoding()
|
||||
codecs.lookup(preferred_encoding)
|
||||
except:
|
||||
except Exception:
|
||||
preferred_encoding = 'utf-8'
|
||||
|
||||
fcntl = importlib.import_module('fcntl')
|
||||
@@ -214,12 +211,9 @@ del dv
|
||||
|
||||
def get_version():
|
||||
'''Return version string for display to user '''
|
||||
if git_version is not None:
|
||||
v = git_version
|
||||
else:
|
||||
v = __version__
|
||||
if numeric_version[-1] == 0:
|
||||
v = v[:-2]
|
||||
v = __version__
|
||||
if numeric_version[-1] == 0:
|
||||
v = v[:-2]
|
||||
if is_running_from_develop:
|
||||
v += '*'
|
||||
|
||||
|
||||
@@ -121,7 +121,6 @@ def render_html_data(path_to_html, width, height):
|
||||
result = {}
|
||||
|
||||
def report_error(text=''):
|
||||
__import__('pdb').set_trace()
|
||||
print(f'Failed to render {path_to_html}')
|
||||
# file=sys.stderr)
|
||||
if text:
|
||||
|
||||
@@ -2,10 +2,8 @@ import functools
|
||||
import mimetypes
|
||||
import os
|
||||
import re
|
||||
import tempfile
|
||||
import urllib.parse
|
||||
|
||||
from ebook_converter.constants_old import islinux, isbsd
|
||||
from ebook_converter.customize.conversion import InputFormatPlugin
|
||||
from ebook_converter.customize.conversion import OptionRecommendation
|
||||
from ebook_converter.utils.localization import get_lang
|
||||
@@ -55,7 +53,6 @@ class HTMLInput(InputFormatPlugin):
|
||||
|
||||
def convert(self, stream, opts, file_ext, log,
|
||||
accelerators):
|
||||
self._is_case_sensitive = None
|
||||
basedir = os.getcwd()
|
||||
self.opts = opts
|
||||
|
||||
@@ -81,14 +78,6 @@ class HTMLInput(InputFormatPlugin):
|
||||
return create_oebbook(log, stream.name, opts,
|
||||
encoding=opts.input_encoding)
|
||||
|
||||
def is_case_sensitive(self, path):
|
||||
if getattr(self, '_is_case_sensitive', None) is not None:
|
||||
return self._is_case_sensitive
|
||||
if not path or not os.path.exists(path):
|
||||
return islinux or isbsd
|
||||
self._is_case_sensitive = not (os.path.exists(path.lower()) and os.path.exists(path.upper()))
|
||||
return self._is_case_sensitive
|
||||
|
||||
def create_oebbook(self, htmlpath, basedir, opts, log, mi):
|
||||
import uuid
|
||||
from ebook_converter.ebooks.conversion.plumber import create_oebbook
|
||||
@@ -154,8 +143,6 @@ class HTMLInput(InputFormatPlugin):
|
||||
self.log = log
|
||||
self.log('Normalizing filename cases')
|
||||
for path, href in htmlfile_map.items():
|
||||
if not self.is_case_sensitive(path):
|
||||
path = path.lower()
|
||||
self.added_resources[path] = href
|
||||
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
|
||||
self.urldefrag = urllib.parse.urldefrag
|
||||
@@ -252,8 +239,6 @@ class HTMLInput(InputFormatPlugin):
|
||||
if os.path.isdir(link):
|
||||
self.log.warn(link_, 'is a link to a directory. Ignoring.')
|
||||
return link_
|
||||
if not self.is_case_sensitive(tempfile.gettempdir()):
|
||||
link = link.lower()
|
||||
if link not in self.added_resources:
|
||||
bhref = os.path.basename(link)
|
||||
id, href = self.oeb.manifest.generate(id='added', href=sanitize_file_name(bhref))
|
||||
|
||||
@@ -1,27 +1,17 @@
|
||||
"""
|
||||
Read meta information from PDF files
|
||||
"""
|
||||
import os, subprocess, shutil, re
|
||||
from functools import partial
|
||||
import functools
|
||||
import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
|
||||
from ebook_converter.ptempfile import TemporaryDirectory
|
||||
from ebook_converter.ebooks.metadata import (
|
||||
MetaInformation, string_to_authors, check_isbn, check_doi)
|
||||
|
||||
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
|
||||
|
||||
|
||||
def get_tools():
|
||||
from ebook_converter.ebooks.pdf.pdftohtml import PDFTOHTML
|
||||
base = os.path.dirname(PDFTOHTML)
|
||||
suffix = ''
|
||||
pdfinfo = os.path.join(base, 'pdfinfo') + suffix
|
||||
pdftoppm = os.path.join(base, 'pdftoppm') + suffix
|
||||
return pdfinfo, pdftoppm
|
||||
|
||||
|
||||
def read_info(outputdir, get_cover):
|
||||
''' Read info dict and cover from a pdf file named src.pdf in outputdir.
|
||||
Note that this function changes the cwd to outputdir and is therefore not
|
||||
@@ -29,7 +19,8 @@ def read_info(outputdir, get_cover):
|
||||
way to pass unicode paths via command line arguments. This also ensures
|
||||
that if poppler crashes, no stale file handles are left for the original
|
||||
file, only for src.pdf.'''
|
||||
pdfinfo, pdftoppm = get_tools()
|
||||
pdfinfo = 'pdfinfo'
|
||||
pdftoppm = 'pdftoppm'
|
||||
source_file = os.path.join(outputdir, 'src.pdf')
|
||||
cover_file = os.path.join(outputdir, 'cover')
|
||||
ans = {}
|
||||
@@ -55,8 +46,8 @@ def read_info(outputdir, get_cover):
|
||||
ans[field] = val.strip()
|
||||
|
||||
# Now read XMP metadata
|
||||
# Versions of poppler before 0.47.0 used to print out both the Info dict and
|
||||
# XMP metadata packet together. However, since that changed in
|
||||
# Versions of poppler before 0.47.0 used to print out both the Info dict
|
||||
# and XMP metadata packet together. However, since that changed in
|
||||
# https://cgit.freedesktop.org/poppler/poppler/commit/?id=c91483aceb1b640771f572cb3df9ad707e5cad0d
|
||||
# we can no longer rely on it.
|
||||
try:
|
||||
@@ -77,13 +68,14 @@ def read_info(outputdir, get_cover):
|
||||
subprocess.check_call([pdftoppm, '-singlefile', '-jpeg',
|
||||
'-cropbox', source_file, cover_file])
|
||||
except subprocess.CalledProcessError as e:
|
||||
print('pdftoppm errored out with return code: {e.returncode}')
|
||||
print(f'pdftoppm errored out with return code: {e.returncode}')
|
||||
|
||||
return ans
|
||||
|
||||
|
||||
def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', prefix='page-images'):
|
||||
pdftoppm = get_tools()[1]
|
||||
def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg',
|
||||
prefix='page-images'):
|
||||
pdftoppm = 'pdftoppm'
|
||||
outputdir = os.path.abspath(outputdir)
|
||||
args = {}
|
||||
try:
|
||||
@@ -92,11 +84,12 @@ def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', pr
|
||||
'-l', str(last), pdfpath, os.path.join(outputdir, prefix)
|
||||
], **args)
|
||||
except subprocess.CalledProcessError as e:
|
||||
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)
|
||||
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s' %
|
||||
e.returncode)
|
||||
|
||||
|
||||
def is_pdf_encrypted(path_to_pdf):
|
||||
pdfinfo = get_tools()[0]
|
||||
pdfinfo = 'pdfinfo'
|
||||
raw = subprocess.check_output([pdfinfo, path_to_pdf])
|
||||
q = re.search(br'^Encrypted:\s*(\S+)', raw, flags=re.MULTILINE)
|
||||
if q is not None:
|
||||
@@ -149,7 +142,7 @@ def get_metadata(stream, cover=True):
|
||||
|
||||
# Look for recognizable identifiers in the info dict, if they were not
|
||||
# found in the XMP metadata
|
||||
for scheme, check_func in {'doi':check_doi, 'isbn':check_isbn}.items():
|
||||
for scheme, check_func in {'doi': check_doi, 'isbn': check_isbn}.items():
|
||||
if scheme not in mi.get_identifiers():
|
||||
for k, v in info.items():
|
||||
if k != 'xmp_metadata':
|
||||
@@ -163,9 +156,7 @@ def get_metadata(stream, cover=True):
|
||||
return mi
|
||||
|
||||
|
||||
get_quick_metadata = partial(get_metadata, cover=False)
|
||||
|
||||
#from ebook_converter.utils.podofo import set_metadata as podofo_set_metadata
|
||||
get_quick_metadata = functools.partial(get_metadata, cover=False)
|
||||
|
||||
|
||||
def set_metadata(stream, mi):
|
||||
|
||||
@@ -3,12 +3,10 @@ import os
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
import sys
|
||||
|
||||
from lxml import etree
|
||||
|
||||
from ebook_converter import CurrentDir, xml_replace_entities
|
||||
from ebook_converter.constants_old import isbsd, islinux, isosx
|
||||
from ebook_converter.ebooks import ConversionError, DRMError
|
||||
from ebook_converter.ebooks.chardet import xml_to_unicode
|
||||
from ebook_converter.ptempfile import PersistentTemporaryFile
|
||||
@@ -16,21 +14,10 @@ from ebook_converter.utils.cleantext import clean_xml_chars
|
||||
from ebook_converter.utils.ipc import eintr_retry_call
|
||||
|
||||
|
||||
PDFTOHTML = 'pdftohtml'
|
||||
|
||||
|
||||
def popen(cmd, **kw):
|
||||
return subprocess.Popen(cmd, **kw)
|
||||
|
||||
|
||||
if isosx and hasattr(sys, 'frameworks_dir'):
|
||||
base = os.path.join(os.path.dirname(sys.frameworks_dir), 'utils.app',
|
||||
'Contents', 'MacOS')
|
||||
PDFTOHTML = os.path.join(base, PDFTOHTML)
|
||||
if (islinux or isbsd) and getattr(sys, 'frozen', False):
|
||||
PDFTOHTML = os.path.join(sys.executables_location, 'bin', 'pdftohtml')
|
||||
|
||||
|
||||
def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
|
||||
'''
|
||||
Convert the pdf into html using the pdftohtml app.
|
||||
@@ -49,12 +36,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
|
||||
def a(x):
|
||||
return os.path.basename(x)
|
||||
|
||||
exe = PDFTOHTML
|
||||
cmd = [exe, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge',
|
||||
cmd = ['pdftohtml', '-enc', 'UTF-8', '-noframes', '-p', '-nomerge',
|
||||
'-nodrm', a(pdfsrc), a(index)]
|
||||
|
||||
if isbsd:
|
||||
cmd.remove('-nodrm')
|
||||
if no_images:
|
||||
cmd.append('-i')
|
||||
if as_xml:
|
||||
@@ -105,11 +89,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
|
||||
|
||||
i.write(raw.encode('utf-8'))
|
||||
|
||||
cmd = [exe, '-f', '1', '-l', '1', '-xml', '-i', '-enc', 'UTF-8',
|
||||
'-noframes', '-p', '-nomerge', '-nodrm', '-q', '-stdout',
|
||||
a(pdfsrc)]
|
||||
if isbsd:
|
||||
cmd.remove('-nodrm')
|
||||
cmd = ['pdftohtml', '-f', '1', '-l', '1', '-xml', '-i', '-enc',
|
||||
'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q',
|
||||
'-stdout', a(pdfsrc)]
|
||||
p = popen(cmd, stdout=subprocess.PIPE)
|
||||
raw = p.stdout.read().strip()
|
||||
if p.wait() == 0 and raw:
|
||||
|
||||
@@ -5,6 +5,7 @@ import builtins
|
||||
import fcntl
|
||||
import locale
|
||||
import sys
|
||||
import threading
|
||||
|
||||
from ebook_converter import constants_old
|
||||
|
||||
@@ -22,7 +23,9 @@ if not _run_once:
|
||||
try:
|
||||
base_dir()
|
||||
except EnvironmentError:
|
||||
pass # Ignore this error during startup, so we can show a better error message to the user later.
|
||||
# Ignore this error during startup, so we can show a better error
|
||||
# message to the user later.
|
||||
pass
|
||||
|
||||
#
|
||||
# Convert command line arguments to unicode
|
||||
@@ -51,7 +54,6 @@ if not _run_once:
|
||||
import ebook_converter.utils.resources as resources
|
||||
resources
|
||||
|
||||
#
|
||||
# Initialize locale
|
||||
# Import string as we do not want locale specific
|
||||
# string.whitespace/printable, on windows especially, this causes problems.
|
||||
@@ -87,28 +89,26 @@ if not _run_once:
|
||||
bound_signal.connect(slot, **kw)
|
||||
builtins.__dict__['connect_lambda'] = connect_lambda
|
||||
|
||||
if constants_old.islinux or constants_old.isosx or constants_old.isfreebsd:
|
||||
# Name all threads at the OS level created using the threading module, see
|
||||
# http://bugs.python.org/issue15500
|
||||
import threading
|
||||
# Name all threads at the OS level created using the threading module, see
|
||||
# http://bugs.python.org/issue15500
|
||||
|
||||
orig_start = threading.Thread.start
|
||||
orig_start = threading.Thread.start
|
||||
|
||||
def new_start(self):
|
||||
orig_start(self)
|
||||
try:
|
||||
name = self.name
|
||||
if not name or name.startswith('Thread-'):
|
||||
name = self.__class__.__name__
|
||||
if name == 'Thread':
|
||||
name = self.name
|
||||
if name:
|
||||
if isinstance(name, str):
|
||||
name = name.encode('ascii', 'replace').decode('ascii')
|
||||
constants_old.plugins['speedup'][0].set_thread_name(name[:15])
|
||||
except Exception:
|
||||
pass # Don't care about failure to set name
|
||||
threading.Thread.start = new_start
|
||||
def new_start(self):
|
||||
orig_start(self)
|
||||
try:
|
||||
name = self.name
|
||||
if not name or name.startswith('Thread-'):
|
||||
name = self.__class__.__name__
|
||||
if name == 'Thread':
|
||||
name = self.name
|
||||
if name:
|
||||
if isinstance(name, str):
|
||||
name = name.encode('ascii', 'replace').decode('ascii')
|
||||
constants_old.plugins['speedup'][0].set_thread_name(name[:15])
|
||||
except Exception:
|
||||
pass # Don't care about failure to set name
|
||||
threading.Thread.start = new_start
|
||||
|
||||
|
||||
def test_lopen():
|
||||
|
||||
@@ -1,16 +1,4 @@
|
||||
import errno
|
||||
import functools
|
||||
import os
|
||||
import sys
|
||||
import threading
|
||||
|
||||
from ebook_converter import force_unicode
|
||||
from ebook_converter.constants_old import filesystem_encoding
|
||||
from ebook_converter.constants_old import islinux
|
||||
from ebook_converter.utils.filenames import ascii_filename
|
||||
|
||||
|
||||
VADDRESS = None
|
||||
|
||||
|
||||
def eintr_retry_call(func, *args, **kwargs):
|
||||
@@ -21,47 +9,3 @@ def eintr_retry_call(func, *args, **kwargs):
|
||||
if getattr(e, 'errno', None) == errno.EINTR:
|
||||
continue
|
||||
raise
|
||||
|
||||
|
||||
@functools.lru_cache()
|
||||
def socket_address(which):
|
||||
|
||||
user = force_unicode(os.environ.get('USER') or os.path.basename(os.path.expanduser('~')), filesystem_encoding)
|
||||
sock_name = '{}-calibre-{}.socket'.format(ascii_filename(user).replace(' ', '_'), which)
|
||||
if islinux:
|
||||
ans = '\0' + sock_name
|
||||
else:
|
||||
from tempfile import gettempdir
|
||||
tmp = force_unicode(gettempdir(), filesystem_encoding)
|
||||
ans = os.path.join(tmp, sock_name)
|
||||
return ans
|
||||
|
||||
|
||||
def gui_socket_address():
|
||||
return socket_address('gui')
|
||||
|
||||
|
||||
def viewer_socket_address():
|
||||
return socket_address('viewer')
|
||||
|
||||
|
||||
class RC(threading.Thread):
|
||||
|
||||
def __init__(self, print_error=True, socket_address=None):
|
||||
self.print_error = print_error
|
||||
self.socket_address = socket_address or gui_socket_address()
|
||||
threading.Thread.__init__(self)
|
||||
self.conn = None
|
||||
self.daemon = True
|
||||
|
||||
def run(self):
|
||||
from multiprocessing.connection import Client
|
||||
self.done = False
|
||||
try:
|
||||
self.conn = Client(self.socket_address)
|
||||
self.done = True
|
||||
except Exception:
|
||||
if self.print_error:
|
||||
print('Failed to connect to address {}', file=sys.stderr).format(repr(self.socket_address))
|
||||
import traceback
|
||||
traceback.print_exc()
|
||||
|
||||
Reference in New Issue
Block a user