1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-03-01 22:25:46 +01:00

Cleanup, cleanup

This commit is contained in:
2020-07-17 18:59:45 +02:00
parent 5ac8451668
commit 45b6bb5b2c
10 changed files with 63 additions and 167 deletions

View File

@@ -18,6 +18,8 @@ To build and run ebook converter, you'll need:
- Python 3.6 or newer - Python 3.6 or newer
- `Liberation fonts`_ - `Liberation fonts`_
- setuptools - setuptools
- ``pdftohtml``, ``pdfinfo`` and ``pdftoppm`` from the `poppler`_ project,
available in ``$PATH``, for conversion from PDF
No Python 2 support. Even though Calibre can probably still run on Python 2, I No Python 2 support. Even though Calibre can probably still run on Python 2, I
do not intend to support it. do not intend to support it.
@@ -101,7 +103,7 @@ managers), i.e:
.. code:: shell-session .. code:: shell-session
$ virtualenv venv $ python -m venv venv
$ . venv/bin/activate $ . venv/bin/activate
(venv) $ git clone https://github.com/gryf/ebook-converter (venv) $ git clone https://github.com/gryf/ebook-converter
(venv) $ cd ebook-converter (venv) $ cd ebook-converter
@@ -128,3 +130,4 @@ for details.
.. _pypi: https://pypi.python.org .. _pypi: https://pypi.python.org
.. _Liberation fonts: https://github.com/liberationfonts/liberation-fonts .. _Liberation fonts: https://github.com/liberationfonts/liberation-fonts
.. _Kindle periodical: https://sellercentral.amazon.com/gp/help/external/help.html?itemID=202047960&language=en-US .. _Kindle periodical: https://sellercentral.amazon.com/gp/help/external/help.html?itemID=202047960&language=en-US
.. _poppler: https://poppler.freedesktop.org/

View File

@@ -10,17 +10,15 @@ import sys
from functools import partial from functools import partial
from ebook_converter import constants_old from ebook_converter import constants_old
from ebook_converter.constants_old import islinux, isfrozen, \ from ebook_converter.constants_old import isfrozen, \
isbsd, __appname__, __version__, __author__, \ __appname__, __version__, __author__, \
config_dir config_dir
from ebook_converter.ebooks.html_entities import html5_entities from ebook_converter.ebooks.html_entities import html5_entities
if False: if False:
# Prevent pyflakes from complaining # Prevent pyflakes from complaining
__appname__, islinux, __version__ __appname__, __version__, isfrozen, __author__, config_dir
isfrozen, __author__
isbsd, config_dir
def init_mimetypes(): def init_mimetypes():

View File

@@ -6,11 +6,10 @@ import os
import sys import sys
__appname__ = 'calibre' __appname__ = 'ebook-converter'
numeric_version = (4, 12, 0) numeric_version = (4, 12, 0)
__version__ = '.'.join(map(str, numeric_version)) __version__ = '.'.join([str(x) for x in numeric_version])
git_version = None __author__ = "foobar"
__author__ = "Kovid Goyal <kovid@kovidgoyal.net>"
''' '''
Various run time constants. Various run time constants.
@@ -18,16 +17,14 @@ Various run time constants.
_plat = sys.platform.lower() _plat = sys.platform.lower()
isosx = 'darwin' in _plat isosx = 'darwin' in _plat
isnewosx = isosx and getattr(sys, 'new_app_bundle', False) isnewosx = isosx and getattr(sys, 'new_app_bundle', False)
isfreebsd = 'freebsd' in _plat isfreebsd = 'freebsd' in _plat
isnetbsd = 'netbsd' in _plat isnetbsd = 'netbsd' in _plat
isdragonflybsd = 'dragonfly' in _plat isdragonflybsd = 'dragonfly' in _plat
isbsd = isfreebsd or isnetbsd or isdragonflybsd
ishaiku = 'haiku1' in _plat ishaiku = 'haiku1' in _plat
islinux = True isfrozen = hasattr(sys, 'frozen')
isfrozen = hasattr(sys, 'frozen') isunix = True
isunix = isosx or islinux or ishaiku
isportable = os.getenv('CALIBRE_PORTABLE_BUILD') is not None isportable = os.getenv('CALIBRE_PORTABLE_BUILD') is not None
isxp = isoldvista = False isxp = isoldvista = False
is64bit = sys.maxsize > (1 << 32) is64bit = sys.maxsize > (1 << 32)
@@ -40,7 +37,7 @@ TOC_DIALOG_APP_UID = 'com.calibre-ebook.toc-editor'
try: try:
preferred_encoding = locale.getpreferredencoding() preferred_encoding = locale.getpreferredencoding()
codecs.lookup(preferred_encoding) codecs.lookup(preferred_encoding)
except: except Exception:
preferred_encoding = 'utf-8' preferred_encoding = 'utf-8'
fcntl = importlib.import_module('fcntl') fcntl = importlib.import_module('fcntl')
@@ -214,12 +211,9 @@ del dv
def get_version(): def get_version():
'''Return version string for display to user ''' '''Return version string for display to user '''
if git_version is not None: v = __version__
v = git_version if numeric_version[-1] == 0:
else: v = v[:-2]
v = __version__
if numeric_version[-1] == 0:
v = v[:-2]
if is_running_from_develop: if is_running_from_develop:
v += '*' v += '*'

View File

@@ -121,7 +121,6 @@ def render_html_data(path_to_html, width, height):
result = {} result = {}
def report_error(text=''): def report_error(text=''):
__import__('pdb').set_trace()
print(f'Failed to render {path_to_html}') print(f'Failed to render {path_to_html}')
# file=sys.stderr) # file=sys.stderr)
if text: if text:

View File

@@ -2,10 +2,8 @@ import functools
import mimetypes import mimetypes
import os import os
import re import re
import tempfile
import urllib.parse import urllib.parse
from ebook_converter.constants_old import islinux, isbsd
from ebook_converter.customize.conversion import InputFormatPlugin from ebook_converter.customize.conversion import InputFormatPlugin
from ebook_converter.customize.conversion import OptionRecommendation from ebook_converter.customize.conversion import OptionRecommendation
from ebook_converter.utils.localization import get_lang from ebook_converter.utils.localization import get_lang
@@ -55,7 +53,6 @@ class HTMLInput(InputFormatPlugin):
def convert(self, stream, opts, file_ext, log, def convert(self, stream, opts, file_ext, log,
accelerators): accelerators):
self._is_case_sensitive = None
basedir = os.getcwd() basedir = os.getcwd()
self.opts = opts self.opts = opts
@@ -81,14 +78,6 @@ class HTMLInput(InputFormatPlugin):
return create_oebbook(log, stream.name, opts, return create_oebbook(log, stream.name, opts,
encoding=opts.input_encoding) encoding=opts.input_encoding)
def is_case_sensitive(self, path):
if getattr(self, '_is_case_sensitive', None) is not None:
return self._is_case_sensitive
if not path or not os.path.exists(path):
return islinux or isbsd
self._is_case_sensitive = not (os.path.exists(path.lower()) and os.path.exists(path.upper()))
return self._is_case_sensitive
def create_oebbook(self, htmlpath, basedir, opts, log, mi): def create_oebbook(self, htmlpath, basedir, opts, log, mi):
import uuid import uuid
from ebook_converter.ebooks.conversion.plumber import create_oebbook from ebook_converter.ebooks.conversion.plumber import create_oebbook
@@ -154,8 +143,6 @@ class HTMLInput(InputFormatPlugin):
self.log = log self.log = log
self.log('Normalizing filename cases') self.log('Normalizing filename cases')
for path, href in htmlfile_map.items(): for path, href in htmlfile_map.items():
if not self.is_case_sensitive(path):
path = path.lower()
self.added_resources[path] = href self.added_resources[path] = href
self.urlnormalize, self.DirContainer = urlnormalize, DirContainer self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
self.urldefrag = urllib.parse.urldefrag self.urldefrag = urllib.parse.urldefrag
@@ -252,8 +239,6 @@ class HTMLInput(InputFormatPlugin):
if os.path.isdir(link): if os.path.isdir(link):
self.log.warn(link_, 'is a link to a directory. Ignoring.') self.log.warn(link_, 'is a link to a directory. Ignoring.')
return link_ return link_
if not self.is_case_sensitive(tempfile.gettempdir()):
link = link.lower()
if link not in self.added_resources: if link not in self.added_resources:
bhref = os.path.basename(link) bhref = os.path.basename(link)
id, href = self.oeb.manifest.generate(id='added', href=sanitize_file_name(bhref)) id, href = self.oeb.manifest.generate(id='added', href=sanitize_file_name(bhref))

View File

@@ -1,27 +1,17 @@
""" """
Read meta information from PDF files Read meta information from PDF files
""" """
import os, subprocess, shutil, re import functools
from functools import partial import os
import re
import shutil
import subprocess
from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ptempfile import TemporaryDirectory
from ebook_converter.ebooks.metadata import ( from ebook_converter.ebooks.metadata import (
MetaInformation, string_to_authors, check_isbn, check_doi) MetaInformation, string_to_authors, check_isbn, check_doi)
__license__ = 'GPL v3'
__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
def get_tools():
from ebook_converter.ebooks.pdf.pdftohtml import PDFTOHTML
base = os.path.dirname(PDFTOHTML)
suffix = ''
pdfinfo = os.path.join(base, 'pdfinfo') + suffix
pdftoppm = os.path.join(base, 'pdftoppm') + suffix
return pdfinfo, pdftoppm
def read_info(outputdir, get_cover): def read_info(outputdir, get_cover):
''' Read info dict and cover from a pdf file named src.pdf in outputdir. ''' Read info dict and cover from a pdf file named src.pdf in outputdir.
Note that this function changes the cwd to outputdir and is therefore not Note that this function changes the cwd to outputdir and is therefore not
@@ -29,7 +19,8 @@ def read_info(outputdir, get_cover):
way to pass unicode paths via command line arguments. This also ensures way to pass unicode paths via command line arguments. This also ensures
that if poppler crashes, no stale file handles are left for the original that if poppler crashes, no stale file handles are left for the original
file, only for src.pdf.''' file, only for src.pdf.'''
pdfinfo, pdftoppm = get_tools() pdfinfo = 'pdfinfo'
pdftoppm = 'pdftoppm'
source_file = os.path.join(outputdir, 'src.pdf') source_file = os.path.join(outputdir, 'src.pdf')
cover_file = os.path.join(outputdir, 'cover') cover_file = os.path.join(outputdir, 'cover')
ans = {} ans = {}
@@ -55,8 +46,8 @@ def read_info(outputdir, get_cover):
ans[field] = val.strip() ans[field] = val.strip()
# Now read XMP metadata # Now read XMP metadata
# Versions of poppler before 0.47.0 used to print out both the Info dict and # Versions of poppler before 0.47.0 used to print out both the Info dict
# XMP metadata packet together. However, since that changed in # and XMP metadata packet together. However, since that changed in
# https://cgit.freedesktop.org/poppler/poppler/commit/?id=c91483aceb1b640771f572cb3df9ad707e5cad0d # https://cgit.freedesktop.org/poppler/poppler/commit/?id=c91483aceb1b640771f572cb3df9ad707e5cad0d
# we can no longer rely on it. # we can no longer rely on it.
try: try:
@@ -77,13 +68,14 @@ def read_info(outputdir, get_cover):
subprocess.check_call([pdftoppm, '-singlefile', '-jpeg', subprocess.check_call([pdftoppm, '-singlefile', '-jpeg',
'-cropbox', source_file, cover_file]) '-cropbox', source_file, cover_file])
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
print('pdftoppm errored out with return code: {e.returncode}') print(f'pdftoppm errored out with return code: {e.returncode}')
return ans return ans
def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', prefix='page-images'): def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg',
pdftoppm = get_tools()[1] prefix='page-images'):
pdftoppm = 'pdftoppm'
outputdir = os.path.abspath(outputdir) outputdir = os.path.abspath(outputdir)
args = {} args = {}
try: try:
@@ -92,11 +84,12 @@ def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', pr
'-l', str(last), pdfpath, os.path.join(outputdir, prefix) '-l', str(last), pdfpath, os.path.join(outputdir, prefix)
], **args) ], **args)
except subprocess.CalledProcessError as e: except subprocess.CalledProcessError as e:
raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode) raise ValueError('Failed to render PDF, pdftoppm errorcode: %s' %
e.returncode)
def is_pdf_encrypted(path_to_pdf): def is_pdf_encrypted(path_to_pdf):
pdfinfo = get_tools()[0] pdfinfo = 'pdfinfo'
raw = subprocess.check_output([pdfinfo, path_to_pdf]) raw = subprocess.check_output([pdfinfo, path_to_pdf])
q = re.search(br'^Encrypted:\s*(\S+)', raw, flags=re.MULTILINE) q = re.search(br'^Encrypted:\s*(\S+)', raw, flags=re.MULTILINE)
if q is not None: if q is not None:
@@ -149,7 +142,7 @@ def get_metadata(stream, cover=True):
# Look for recognizable identifiers in the info dict, if they were not # Look for recognizable identifiers in the info dict, if they were not
# found in the XMP metadata # found in the XMP metadata
for scheme, check_func in {'doi':check_doi, 'isbn':check_isbn}.items(): for scheme, check_func in {'doi': check_doi, 'isbn': check_isbn}.items():
if scheme not in mi.get_identifiers(): if scheme not in mi.get_identifiers():
for k, v in info.items(): for k, v in info.items():
if k != 'xmp_metadata': if k != 'xmp_metadata':
@@ -163,9 +156,7 @@ def get_metadata(stream, cover=True):
return mi return mi
get_quick_metadata = partial(get_metadata, cover=False) get_quick_metadata = functools.partial(get_metadata, cover=False)
#from ebook_converter.utils.podofo import set_metadata as podofo_set_metadata
def set_metadata(stream, mi): def set_metadata(stream, mi):

View File

@@ -3,12 +3,10 @@ import os
import re import re
import shutil import shutil
import subprocess import subprocess
import sys
from lxml import etree from lxml import etree
from ebook_converter import CurrentDir, xml_replace_entities from ebook_converter import CurrentDir, xml_replace_entities
from ebook_converter.constants_old import isbsd, islinux, isosx
from ebook_converter.ebooks import ConversionError, DRMError from ebook_converter.ebooks import ConversionError, DRMError
from ebook_converter.ebooks.chardet import xml_to_unicode from ebook_converter.ebooks.chardet import xml_to_unicode
from ebook_converter.ptempfile import PersistentTemporaryFile from ebook_converter.ptempfile import PersistentTemporaryFile
@@ -16,21 +14,10 @@ from ebook_converter.utils.cleantext import clean_xml_chars
from ebook_converter.utils.ipc import eintr_retry_call from ebook_converter.utils.ipc import eintr_retry_call
PDFTOHTML = 'pdftohtml'
def popen(cmd, **kw): def popen(cmd, **kw):
return subprocess.Popen(cmd, **kw) return subprocess.Popen(cmd, **kw)
if isosx and hasattr(sys, 'frameworks_dir'):
base = os.path.join(os.path.dirname(sys.frameworks_dir), 'utils.app',
'Contents', 'MacOS')
PDFTOHTML = os.path.join(base, PDFTOHTML)
if (islinux or isbsd) and getattr(sys, 'frozen', False):
PDFTOHTML = os.path.join(sys.executables_location, 'bin', 'pdftohtml')
def pdftohtml(output_dir, pdf_path, no_images, as_xml=False): def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
''' '''
Convert the pdf into html using the pdftohtml app. Convert the pdf into html using the pdftohtml app.
@@ -49,12 +36,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
def a(x): def a(x):
return os.path.basename(x) return os.path.basename(x)
exe = PDFTOHTML cmd = ['pdftohtml', '-enc', 'UTF-8', '-noframes', '-p', '-nomerge',
cmd = [exe, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge',
'-nodrm', a(pdfsrc), a(index)] '-nodrm', a(pdfsrc), a(index)]
if isbsd:
cmd.remove('-nodrm')
if no_images: if no_images:
cmd.append('-i') cmd.append('-i')
if as_xml: if as_xml:
@@ -105,11 +89,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
i.write(raw.encode('utf-8')) i.write(raw.encode('utf-8'))
cmd = [exe, '-f', '1', '-l', '1', '-xml', '-i', '-enc', 'UTF-8', cmd = ['pdftohtml', '-f', '1', '-l', '1', '-xml', '-i', '-enc',
'-noframes', '-p', '-nomerge', '-nodrm', '-q', '-stdout', 'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q',
a(pdfsrc)] '-stdout', a(pdfsrc)]
if isbsd:
cmd.remove('-nodrm')
p = popen(cmd, stdout=subprocess.PIPE) p = popen(cmd, stdout=subprocess.PIPE)
raw = p.stdout.read().strip() raw = p.stdout.read().strip()
if p.wait() == 0 and raw: if p.wait() == 0 and raw:

View File

@@ -5,6 +5,7 @@ import builtins
import fcntl import fcntl
import locale import locale
import sys import sys
import threading
from ebook_converter import constants_old from ebook_converter import constants_old
@@ -22,7 +23,9 @@ if not _run_once:
try: try:
base_dir() base_dir()
except EnvironmentError: except EnvironmentError:
pass # Ignore this error during startup, so we can show a better error message to the user later. # Ignore this error during startup, so we can show a better error
# message to the user later.
pass
# #
# Convert command line arguments to unicode # Convert command line arguments to unicode
@@ -51,7 +54,6 @@ if not _run_once:
import ebook_converter.utils.resources as resources import ebook_converter.utils.resources as resources
resources resources
#
# Initialize locale # Initialize locale
# Import string as we do not want locale specific # Import string as we do not want locale specific
# string.whitespace/printable, on windows especially, this causes problems. # string.whitespace/printable, on windows especially, this causes problems.
@@ -87,28 +89,26 @@ if not _run_once:
bound_signal.connect(slot, **kw) bound_signal.connect(slot, **kw)
builtins.__dict__['connect_lambda'] = connect_lambda builtins.__dict__['connect_lambda'] = connect_lambda
if constants_old.islinux or constants_old.isosx or constants_old.isfreebsd: # Name all threads at the OS level created using the threading module, see
# Name all threads at the OS level created using the threading module, see # http://bugs.python.org/issue15500
# http://bugs.python.org/issue15500
import threading
orig_start = threading.Thread.start orig_start = threading.Thread.start
def new_start(self): def new_start(self):
orig_start(self) orig_start(self)
try: try:
name = self.name name = self.name
if not name or name.startswith('Thread-'): if not name or name.startswith('Thread-'):
name = self.__class__.__name__ name = self.__class__.__name__
if name == 'Thread': if name == 'Thread':
name = self.name name = self.name
if name: if name:
if isinstance(name, str): if isinstance(name, str):
name = name.encode('ascii', 'replace').decode('ascii') name = name.encode('ascii', 'replace').decode('ascii')
constants_old.plugins['speedup'][0].set_thread_name(name[:15]) constants_old.plugins['speedup'][0].set_thread_name(name[:15])
except Exception: except Exception:
pass # Don't care about failure to set name pass # Don't care about failure to set name
threading.Thread.start = new_start threading.Thread.start = new_start
def test_lopen(): def test_lopen():

View File

@@ -1,16 +1,4 @@
import errno import errno
import functools
import os
import sys
import threading
from ebook_converter import force_unicode
from ebook_converter.constants_old import filesystem_encoding
from ebook_converter.constants_old import islinux
from ebook_converter.utils.filenames import ascii_filename
VADDRESS = None
def eintr_retry_call(func, *args, **kwargs): def eintr_retry_call(func, *args, **kwargs):
@@ -21,47 +9,3 @@ def eintr_retry_call(func, *args, **kwargs):
if getattr(e, 'errno', None) == errno.EINTR: if getattr(e, 'errno', None) == errno.EINTR:
continue continue
raise raise
@functools.lru_cache()
def socket_address(which):
user = force_unicode(os.environ.get('USER') or os.path.basename(os.path.expanduser('~')), filesystem_encoding)
sock_name = '{}-calibre-{}.socket'.format(ascii_filename(user).replace(' ', '_'), which)
if islinux:
ans = '\0' + sock_name
else:
from tempfile import gettempdir
tmp = force_unicode(gettempdir(), filesystem_encoding)
ans = os.path.join(tmp, sock_name)
return ans
def gui_socket_address():
return socket_address('gui')
def viewer_socket_address():
return socket_address('viewer')
class RC(threading.Thread):
def __init__(self, print_error=True, socket_address=None):
self.print_error = print_error
self.socket_address = socket_address or gui_socket_address()
threading.Thread.__init__(self)
self.conn = None
self.daemon = True
def run(self):
from multiprocessing.connection import Client
self.done = False
try:
self.conn = Client(self.socket_address)
self.done = True
except Exception:
if self.print_error:
print('Failed to connect to address {}', file=sys.stderr).format(repr(self.socket_address))
import traceback
traceback.print_exc()

View File

@@ -1,6 +1,6 @@
[metadata] [metadata]
name = ebook-converter name = ebook-converter
version = 4.9.1 version = 4.12.0
summary = Convert ebook between different formats summary = Convert ebook between different formats
description-file = description-file =
README.rst README.rst