From 45b6bb5b2cc151e36c4b299c5b8540ed615cca26 Mon Sep 17 00:00:00 2001 From: gryf Date: Fri, 17 Jul 2020 18:59:45 +0200 Subject: [PATCH] Cleanup, cleanup --- README.rst | 5 +- ebook_converter/__init__.py | 8 +-- ebook_converter/constants_old.py | 28 ++++------ ebook_converter/ebooks/__init__.py | 1 - .../ebooks/conversion/plugins/html_input.py | 15 ----- ebook_converter/ebooks/metadata/pdf.py | 45 ++++++--------- ebook_converter/ebooks/pdf/pdftohtml.py | 26 ++------- ebook_converter/startup.py | 44 +++++++-------- ebook_converter/utils/ipc/__init__.py | 56 ------------------- setup.cfg | 2 +- 10 files changed, 63 insertions(+), 167 deletions(-) diff --git a/README.rst b/README.rst index 446309c..d125154 100644 --- a/README.rst +++ b/README.rst @@ -18,6 +18,8 @@ To build and run ebook converter, you'll need: - Python 3.6 or newer - `Liberation fonts`_ - setuptools +- ``pdftohtml``, ``pdfinfo`` and ``pdftoppm`` from `poppler`_ project for + conversion from PDF available in ``$PATH`` No Python2 support. Even if Calibre probably still is able to run on Python2, I do not have an intention to support it. @@ -101,7 +103,7 @@ managers), i.e: .. code:: shell-session - $ virtualenv venv + $ python -m venv venv $ . venv/bin/activate (venv) $ git clone https://github.com/gryf/ebook-converter (venv) $ cd ebook-converter @@ -128,3 +130,4 @@ for details. .. _pypi: https://pypi.python.org .. _Liberation fonts: https://github.com/liberationfonts/liberation-fonts .. _Kindle periodical: https://sellercentral.amazon.com/gp/help/external/help.html?itemID=202047960&language=en-US +.. _poppler: https://poppler.freedesktop.org/ diff --git a/ebook_converter/__init__.py b/ebook_converter/__init__.py index ae80283..14d6c6e 100644 --- a/ebook_converter/__init__.py +++ b/ebook_converter/__init__.py @@ -10,17 +10,15 @@ import sys from functools import partial from ebook_converter import constants_old -from ebook_converter.constants_old import islinux, isfrozen, \ - isbsd, __appname__, __version__, __author__, \ +from ebook_converter.constants_old import isfrozen, \ + __appname__, __version__, __author__, \ config_dir from ebook_converter.ebooks.html_entities import html5_entities if False: # Prevent pyflakes from complaining - __appname__, islinux, __version__ - isfrozen, __author__ - isbsd, config_dir + __appname__, __version__, isfrozen, __author__, config_dir def init_mimetypes(): diff --git a/ebook_converter/constants_old.py b/ebook_converter/constants_old.py index 6526cce..896343b 100644 --- a/ebook_converter/constants_old.py +++ b/ebook_converter/constants_old.py @@ -6,11 +6,10 @@ import os import sys -__appname__ = 'calibre' +__appname__ = 'ebook-converter' numeric_version = (4, 12, 0) -__version__ = '.'.join(map(str, numeric_version)) -git_version = None -__author__ = "Kovid Goyal " +__version__ = '.'.join([str(x) for x in numeric_version]) +__author__ = "foobar" ''' Various run time constants. @@ -18,16 +17,14 @@ Various run time constants. _plat = sys.platform.lower() -isosx = 'darwin' in _plat -isnewosx = isosx and getattr(sys, 'new_app_bundle', False) +isosx = 'darwin' in _plat +isnewosx = isosx and getattr(sys, 'new_app_bundle', False) isfreebsd = 'freebsd' in _plat isnetbsd = 'netbsd' in _plat isdragonflybsd = 'dragonfly' in _plat -isbsd = isfreebsd or isnetbsd or isdragonflybsd ishaiku = 'haiku1' in _plat -islinux = True -isfrozen = hasattr(sys, 'frozen') -isunix = isosx or islinux or ishaiku +isfrozen = hasattr(sys, 'frozen') +isunix = True isportable = os.getenv('CALIBRE_PORTABLE_BUILD') is not None isxp = isoldvista = False is64bit = sys.maxsize > (1 << 32) @@ -40,7 +37,7 @@ TOC_DIALOG_APP_UID = 'com.calibre-ebook.toc-editor' try: preferred_encoding = locale.getpreferredencoding() codecs.lookup(preferred_encoding) -except: +except Exception: preferred_encoding = 'utf-8' fcntl = importlib.import_module('fcntl') @@ -214,12 +211,9 @@ del dv def get_version(): '''Return version string for display to user ''' - if git_version is not None: - v = git_version - else: - v = __version__ - if numeric_version[-1] == 0: - v = v[:-2] + v = __version__ + if numeric_version[-1] == 0: + v = v[:-2] if is_running_from_develop: v += '*' diff --git a/ebook_converter/ebooks/__init__.py b/ebook_converter/ebooks/__init__.py index e2a4eef..22d6e3b 100644 --- a/ebook_converter/ebooks/__init__.py +++ b/ebook_converter/ebooks/__init__.py @@ -121,7 +121,6 @@ def render_html_data(path_to_html, width, height): result = {} def report_error(text=''): - __import__('pdb').set_trace() print(f'Failed to render {path_to_html}') # file=sys.stderr) if text: diff --git a/ebook_converter/ebooks/conversion/plugins/html_input.py b/ebook_converter/ebooks/conversion/plugins/html_input.py index 637030d..d6f0481 100644 --- a/ebook_converter/ebooks/conversion/plugins/html_input.py +++ b/ebook_converter/ebooks/conversion/plugins/html_input.py @@ -2,10 +2,8 @@ import functools import mimetypes import os import re -import tempfile import urllib.parse -from ebook_converter.constants_old import islinux, isbsd from ebook_converter.customize.conversion import InputFormatPlugin from ebook_converter.customize.conversion import OptionRecommendation from ebook_converter.utils.localization import get_lang @@ -55,7 +53,6 @@ class HTMLInput(InputFormatPlugin): def convert(self, stream, opts, file_ext, log, accelerators): - self._is_case_sensitive = None basedir = os.getcwd() self.opts = opts @@ -81,14 +78,6 @@ class HTMLInput(InputFormatPlugin): return create_oebbook(log, stream.name, opts, encoding=opts.input_encoding) - def is_case_sensitive(self, path): - if getattr(self, '_is_case_sensitive', None) is not None: - return self._is_case_sensitive - if not path or not os.path.exists(path): - return islinux or isbsd - self._is_case_sensitive = not (os.path.exists(path.lower()) and os.path.exists(path.upper())) - return self._is_case_sensitive - def create_oebbook(self, htmlpath, basedir, opts, log, mi): import uuid from ebook_converter.ebooks.conversion.plumber import create_oebbook @@ -154,8 +143,6 @@ class HTMLInput(InputFormatPlugin): self.log = log self.log('Normalizing filename cases') for path, href in htmlfile_map.items(): - if not self.is_case_sensitive(path): - path = path.lower() self.added_resources[path] = href self.urlnormalize, self.DirContainer = urlnormalize, DirContainer self.urldefrag = urllib.parse.urldefrag @@ -252,8 +239,6 @@ class HTMLInput(InputFormatPlugin): if os.path.isdir(link): self.log.warn(link_, 'is a link to a directory. Ignoring.') return link_ - if not self.is_case_sensitive(tempfile.gettempdir()): - link = link.lower() if link not in self.added_resources: bhref = os.path.basename(link) id, href = self.oeb.manifest.generate(id='added', href=sanitize_file_name(bhref)) diff --git a/ebook_converter/ebooks/metadata/pdf.py b/ebook_converter/ebooks/metadata/pdf.py index 984cf6f..0b1ac1f 100644 --- a/ebook_converter/ebooks/metadata/pdf.py +++ b/ebook_converter/ebooks/metadata/pdf.py @@ -1,27 +1,17 @@ """ Read meta information from PDF files """ -import os, subprocess, shutil, re -from functools import partial +import functools +import os +import re +import shutil +import subprocess from ebook_converter.ptempfile import TemporaryDirectory from ebook_converter.ebooks.metadata import ( MetaInformation, string_to_authors, check_isbn, check_doi) -__license__ = 'GPL v3' -__copyright__ = '2008, Kovid Goyal ' - - -def get_tools(): - from ebook_converter.ebooks.pdf.pdftohtml import PDFTOHTML - base = os.path.dirname(PDFTOHTML) - suffix = '' - pdfinfo = os.path.join(base, 'pdfinfo') + suffix - pdftoppm = os.path.join(base, 'pdftoppm') + suffix - return pdfinfo, pdftoppm - - def read_info(outputdir, get_cover): ''' Read info dict and cover from a pdf file named src.pdf in outputdir. Note that this function changes the cwd to outputdir and is therefore not @@ -29,7 +19,8 @@ def read_info(outputdir, get_cover): way to pass unicode paths via command line arguments. This also ensures that if poppler crashes, no stale file handles are left for the original file, only for src.pdf.''' - pdfinfo, pdftoppm = get_tools() + pdfinfo = 'pdfinfo' + pdftoppm = 'pdftoppm' source_file = os.path.join(outputdir, 'src.pdf') cover_file = os.path.join(outputdir, 'cover') ans = {} @@ -55,8 +46,8 @@ def read_info(outputdir, get_cover): ans[field] = val.strip() # Now read XMP metadata - # Versions of poppler before 0.47.0 used to print out both the Info dict and - # XMP metadata packet together. However, since that changed in + # Versions of poppler before 0.47.0 used to print out both the Info dict + # and XMP metadata packet together. However, since that changed in # https://cgit.freedesktop.org/poppler/poppler/commit/?id=c91483aceb1b640771f572cb3df9ad707e5cad0d # we can no longer rely on it. try: @@ -77,13 +68,14 @@ def read_info(outputdir, get_cover): subprocess.check_call([pdftoppm, '-singlefile', '-jpeg', '-cropbox', source_file, cover_file]) except subprocess.CalledProcessError as e: - print('pdftoppm errored out with return code: {e.returncode}') + print(f'pdftoppm errored out with return code: {e.returncode}') return ans -def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', prefix='page-images'): - pdftoppm = get_tools()[1] +def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', + prefix='page-images'): + pdftoppm = 'pdftoppm' outputdir = os.path.abspath(outputdir) args = {} try: @@ -92,11 +84,12 @@ def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', pr '-l', str(last), pdfpath, os.path.join(outputdir, prefix) ], **args) except subprocess.CalledProcessError as e: - raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode) + raise ValueError('Failed to render PDF, pdftoppm errorcode: %s' % + e.returncode) def is_pdf_encrypted(path_to_pdf): - pdfinfo = get_tools()[0] + pdfinfo = 'pdfinfo' raw = subprocess.check_output([pdfinfo, path_to_pdf]) q = re.search(br'^Encrypted:\s*(\S+)', raw, flags=re.MULTILINE) if q is not None: @@ -149,7 +142,7 @@ def get_metadata(stream, cover=True): # Look for recognizable identifiers in the info dict, if they were not # found in the XMP metadata - for scheme, check_func in {'doi':check_doi, 'isbn':check_isbn}.items(): + for scheme, check_func in {'doi': check_doi, 'isbn': check_isbn}.items(): if scheme not in mi.get_identifiers(): for k, v in info.items(): if k != 'xmp_metadata': @@ -163,9 +156,7 @@ def get_metadata(stream, cover=True): return mi -get_quick_metadata = partial(get_metadata, cover=False) - -#from ebook_converter.utils.podofo import set_metadata as podofo_set_metadata +get_quick_metadata = functools.partial(get_metadata, cover=False) def set_metadata(stream, mi): diff --git a/ebook_converter/ebooks/pdf/pdftohtml.py b/ebook_converter/ebooks/pdf/pdftohtml.py index 1f62166..8c8b03f 100644 --- a/ebook_converter/ebooks/pdf/pdftohtml.py +++ b/ebook_converter/ebooks/pdf/pdftohtml.py @@ -3,12 +3,10 @@ import os import re import shutil import subprocess -import sys from lxml import etree from ebook_converter import CurrentDir, xml_replace_entities -from ebook_converter.constants_old import isbsd, islinux, isosx from ebook_converter.ebooks import ConversionError, DRMError from ebook_converter.ebooks.chardet import xml_to_unicode from ebook_converter.ptempfile import PersistentTemporaryFile @@ -16,21 +14,10 @@ from ebook_converter.utils.cleantext import clean_xml_chars from ebook_converter.utils.ipc import eintr_retry_call -PDFTOHTML = 'pdftohtml' - - def popen(cmd, **kw): return subprocess.Popen(cmd, **kw) -if isosx and hasattr(sys, 'frameworks_dir'): - base = os.path.join(os.path.dirname(sys.frameworks_dir), 'utils.app', - 'Contents', 'MacOS') - PDFTOHTML = os.path.join(base, PDFTOHTML) -if (islinux or isbsd) and getattr(sys, 'frozen', False): - PDFTOHTML = os.path.join(sys.executables_location, 'bin', 'pdftohtml') - - def pdftohtml(output_dir, pdf_path, no_images, as_xml=False): ''' Convert the pdf into html using the pdftohtml app. @@ -49,12 +36,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False): def a(x): return os.path.basename(x) - exe = PDFTOHTML - cmd = [exe, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', + cmd = ['pdftohtml', '-enc', 'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', a(pdfsrc), a(index)] - if isbsd: - cmd.remove('-nodrm') if no_images: cmd.append('-i') if as_xml: @@ -105,11 +89,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False): i.write(raw.encode('utf-8')) - cmd = [exe, '-f', '1', '-l', '1', '-xml', '-i', '-enc', 'UTF-8', - '-noframes', '-p', '-nomerge', '-nodrm', '-q', '-stdout', - a(pdfsrc)] - if isbsd: - cmd.remove('-nodrm') + cmd = ['pdftohtml', '-f', '1', '-l', '1', '-xml', '-i', '-enc', + 'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q', + '-stdout', a(pdfsrc)] p = popen(cmd, stdout=subprocess.PIPE) raw = p.stdout.read().strip() if p.wait() == 0 and raw: diff --git a/ebook_converter/startup.py b/ebook_converter/startup.py index b1227ca..a440577 100644 --- a/ebook_converter/startup.py +++ b/ebook_converter/startup.py @@ -5,6 +5,7 @@ import builtins import fcntl import locale import sys +import threading from ebook_converter import constants_old @@ -22,7 +23,9 @@ if not _run_once: try: base_dir() except EnvironmentError: - pass # Ignore this error during startup, so we can show a better error message to the user later. + # Ignore this error during startup, so we can show a better error + # message to the user later. + pass # # Convert command line arguments to unicode @@ -51,7 +54,6 @@ if not _run_once: import ebook_converter.utils.resources as resources resources - # # Initialize locale # Import string as we do not want locale specific # string.whitespace/printable, on windows especially, this causes problems. @@ -87,28 +89,26 @@ if not _run_once: bound_signal.connect(slot, **kw) builtins.__dict__['connect_lambda'] = connect_lambda - if constants_old.islinux or constants_old.isosx or constants_old.isfreebsd: - # Name all threads at the OS level created using the threading module, see - # http://bugs.python.org/issue15500 - import threading + # Name all threads at the OS level created using the threading module, see + # http://bugs.python.org/issue15500 - orig_start = threading.Thread.start + orig_start = threading.Thread.start - def new_start(self): - orig_start(self) - try: - name = self.name - if not name or name.startswith('Thread-'): - name = self.__class__.__name__ - if name == 'Thread': - name = self.name - if name: - if isinstance(name, str): - name = name.encode('ascii', 'replace').decode('ascii') - constants_old.plugins['speedup'][0].set_thread_name(name[:15]) - except Exception: - pass # Don't care about failure to set name - threading.Thread.start = new_start + def new_start(self): + orig_start(self) + try: + name = self.name + if not name or name.startswith('Thread-'): + name = self.__class__.__name__ + if name == 'Thread': + name = self.name + if name: + if isinstance(name, str): + name = name.encode('ascii', 'replace').decode('ascii') + constants_old.plugins['speedup'][0].set_thread_name(name[:15]) + except Exception: + pass # Don't care about failure to set name + threading.Thread.start = new_start def test_lopen(): diff --git a/ebook_converter/utils/ipc/__init__.py b/ebook_converter/utils/ipc/__init__.py index 7285615..add5650 100644 --- a/ebook_converter/utils/ipc/__init__.py +++ b/ebook_converter/utils/ipc/__init__.py @@ -1,16 +1,4 @@ import errno -import functools -import os -import sys -import threading - -from ebook_converter import force_unicode -from ebook_converter.constants_old import filesystem_encoding -from ebook_converter.constants_old import islinux -from ebook_converter.utils.filenames import ascii_filename - - -VADDRESS = None def eintr_retry_call(func, *args, **kwargs): @@ -21,47 +9,3 @@ def eintr_retry_call(func, *args, **kwargs): if getattr(e, 'errno', None) == errno.EINTR: continue raise - - -@functools.lru_cache() -def socket_address(which): - - user = force_unicode(os.environ.get('USER') or os.path.basename(os.path.expanduser('~')), filesystem_encoding) - sock_name = '{}-calibre-{}.socket'.format(ascii_filename(user).replace(' ', '_'), which) - if islinux: - ans = '\0' + sock_name - else: - from tempfile import gettempdir - tmp = force_unicode(gettempdir(), filesystem_encoding) - ans = os.path.join(tmp, sock_name) - return ans - - -def gui_socket_address(): - return socket_address('gui') - - -def viewer_socket_address(): - return socket_address('viewer') - - -class RC(threading.Thread): - - def __init__(self, print_error=True, socket_address=None): - self.print_error = print_error - self.socket_address = socket_address or gui_socket_address() - threading.Thread.__init__(self) - self.conn = None - self.daemon = True - - def run(self): - from multiprocessing.connection import Client - self.done = False - try: - self.conn = Client(self.socket_address) - self.done = True - except Exception: - if self.print_error: - print('Failed to connect to address {}', file=sys.stderr).format(repr(self.socket_address)) - import traceback - traceback.print_exc() diff --git a/setup.cfg b/setup.cfg index 0f8352b..d41409e 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,6 @@ [metadata] name = ebook-converter -version = 4.9.1 +version = 4.12.0 summary = Convert ebook between different formats description-file = README.rst