Cleanup, cleanup

2026-05-02 20:00:52 +02:00 · 2020-07-17 18:59:45 +02:00
parent 5ac8451668
commit 45b6bb5b2c
10 changed files with 63 additions and 167 deletions
@@ -121,7 +121,6 @@ def render_html_data(path_to_html, width, height):
    result = {}

    def report_error(text=''):
-        __import__('pdb').set_trace()
        print(f'Failed to render {path_to_html}')
        # file=sys.stderr)
        if text:
@@ -2,10 +2,8 @@ import functools
 import mimetypes
 import os
 import re
-import tempfile
 import urllib.parse

-from ebook_converter.constants_old import islinux, isbsd
 from ebook_converter.customize.conversion import InputFormatPlugin
 from ebook_converter.customize.conversion import OptionRecommendation
 from ebook_converter.utils.localization import get_lang
@@ -55,7 +53,6 @@ class HTMLInput(InputFormatPlugin):

    def convert(self, stream, opts, file_ext, log,
                accelerators):
-        self._is_case_sensitive = None
        basedir = os.getcwd()
        self.opts = opts

@@ -81,14 +78,6 @@ class HTMLInput(InputFormatPlugin):
        return create_oebbook(log, stream.name, opts,
                encoding=opts.input_encoding)

-    def is_case_sensitive(self, path):
-        if getattr(self, '_is_case_sensitive', None) is not None:
-            return self._is_case_sensitive
-        if not path or not os.path.exists(path):
-            return islinux or isbsd
-        self._is_case_sensitive = not (os.path.exists(path.lower()) and os.path.exists(path.upper()))
-        return self._is_case_sensitive
-
    def create_oebbook(self, htmlpath, basedir, opts, log, mi):
        import uuid
        from ebook_converter.ebooks.conversion.plumber import create_oebbook
@@ -154,8 +143,6 @@ class HTMLInput(InputFormatPlugin):
        self.log = log
        self.log('Normalizing filename cases')
        for path, href in htmlfile_map.items():
-            if not self.is_case_sensitive(path):
-                path = path.lower()
            self.added_resources[path] = href
        self.urlnormalize, self.DirContainer = urlnormalize, DirContainer
        self.urldefrag = urllib.parse.urldefrag
@@ -252,8 +239,6 @@ class HTMLInput(InputFormatPlugin):
        if os.path.isdir(link):
            self.log.warn(link_, 'is a link to a directory. Ignoring.')
            return link_
-        if not self.is_case_sensitive(tempfile.gettempdir()):
-            link = link.lower()
        if link not in self.added_resources:
            bhref = os.path.basename(link)
            id, href = self.oeb.manifest.generate(id='added', href=sanitize_file_name(bhref))
@@ -1,27 +1,17 @@
 """
 Read meta information from PDF files
 """
-import os, subprocess, shutil, re
-from functools import partial
+import functools
+import os
+import re
+import shutil
+import subprocess

 from ebook_converter.ptempfile import TemporaryDirectory
 from ebook_converter.ebooks.metadata import (
    MetaInformation, string_to_authors, check_isbn, check_doi)


-__license__ = 'GPL v3'
-__copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
-
-
-def get_tools():
-    from ebook_converter.ebooks.pdf.pdftohtml import PDFTOHTML
-    base = os.path.dirname(PDFTOHTML)
-    suffix = ''
-    pdfinfo = os.path.join(base, 'pdfinfo') + suffix
-    pdftoppm = os.path.join(base, 'pdftoppm') + suffix
-    return pdfinfo, pdftoppm
-
-
 def read_info(outputdir, get_cover):
    ''' Read info dict and cover from a pdf file named src.pdf in outputdir.
    Note that this function changes the cwd to outputdir and is therefore not
@@ -29,7 +19,8 @@ def read_info(outputdir, get_cover):
    way to pass unicode paths via command line arguments. This also ensures
    that if poppler crashes, no stale file handles are left for the original
    file, only for src.pdf.'''
-    pdfinfo, pdftoppm = get_tools()
+    pdfinfo = 'pdfinfo'
+    pdftoppm = 'pdftoppm'
    source_file = os.path.join(outputdir, 'src.pdf')
    cover_file = os.path.join(outputdir, 'cover')
    ans = {}
@@ -55,8 +46,8 @@ def read_info(outputdir, get_cover):
            ans[field] = val.strip()

    # Now read XMP metadata
-    # Versions of poppler before 0.47.0 used to print out both the Info dict and
-    # XMP metadata packet together. However, since that changed in
+    # Versions of poppler before 0.47.0 used to print out both the Info dict
+    # and XMP metadata packet together. However, since that changed in
    # https://cgit.freedesktop.org/poppler/poppler/commit/?id=c91483aceb1b640771f572cb3df9ad707e5cad0d
    # we can no longer rely on it.
    try:
@@ -77,13 +68,14 @@ def read_info(outputdir, get_cover):
            subprocess.check_call([pdftoppm, '-singlefile', '-jpeg',
                                   '-cropbox', source_file, cover_file])
        except subprocess.CalledProcessError as e:
-            print('pdftoppm errored out with return code: {e.returncode}')
+            print(f'pdftoppm errored out with return code: {e.returncode}')

    return ans


-def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', prefix='page-images'):
-    pdftoppm = get_tools()[1]
+def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg',
+                prefix='page-images'):
+    pdftoppm = 'pdftoppm'
    outputdir = os.path.abspath(outputdir)
    args = {}
    try:
@@ -92,11 +84,12 @@ def page_images(pdfpath, outputdir='.', first=1, last=1, image_format='jpeg', pr
            '-l', str(last), pdfpath, os.path.join(outputdir, prefix)
        ], **args)
    except subprocess.CalledProcessError as e:
-        raise ValueError('Failed to render PDF, pdftoppm errorcode: %s'%e.returncode)
+        raise ValueError('Failed to render PDF, pdftoppm errorcode: %s' %
+                         e.returncode)


 def is_pdf_encrypted(path_to_pdf):
-    pdfinfo = get_tools()[0]
+    pdfinfo = 'pdfinfo'
    raw = subprocess.check_output([pdfinfo, path_to_pdf])
    q = re.search(br'^Encrypted:\s*(\S+)', raw, flags=re.MULTILINE)
    if q is not None:
@@ -149,7 +142,7 @@ def get_metadata(stream, cover=True):

    # Look for recognizable identifiers in the info dict, if they were not
    # found in the XMP metadata
-    for scheme, check_func in {'doi':check_doi, 'isbn':check_isbn}.items():
+    for scheme, check_func in {'doi': check_doi, 'isbn': check_isbn}.items():
        if scheme not in mi.get_identifiers():
            for k, v in info.items():
                if k != 'xmp_metadata':
@@ -163,9 +156,7 @@ def get_metadata(stream, cover=True):
    return mi


-get_quick_metadata = partial(get_metadata, cover=False)
-
-#from ebook_converter.utils.podofo import set_metadata as podofo_set_metadata
+get_quick_metadata = functools.partial(get_metadata, cover=False)


 def set_metadata(stream, mi):
@@ -3,12 +3,10 @@ import os
 import re
 import shutil
 import subprocess
-import sys

 from lxml import etree

 from ebook_converter import CurrentDir, xml_replace_entities
-from ebook_converter.constants_old import isbsd, islinux, isosx
 from ebook_converter.ebooks import ConversionError, DRMError
 from ebook_converter.ebooks.chardet import xml_to_unicode
 from ebook_converter.ptempfile import PersistentTemporaryFile
@@ -16,21 +14,10 @@ from ebook_converter.utils.cleantext import clean_xml_chars
 from ebook_converter.utils.ipc import eintr_retry_call


-PDFTOHTML = 'pdftohtml'
-
-
 def popen(cmd, **kw):
    return subprocess.Popen(cmd, **kw)


-if isosx and hasattr(sys, 'frameworks_dir'):
-    base = os.path.join(os.path.dirname(sys.frameworks_dir), 'utils.app',
-                        'Contents', 'MacOS')
-    PDFTOHTML = os.path.join(base, PDFTOHTML)
-if (islinux or isbsd) and getattr(sys, 'frozen', False):
-    PDFTOHTML = os.path.join(sys.executables_location, 'bin', 'pdftohtml')
-
-
 def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
    '''
    Convert the pdf into html using the pdftohtml app.
@@ -49,12 +36,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):
        def a(x):
            return os.path.basename(x)

-        exe = PDFTOHTML
-        cmd = [exe, '-enc', 'UTF-8', '-noframes', '-p', '-nomerge',
+        cmd = ['pdftohtml', '-enc', 'UTF-8', '-noframes', '-p', '-nomerge',
               '-nodrm', a(pdfsrc), a(index)]

-        if isbsd:
-            cmd.remove('-nodrm')
        if no_images:
            cmd.append('-i')
        if as_xml:
@@ -105,11 +89,9 @@ def pdftohtml(output_dir, pdf_path, no_images, as_xml=False):

                i.write(raw.encode('utf-8'))

-            cmd = [exe, '-f', '1', '-l', '1', '-xml', '-i', '-enc', 'UTF-8',
-                   '-noframes', '-p', '-nomerge', '-nodrm', '-q', '-stdout',
-                   a(pdfsrc)]
-            if isbsd:
-                cmd.remove('-nodrm')
+            cmd = ['pdftohtml', '-f', '1', '-l', '1', '-xml', '-i', '-enc',
+                   'UTF-8', '-noframes', '-p', '-nomerge', '-nodrm', '-q',
+                   '-stdout', a(pdfsrc)]
            p = popen(cmd, stdout=subprocess.PIPE)
            raw = p.stdout.read().strip()
            if p.wait() == 0 and raw: