Move sanitize_file_name out of the init module.

The sanitize_file_name function has been moved to the filenames utility module, where it logically belongs.
2026-04-06 12:53:34 +02:00 · 2020-10-13 19:29:25 +02:00
parent 847e133878
commit 250d0eeea7
4 changed files with 42 additions and 40 deletions
--- a/ebook_converter/init.py
+++ b/ebook_converter/init.py
@@ -9,40 +9,6 @@ from ebook_converter import constants_old
 from ebook_converter.ebooks.html_entities import html5_entities
 def sanitize_file_name(name, substitute='_'):
    """
    Sanitize the filename `name`. All invalid characters are replaced by
    `substitute`. The set of invalid characters is the union of the invalid
    characters in Windows, macOS and Linux (backslash, `|`, `?`, `*`, `<`,
    `>`, `:`, `+`, `/`, the double quote and the control characters 0-31).
    Also removes leading and trailing whitespace.

    Further normalisation applied after the character replacement:

    * every whitespace character is turned into a plain space,
    * a base name (the part before the extension) made only of periods
      becomes a single underscore,
    * any `..` left in the base name is replaced by `substitute`,
    * a trailing period or space is replaced by an underscore,
    * a leading period is replaced by an underscore.

    **WARNING:** This function also replaces path separators, so only pass
    file names and not full paths to it.

    :param name: file name as `str` or `bytes`; bytes are decoded with the
                 filesystem encoding, undecodable bytes being replaced.
    :param substitute: replacement text as `str` or `bytes`, decoded the
                       same way as `name`.
    :return: the sanitized file name as `str`.
    """
    if isinstance(name, bytes):
        name = name.decode(constants_old.filesystem_encoding, 'replace')
    if isinstance(substitute, bytes):
        substitute = substitute.decode(constants_old.filesystem_encoding,
                                       'replace')
    # Build the lookup set once per call rather than once per character.
    invalid = frozenset(('\\', '|', '?', '*', '<', '"', ':', '>', '+', '/') +
                        tuple(map(chr, range(32))))
    one = ''.join(substitute if c in invalid else c for c in name)
    # Control characters are already gone, so this only normalises the
    # remaining (Unicode) whitespace to a plain space.
    one = re.sub(r'\s', ' ', one).strip()
    # Only the base name is cleaned of period runs; the extension is kept.
    bname, ext = os.path.splitext(one)
    one = re.sub(r'^\.+$', '_', bname)
    one = one.replace('..', substitute)
    one += ext
    # Windows doesn't like path components that end with a period or space
    if one and one[-1] in ('.', ' '):
        one = one[:-1]+'_'
    # Names starting with a period are hidden on Unix
    if one.startswith('.'):
        one = '_' + one[1:]
    return one
 def fit_image(width, height, pwidth, pheight):
    """
    Fit image in box of width pwidth and height pheight.
--- a/ebook_converter/ebooks/oeb/polish/replace.py
+++ b/ebook_converter/ebooks/oeb/polish/replace.py
@@ -3,7 +3,7 @@ from functools import partial
 from collections import Counter, defaultdict
 import urllib.parse
-from ebook_converter import sanitize_file_name
+from ebook_converter.utils import filenames as fms
 from ebook_converter.ebooks.chardet import strip_encoding_declarations
 from ebook_converter.ebooks.oeb.base import css_text
 from ebook_converter.ebooks.oeb.polish.css import iter_declarations, remove_property_value
@@ -203,7 +203,7 @@ def rename_files(container, file_map):
 def replace_file(container, name, path, basename, force_mt=None):
    dirname, base = name.rpartition('/')[0::2]
-    nname = sanitize_file_name(basename)
+    nname = fms.sanitize_file_name(basename)
    if dirname:
        nname = dirname + '/' + nname
    with open(path, 'rb') as src:
--- a/ebook_converter/utils/filenames.py
+++ b/ebook_converter/utils/filenames.py
@@ -4,15 +4,51 @@ meaning as possible.
 """
 import errno
 import os
 import re
 import shutil
 from math import ceil
-from ebook_converter import force_unicode, sanitize_file_name
+from ebook_converter import constants_old
 from ebook_converter import force_unicode
 from ebook_converter.constants_old import (filesystem_encoding,
                                           preferred_encoding)
 from ebook_converter.utils.localization import get_udc
 def sanitize_file_name(name, substitute='_'):
    """
    Sanitize the filename `name`. All invalid characters are replaced by
    `substitute`. The set of invalid characters is the union of the invalid
    characters in Windows, macOS and Linux (backslash, `|`, `?`, `*`, `<`,
    `>`, `:`, `+`, `/`, the double quote and the control characters 0-31).
    Also removes leading and trailing whitespace.

    Further normalisation applied after the character replacement:

    * every whitespace character is turned into a plain space,
    * a base name (the part before the extension) made only of periods
      becomes a single underscore,
    * any `..` left in the base name is replaced by `substitute`,
    * a trailing period or space is replaced by an underscore,
    * a leading period is replaced by an underscore.

    **WARNING:** This function also replaces path separators, so only pass
    file names and not full paths to it.

    :param name: file name as `str` or `bytes`; bytes are decoded with the
                 filesystem encoding, undecodable bytes being replaced.
    :param substitute: replacement text as `str` or `bytes`, decoded the
                       same way as `name`.
    :return: the sanitized file name as `str`.
    """
    if isinstance(name, bytes):
        name = name.decode(constants_old.filesystem_encoding, 'replace')
    if isinstance(substitute, bytes):
        substitute = substitute.decode(constants_old.filesystem_encoding,
                                       'replace')
    # Build the lookup set once per call rather than once per character.
    invalid = frozenset(('\\', '|', '?', '*', '<', '"', ':', '>', '+', '/') +
                        tuple(map(chr, range(32))))
    one = ''.join(substitute if c in invalid else c for c in name)
    # Control characters are already gone, so this only normalises the
    # remaining (Unicode) whitespace to a plain space.
    one = re.sub(r'\s', ' ', one).strip()
    # Only the base name is cleaned of period runs; the extension is kept.
    bname, ext = os.path.splitext(one)
    one = re.sub(r'^\.+$', '_', bname)
    one = one.replace('..', substitute)
    one += ext
    # Windows doesn't like path components that end with a period or space
    if one and one[-1] in ('.', ' '):
        one = one[:-1]+'_'
    # Names starting with a period are hidden on Unix
    if one.startswith('.'):
        one = '_' + one[1:]
    return one
 def ascii_text(orig):
    udc = get_udc()
    try:
--- a/ebook_converter/utils/zipfile.py
+++ b/ebook_converter/utils/zipfile.py
@@ -7,7 +7,7 @@ import binascii
 from contextlib import closing
 from tempfile import SpooledTemporaryFile
-from ebook_converter import sanitize_file_name
+from ebook_converter.utils import filenames as fms
 from ebook_converter.constants_old import filesystem_encoding
 from ebook_converter.ebooks.chardet import detect
 from ebook_converter.polyglot.builtins import as_bytes
@@ -1135,7 +1135,7 @@ class ZipFile:
                os.makedirs(upperdirs)
            except:  # Added by Kovid
                targetpath = os.path.join(base_target,
-                        sanitize_file_name(fname))
+                        fms.sanitize_file_name(fname))
                upperdirs = os.path.dirname(targetpath)
                if upperdirs and not os.path.exists(upperdirs):
                    os.makedirs(upperdirs)
@@ -1156,7 +1156,7 @@ class ZipFile:
                except:
                    # Try sanitizing the file name to remove invalid characters
                    components = list(os.path.split(targetpath))
-                    components[-1] = sanitize_file_name(components[-1])
+                    components[-1] = fms.sanitize_file_name(components[-1])
                    targetpath = os.sep.join(components)
                    with open(targetpath, 'wb') as target:
                        shutil.copyfileobj(source, target)