Move sanitize_file_name out of the init module.

The sanitize_file_name function has been moved to the utils.filenames utility module, where it logically belongs.
2026-03-19 00:13:52 +01:00 · 2020-10-13 19:29:25 +02:00
parent 847e133878
commit 250d0eeea7
4 changed files with 42 additions and 40 deletions
--- a/ebook_converter/init.py
+++ b/ebook_converter/init.py
@@ -9,40 +9,6 @@ from ebook_converter import constants_old
 from ebook_converter.ebooks.html_entities import html5_entities


-def sanitize_file_name(name, substitute='_'):
-    """
-    Sanitize the filename `name`. All invalid characters are replaced by
-    `substitute`. The set of invalid characters is the union of the invalid
-    characters in Windows, macOS and Linux. Also removes leading and trailing
-    whitespace.
-
-    **WARNING:** This function also replaces path separators, so only pass
-    file names and not full paths to it.
-    """
-
-    if isinstance(name, bytes):
-        name = name.decode(constants_old.filesystem_encoding, 'replace')
-    if isinstance(substitute, bytes):
-        substitute = substitute.decode(constants_old.filesystem_encoding,
-                                       'replace')
-    chars = (substitute
-             if c in set(('\\', '|', '?', '*', '<', '"', ':', '>', '+', '/') +
-                         tuple(map(chr, range(32)))) else c for c in name)
-    one = ''.join(chars)
-    one = re.sub(r'\s', ' ', one).strip()
-    bname, ext = os.path.splitext(one)
-    one = re.sub(r'^\.+$', '_', bname)
-    one = one.replace('..', substitute)
-    one += ext
-    # Windows doesn't like path components that end with a period or space
-    if one and one[-1] in ('.', ' '):
-        one = one[:-1]+'_'
-    # Names starting with a period are hidden on Unix
-    if one.startswith('.'):
-        one = '_' + one[1:]
-    return one
-
-
 def fit_image(width, height, pwidth, pheight):
    """
    Fit image in box of width pwidth and height pheight.
--- a/ebook_converter/ebooks/oeb/polish/replace.py
+++ b/ebook_converter/ebooks/oeb/polish/replace.py
@@ -3,7 +3,7 @@ from functools import partial
 from collections import Counter, defaultdict
 import urllib.parse

-from ebook_converter import sanitize_file_name
+from ebook_converter.utils import filenames as fms
 from ebook_converter.ebooks.chardet import strip_encoding_declarations
 from ebook_converter.ebooks.oeb.base import css_text
 from ebook_converter.ebooks.oeb.polish.css import iter_declarations, remove_property_value
@@ -203,7 +203,7 @@ def rename_files(container, file_map):

 def replace_file(container, name, path, basename, force_mt=None):
    dirname, base = name.rpartition('/')[0::2]
-    nname = sanitize_file_name(basename)
+    nname = fms.sanitize_file_name(basename)
    if dirname:
        nname = dirname + '/' + nname
    with open(path, 'rb') as src:
--- a/ebook_converter/utils/filenames.py
+++ b/ebook_converter/utils/filenames.py
@@ -4,15 +4,51 @@ meaning as possible.
 """
 import errno
 import os
+import re
 import shutil
 from math import ceil

-from ebook_converter import force_unicode, sanitize_file_name
+from ebook_converter import constants_old
+from ebook_converter import force_unicode
 from ebook_converter.constants_old import (filesystem_encoding,
                                           preferred_encoding)
 from ebook_converter.utils.localization import get_udc


+def sanitize_file_name(name, substitute='_'):
+    """
+    Sanitize the filename `name`. All invalid characters are replaced by
+    `substitute`. The set of invalid characters is the union of the invalid
+    characters in Windows, macOS and Linux. Also removes leading and trailing
+    whitespace.
+
+    **WARNING:** This function also replaces path separators, so only pass
+    file names and not full paths to it.
+    """
+
+    if isinstance(name, bytes):
+        name = name.decode(constants_old.filesystem_encoding, 'replace')
+    if isinstance(substitute, bytes):
+        substitute = substitute.decode(constants_old.filesystem_encoding,
+                                       'replace')
+    chars = (substitute
+             if c in set(('\\', '|', '?', '*', '<', '"', ':', '>', '+', '/') +
+                         tuple(map(chr, range(32)))) else c for c in name)
+    one = ''.join(chars)
+    one = re.sub(r'\s', ' ', one).strip()
+    bname, ext = os.path.splitext(one)
+    one = re.sub(r'^\.+$', '_', bname)
+    one = one.replace('..', substitute)
+    one += ext
+    # Windows doesn't like path components that end with a period or space
+    if one and one[-1] in ('.', ' '):
+        one = one[:-1]+'_'
+    # Names starting with a period are hidden on Unix
+    if one.startswith('.'):
+        one = '_' + one[1:]
+    return one
+
+
 def ascii_text(orig):
    udc = get_udc()
    try:
--- a/ebook_converter/utils/zipfile.py
+++ b/ebook_converter/utils/zipfile.py
@@ -7,7 +7,7 @@ import binascii
 from contextlib import closing
 from tempfile import SpooledTemporaryFile

-from ebook_converter import sanitize_file_name
+from ebook_converter.utils import filenames as fms
 from ebook_converter.constants_old import filesystem_encoding
 from ebook_converter.ebooks.chardet import detect
 from ebook_converter.polyglot.builtins import as_bytes
@@ -1135,7 +1135,7 @@ class ZipFile:
                os.makedirs(upperdirs)
            except:  # Added by Kovid
                targetpath = os.path.join(base_target,
-                        sanitize_file_name(fname))
+                        fms.sanitize_file_name(fname))
                upperdirs = os.path.dirname(targetpath)
                if upperdirs and not os.path.exists(upperdirs):
                    os.makedirs(upperdirs)
@@ -1156,7 +1156,7 @@ class ZipFile:
                except:
                    # Try sanitizing the file name to remove invalid characters
                    components = list(os.path.split(targetpath))
-                    components[-1] = sanitize_file_name(components[-1])
+                    components[-1] = fms.sanitize_file_name(components[-1])
                    targetpath = os.sep.join(components)
                    with open(targetpath, 'wb') as target:
                        shutil.copyfileobj(source, target)