1
0
mirror of https://github.com/gryf/ebook-converter.git synced 2026-01-22 12:54:12 +01:00
Files
ebook-converter/ebook_converter/utils/filenames.py
2020-06-17 19:28:07 +02:00

352 lines
12 KiB
Python

"""
Make strings safe for use as ASCII filenames, while trying to preserve as much
meaning as possible.
"""
import errno
import os
import shutil
import time
from math import ceil
from ebook_converter import force_unicode, prints, sanitize_file_name
from ebook_converter.constants_old import (
filesystem_encoding, plugins, preferred_encoding, isosx
)
from ebook_converter.utils.localization import get_udc
def ascii_text(orig):
    '''
    Return a text (str) rendering of orig.

    orig is first handed to the decode() method of the object returned by
    get_udc(). If that call raises, a lossy fallback is used instead: str
    input is encoded to ASCII with unencodable characters replaced, and the
    resulting bytes are decoded with preferred_encoding. Any bytes result is
    finally decoded as ASCII (with replacement) so a str is returned.
    '''
    transliterator = get_udc()
    try:
        text = transliterator.decode(orig)
    except Exception:
        # Best-effort fallback: never fail, accept losing characters instead.
        raw = orig.encode('ascii', 'replace') if isinstance(orig, str) else orig
        text = raw.decode(preferred_encoding, 'replace')
    if isinstance(text, bytes):
        return text.decode('ascii', 'replace')
    return text
def ascii_filename(orig, substitute='_'):
    '''
    Build a file name from orig via ascii_text() and sanitize_file_name().

    :param orig: the name to convert.
    :param substitute: replacement for control characters (code points below
        32); bytes values are decoded with filesystem_encoding. It is also
        forwarded to sanitize_file_name().
    :return: whatever sanitize_file_name() returns for the cleaned text.
    '''
    if isinstance(substitute, bytes):
        filler = substitute.decode(filesystem_encoding)
    else:
        filler = substitute
    # '?' is always mapped to '_' here, independently of `substitute`.
    text = ascii_text(orig).replace('?', '_')
    cleaned = []
    for ch in text:
        cleaned.append(filler if ord(ch) < 32 else ch)
    return sanitize_file_name(''.join(cleaned), substitute=filler)
def shorten_component(s, by_what):
    '''
    Shorten s by cutting items out of its middle.

    The head and tail of s are kept, each (len(s) - by_what) // 2 items long,
    so the result may be one item shorter than len(s) - by_what. s is
    returned unchanged when it is shorter than by_what or when nothing would
    be left to keep (which includes len(s) == by_what).
    '''
    total = len(s)
    if total < by_what:
        return s
    keep = (total - by_what) // 2
    return s[:keep] + s[-keep:] if keep > 0 else s
def limit_component(x, limit=254):
    '''
    Shorten the path component x until its encoded size fits within limit.

    :param x: a str or bytes path component. str values are measured by the
        length of their UTF-8 encoding, bytes by their length.
    :param limit: maximum allowed encoded length.
    :return: x itself if it already fits, otherwise a shortened copy that
        keeps the head and tail of x.
    '''
    # windows and macs use utf-16 codepoints for length, linux uses arbitrary
    # binary data, but we will assume utf-8
    filename_encoding_for_length = 'utf-8'

    def encoded_length(value):
        if isinstance(value, bytes):
            return len(value)
        return len(value.encode(filename_encoding_for_length))

    while True:
        overflow = encoded_length(x) - limit
        if overflow <= 0 or not x:
            break
        shortened = shorten_component(x, max(2, overflow // 2))
        if len(shortened) >= len(x):
            # shorten_component() returns its input untouched when asked to
            # remove at least as many items as exist. That happens for long
            # runs of multi-byte characters (overflow is counted in bytes,
            # the cut in characters) and used to make this loop spin forever.
            # Force progress by dropping the middle half (at least one item).
            cut = max(1, len(x) // 2)
            head = (len(x) - cut) // 2
            shortened = x[:head] + x[head + cut:]
        x = shortened
    return x
def shorten_components_to(length, components, more_to_take=0, last_has_extension=True):
    '''
    Shorten path components so that, joined with os.sep, they fit in length.

    Every component is first passed through limit_component(). The excess is
    then spread over the components in proportion to their lengths and each
    one is trimmed with shorten_component(). If the result is still too long
    the function retries, asking for two more characters each time.

    :param length: maximum length of os.sep.join(result).
    :param components: sequence of path components.
    :param more_to_take: extra characters to remove on top of the computed
        excess; used by the retry.
    :param last_has_extension: when True the extension of the final component
        (as given by os.path.splitext) is kept intact.
    :return: list of shortened components.
    '''
    components = [limit_component(cx) for cx in components]
    filepath = os.sep.join(components)
    extra = len(filepath) - (length - more_to_take)
    if extra < 1:
        return components
    deltas = []
    for x in components:
        pct = len(x)/float(len(filepath))
        deltas.append(int(ceil(pct*extra)))
    ans = []
    last_index = len(components) - 1
    for i, x in enumerate(components):
        delta = deltas[i]
        # Decide by position: an identity test against components[-1] also
        # matches earlier components that are the very same string object.
        is_last = i == last_index
        if delta > len(x):
            # Asked to remove more than there is: drop intermediate
            # components entirely, keep one character of the final one.
            r = x[0] if is_last else ''
        else:
            if last_has_extension and is_last:
                b, e = os.path.splitext(x)
                if e == '.':
                    e = ''
                r = shorten_component(b, delta)+e
                if r.startswith('.'):
                    # Do not let the shortened name start with a dot.
                    r = x[0]+r
            else:
                r = shorten_component(x, delta)
            r = r.strip()
            if not r:
                r = x.strip()[0] if x.strip() else 'x'
        ans.append(r)
    if len(os.sep.join(ans)) > length:
        # Propagate last_has_extension; the retry used to silently fall back
        # to the default (True).
        return shorten_components_to(length, components, more_to_take+2,
                                     last_has_extension)
    return ans
def find_executable_in_path(name, path=None):
    '''
    Locate the executable file called name in a search path.

    :param name: file name of the program to look for.
    :param path: os.pathsep separated list of directories; defaults to the
        PATH environment variable (empty if unset).
    :return: absolute path of the first match, or None if there is none.
    '''
    if path is None:
        path = os.environ.get('PATH', '')
    exts = ('',)
    for directory in path.split(os.pathsep):
        for ext in exts:
            candidate = os.path.abspath(os.path.join(directory, name)) + ext
            # os.access(..., X_OK) is also true for searchable directories,
            # so insist on a regular file to avoid returning e.g. "/usr/bin"
            # when asked for "bin".
            if os.path.isfile(candidate) and os.access(candidate, os.X_OK):
                return candidate
    return None
def is_case_sensitive(path):
    '''
    Report whether the filesystem holding path distinguishes file name case.

    path must be an existing directory in which the caller may create and
    delete files: a probe file is written there and then looked up under a
    differently-cased name. The probe is removed again before returning.

    :return: True if the differently-cased name does not resolve to a file.
    '''
    probe = os.path.join(path, 'calibre_test_case_sensitivity.txt')
    variant = os.path.join(path, 'calibre_TesT_CaSe_sensitiVitY.Txt')
    # Start from a clean slate in case a previous run left the probe behind.
    if os.path.exists(probe):
        os.remove(probe)
    open(probe, 'w').close()
    sensitive = not os.path.exists(variant)
    os.remove(probe)
    return sensitive
def case_preserving_open_file(path, mode='wb', mkdir_mode=0o777):
    '''
    Open the file pointed to by path with the specified mode. If any
    directories in path do not exist, they are created. Returns the
    opened file object and the path to the opened file object. This path is
    guaranteed to have the same case as the on disk path. For case insensitive
    filesystems, the returned path may be different from the passed in path.
    The returned path is always unicode and always an absolute path.

    If mode is None, then this function assumes that path points to a directory
    and returns the path to the directory in place of the file object.

    mkdir_mode specifies the mode with which any missing directories in path
    are created.

    Raises ValueError if path resolves to the filesystem root.
    '''
    if isinstance(path, bytes):
        path = path.decode(filesystem_encoding)

    path = os.path.abspath(path)

    sep = force_unicode(os.sep, 'ascii')

    if path.endswith(sep):
        path = path[:-1]
    if not path:
        raise ValueError('Path must not point to root')

    components = path.split(sep)

    cpath = sep

    bdir = path if mode is None else os.path.dirname(path)
    if not os.path.exists(bdir):
        os.makedirs(bdir, mkdir_mode)

    # Walk all the directories in path, putting the on disk case version of
    # the directory into cpath
    dirs = components[1:] if mode is None else components[1:-1]
    for comp in dirs:
        cdir = os.path.join(cpath, comp)
        cl = comp.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except OSError:
            # Don't have permission to do the listdir, assume the case is
            # correct as we have no way to check it. Only OS level errors are
            # ignored so that KeyboardInterrupt and friends still propagate.
            pass
        else:
            if len(candidates) == 1:
                cdir = os.path.join(cpath, candidates[0])
            # else: We are on a case sensitive file system so cdir must already
            # be correct
        cpath = cdir

    if mode is None:
        ans = fpath = cpath
    else:
        fname = components[-1]
        ans = open(os.path.join(cpath, fname), mode)
        # Ensure file and all its metadata is written to disk so that subsequent
        # listdir() has file name in it. I don't know if this is actually
        # necessary, but given the diversity of platforms, best to be safe.
        try:
            ans.flush()
            os.fsync(ans.fileno())
        except BaseException:
            # Do not leak the handle if syncing fails.
            ans.close()
            raise

        cl = fname.lower()
        try:
            candidates = [c for c in os.listdir(cpath) if c.lower() == cl]
        except OSError:
            # The containing directory, somehow disappeared?
            candidates = []
        if len(candidates) == 1:
            fpath = os.path.join(cpath, candidates[0])
        else:
            # We are on a case sensitive filesystem
            fpath = os.path.join(cpath, fname)
    return ans, fpath
def samefile(src, dst):
    '''
    Check if two paths point to the same actual file on the filesystem.

    When os.path.samefile() is available it is used, and any OS level error
    it raises (for instance because one of the paths does not exist) is
    reported as False.

    Otherwise the comparison falls back to the case-normalised absolute path
    strings; in that case the result can be True even if the file does not
    exist.
    '''
    native = getattr(os.path, 'samefile', None)
    if native is None:
        # No native support: compare the normalised path names instead.
        def canonical(p):
            return os.path.normcase(os.path.abspath(p))
        return canonical(src) == canonical(dst)
    try:
        return native(src, dst)
    except EnvironmentError:
        return False
def hardlink_file(src, dest):
    ' Create dest as a hard link to src by calling os.link() '
    os.link(src, dest)
def nlinks_file(path):
    ' Return the hard link count (st_nlink) that os.stat() reports for path '
    info = os.stat(path)
    return info.st_nlink
def atomic_rename(oldpath, newpath):
    '''Replace the file newpath with the file oldpath. Can fail if the files
    are on different volumes. If succeeds, guaranteed to be atomic. newpath may
    or may not exist. If it exists, it is replaced. '''
    # os.replace() overwrites an existing target on every platform, whereas
    # os.rename() raises on Windows when newpath already exists. On POSIX the
    # two calls behave the same.
    os.replace(oldpath, newpath)
def remove_dir_if_empty(path, ignore_metadata_caches=False):
    '''
    Remove the directory path if it is empty; leave it alone otherwise.

    A directory that still has entries is silently kept. With
    ignore_metadata_caches set to True, entries named .DS_Store or Thumbs.db
    (compared case-insensitively) are deleted first and the removal is then
    attempted once more. Errors from os.rmdir() that are not caused by the
    directory having content are re-raised.
    '''
    try:
        os.rmdir(path)
        return
    except OSError as err:
        # Some linux systems appear to raise an EPERM instead of an
        # ENOTEMPTY, see https://bugs.launchpad.net/bugs/1240797
        # so also look at the directory content before giving up.
        has_content = err.errno == errno.ENOTEMPTY or len(os.listdir(path)) > 0
        if not has_content:
            raise

    if not ignore_metadata_caches:
        return

    removed_cache = False
    try:
        for entry in os.listdir(path):
            if entry.lower() not in {'.ds_store', 'thumbs.db'}:
                continue
            removed_cache = True
            target = os.path.join(path, entry)
            if os.path.isdir(target):
                shutil.rmtree(target)
            else:
                os.remove(target)
    except Exception:
        # We could get an error, if, for example, windows has locked Thumbs.db
        removed_cache = False
    if removed_cache:
        remove_dir_if_empty(path)
# Module-level alias for os.path.expanduser.
expanduser = os.path.expanduser
def format_permissions(st_mode):
import stat
for func, letter in (x.split(':') for x in 'REG:- DIR:d BLK:b CHR:c FIFO:p LNK:l SOCK:s'.split()):
if getattr(stat, 'S_IS' + func)(st_mode):
break
else:
letter = '?'
rwx = ('---', '--x', '-w-', '-wx', 'r--', 'r-x', 'rw-', 'rwx')
ans = [letter] + list(rwx[(st_mode >> 6) & 7]) + list(rwx[(st_mode >> 3) & 7]) + list(rwx[(st_mode & 7)])
if st_mode & stat.S_ISUID:
ans[3] = 's' if (st_mode & stat.S_IXUSR) else 'S'
if st_mode & stat.S_ISGID:
ans[6] = 's' if (st_mode & stat.S_IXGRP) else 'l'
if st_mode & stat.S_ISVTX:
ans[9] = 't' if (st_mode & stat.S_IXUSR) else 'T'
return ''.join(ans)
def copyfile(src, dest):
    '''
    Copy the content of src to dest, then try to copy its stat metadata.

    The content copy (shutil.copyfile) must succeed; the metadata copy
    (shutil.copystat) is best effort and any error from it is ignored.
    '''
    shutil.copyfile(src, dest)
    try:
        shutil.copystat(src, dest)
    except Exception:
        # Metadata could not be transferred; the data copy still stands.
        return
def get_hardlink_function(src, dest):
    '''
    Return the callable used to hard link src to dest.

    This implementation always returns os.link; src and dest are accepted
    but not inspected.
    '''
    return os.link
def copyfile_using_links(path, dest, dest_is_dir=True, filecopyfunc=copyfile):
    '''
    Place the file path at dest, hard linking when possible.

    :param path: source file.
    :param dest: destination; when dest_is_dir is True it is treated as a
        directory and the base name of path is appended to it.
    :param filecopyfunc: called as filecopyfunc(src, dst) when creating the
        hard link raises any Exception.
    '''
    source = os.path.abspath(path)
    target = os.path.abspath(dest)
    if dest_is_dir:
        target = os.path.join(target, os.path.basename(source))
    link = get_hardlink_function(source, target)
    try:
        link(source, target)
    except Exception:
        # Linking failed for whatever reason; fall back to a real copy.
        filecopyfunc(source, target)
def copytree_using_links(path, dest, dest_is_parent=True, filecopyfunc=copyfile):
    '''
    Recreate the directory tree path under dest, hard linking its files.

    :param path: root of the tree to duplicate.
    :param dest: destination; when dest_is_parent is True the tree is
        created inside it under the base name of path, otherwise dest itself
        becomes the new root.
    :param filecopyfunc: called as filecopyfunc(src, dst) for every file
        whose hard link could not be created.

    Directories that already exist are reused; any other error while
    creating a directory is propagated.
    '''
    def create_dir(maker, target):
        # Create target with maker(), tolerating only "already exists".
        try:
            maker(target)
        except EnvironmentError as err:
            if err.errno != errno.EEXIST:
                raise

    src_root = os.path.abspath(path)
    dest_root = os.path.abspath(dest)
    if dest_is_parent:
        dest_root = os.path.join(dest_root, os.path.basename(src_root))
    link = get_hardlink_function(src_root, dest_root)
    create_dir(os.makedirs, dest_root)

    for current, subdirs, files in os.walk(src_root):
        target_dir = os.path.join(dest_root, os.path.relpath(current, src_root))
        for name in subdirs:
            create_dir(os.mkdir, os.path.join(target_dir, name))
        for name in files:
            source_file = os.path.join(current, name)
            target_file = os.path.join(target_dir, name)
            try:
                link(source_file, target_file)
            except Exception:
                filecopyfunc(source_file, target_file)