mirror of
https://github.com/gryf/slack-backup.git
synced 2025-12-17 11:30:25 +01:00
Changed behaviour for duplicates
Until now, if we downloaded certain files (like those attached to a conversation) and already had a file with the same name, a number in the format '%03d' was added just before the extension. That way it was possible for the very same file to be downloaded and stored multiple times, like: file.png file.001.png file.002.png ... This commit prevents that by comparing the files we already have with the file downloaded from Slack. Another file with an additional number is added only when the stored file and the downloaded file have different content.
This commit is contained in:
@@ -5,6 +5,7 @@ to local ones, so that sophisticated writers can make a use of it
|
||||
import functools
|
||||
import logging
|
||||
import os
|
||||
import shutil
|
||||
|
||||
import requests
|
||||
|
||||
@@ -67,10 +68,26 @@ class Download(object):
|
||||
if not self._hier_created:
|
||||
self._create_assets_dir()
|
||||
|
||||
fname = self.prepare_filepath(url, filetype)
|
||||
filepath = self.get_filepath(url, filetype)
|
||||
temp_file = utils.get_temp_name()
|
||||
|
||||
self._download(url, fname)
|
||||
return fname
|
||||
self._download(url, temp_file)
|
||||
|
||||
if filepath and os.path.exists(filepath) and filetype != 'avatar':
|
||||
if not utils.same_files(filepath, temp_file):
|
||||
logging.warning("File `%s' already exist, renamed to `%s'",
|
||||
filepath,
|
||||
self.calculate_new_filename(filepath,
|
||||
filetype))
|
||||
filepath = self.calculate_new_filename(filepath, filetype)
|
||||
shutil.move(temp_file, filepath)
|
||||
else:
|
||||
logging.debug("File `%s' already exist, skipping", filepath)
|
||||
os.unlink(filepath)
|
||||
else:
|
||||
shutil.move(temp_file, filepath)
|
||||
|
||||
return filepath
|
||||
|
||||
def _create_assets_dir(self):
|
||||
for path in (self._files, self._images):
|
||||
@@ -78,8 +95,8 @@ class Download(object):
|
||||
|
||||
self._hier_created = True
|
||||
|
||||
def prepare_filepath(self, url, filetype):
|
||||
"""Prepare directory where to download file into"""
|
||||
def get_filepath(self, url, filetype):
|
||||
"""Get full path and filename for the file"""
|
||||
|
||||
typemap = {'avatar': self._images,
|
||||
'file': self._files}
|
||||
@@ -91,7 +108,8 @@ class Download(object):
|
||||
|
||||
splitted = url.split('/')
|
||||
|
||||
if len(splitted) == 7 and 'slack.com' in splitted[2]:
|
||||
if len(splitted) == 7 and ('slack.com' in splitted[2] or
|
||||
'slack-edge.com' in splitted[2]):
|
||||
part = url.split('/')[-3]
|
||||
fname = url.split('/')[-1]
|
||||
else:
|
||||
@@ -105,7 +123,9 @@ class Download(object):
|
||||
utils.makedirs(os.path.join(path, part))
|
||||
path = os.path.join(path, part)
|
||||
|
||||
path = os.path.join(path, fname)
|
||||
return os.path.join(path, fname)
|
||||
|
||||
def calculate_new_filename(self, path, filetype):
|
||||
count = 1
|
||||
|
||||
while filetype != 'avatar' and os.path.exists(path):
|
||||
|
||||
@@ -4,6 +4,8 @@ Some utils functions. Jsut to not copypaste the code around
|
||||
import errno
|
||||
import os
|
||||
import logging
|
||||
import tempfile
|
||||
import hashlib
|
||||
|
||||
|
||||
def makedirs(path):
|
||||
@@ -19,3 +21,24 @@ def makedirs(path):
|
||||
logging.error("Cannot create `%s'. There is some file on the "
|
||||
"way; cannot proceed.", path)
|
||||
raise
|
||||
|
||||
|
||||
def get_temp_name():
    """
    Create an empty temporary file and return its path.

    The descriptor handed back by ``tempfile.mkstemp`` is closed right away;
    the file itself stays on disk, so the caller is the one who has to move
    or remove it.
    """
    created = tempfile.mkstemp()
    descriptor = created[0]
    temp_path = created[1]
    os.close(descriptor)
    return temp_path
|
||||
|
||||
|
||||
def same_files(file1, file2):
    """
    Compare two files by their SHA-256 digests.

    Both files are read in fixed-size chunks, so even large files are never
    held in memory as a whole.

    Arguments:
        file1: path to the first file.
        file2: path to the second file.

    Return True if both digests are identical, False otherwise. Errors
    raised by ``open`` (for example when a path does not exist) are not
    caught here.
    """
    chunk_size = 64 * 1024
    digests = []

    for path in (file1, file2):
        digest = hashlib.sha256()
        with open(path, 'rb') as fobj:
            # Feed the hash piece by piece; read() returns b'' at EOF.
            while True:
                chunk = fobj.read(chunk_size)
                if not chunk:
                    break
                digest.update(chunk)
        digests.append(digest.hexdigest())

    return digests[0] == digests[1]
|
||||
|
||||
Reference in New Issue
Block a user