1
0
mirror of https://github.com/gryf/slack-backup.git synced 2025-12-17 11:30:25 +01:00

Changed behaviour for duplicates

Until now, if we downloaded certain files (like those attached to a
conversation) and already had a file with the same name, a number in
'%03d' format was added just before the extension. That way it was
possible for the very same file to be downloaded and stored
multiple times, like:

file.png
file.001.png
file.002.png
...

This commit prevents that by comparing the files we already
have with the file downloaded from Slack. Another file with an
additional number is only added when the stored file and the downloaded
one have different content.
This commit is contained in:
2017-11-01 18:40:47 +01:00
parent a077317cb4
commit b2048b03e0
3 changed files with 69 additions and 23 deletions

View File

@@ -5,6 +5,7 @@ to local ones, so that sophisticated writers can make a use of it
import functools import functools
import logging import logging
import os import os
import shutil
import requests import requests
@@ -67,10 +68,26 @@ class Download(object):
if not self._hier_created: if not self._hier_created:
self._create_assets_dir() self._create_assets_dir()
fname = self.prepare_filepath(url, filetype) filepath = self.get_filepath(url, filetype)
temp_file = utils.get_temp_name()
self._download(url, fname) self._download(url, temp_file)
return fname
if filepath and os.path.exists(filepath) and filetype != 'avatar':
if not utils.same_files(filepath, temp_file):
logging.warning("File `%s' already exist, renamed to `%s'",
filepath,
self.calculate_new_filename(filepath,
filetype))
filepath = self.calculate_new_filename(filepath, filetype)
shutil.move(temp_file, filepath)
else:
logging.debug("File `%s' already exist, skipping", filepath)
os.unlink(filepath)
else:
shutil.move(temp_file, filepath)
return filepath
def _create_assets_dir(self): def _create_assets_dir(self):
for path in (self._files, self._images): for path in (self._files, self._images):
@@ -78,8 +95,8 @@ class Download(object):
self._hier_created = True self._hier_created = True
def prepare_filepath(self, url, filetype): def get_filepath(self, url, filetype):
"""Prepare directory where to download file into""" """Get full path and filename for the file"""
typemap = {'avatar': self._images, typemap = {'avatar': self._images,
'file': self._files} 'file': self._files}
@@ -91,7 +108,8 @@ class Download(object):
splitted = url.split('/') splitted = url.split('/')
if len(splitted) == 7 and 'slack.com' in splitted[2]: if len(splitted) == 7 and ('slack.com' in splitted[2] or
'slack-edge.com' in splitted[2]):
part = url.split('/')[-3] part = url.split('/')[-3]
fname = url.split('/')[-1] fname = url.split('/')[-1]
else: else:
@@ -105,7 +123,9 @@ class Download(object):
utils.makedirs(os.path.join(path, part)) utils.makedirs(os.path.join(path, part))
path = os.path.join(path, part) path = os.path.join(path, part)
path = os.path.join(path, fname) return os.path.join(path, fname)
def calculate_new_filename(self, path, filetype):
count = 1 count = 1
while filetype != 'avatar' and os.path.exists(path): while filetype != 'avatar' and os.path.exists(path):

View File

@@ -4,6 +4,8 @@ Some utils functions. Jsut to not copypaste the code around
import errno import errno
import os import os
import logging import logging
import tempfile
import hashlib
def makedirs(path): def makedirs(path):
@@ -19,3 +21,24 @@ def makedirs(path):
logging.error("Cannot create `%s'. There is some file on the " logging.error("Cannot create `%s'. There is some file on the "
"way; cannot proceed.", path) "way; cannot proceed.", path)
raise raise
def get_temp_name():
    """
    Create an empty temporary file and return its path.

    The file is created with tempfile.mkstemp() and its descriptor is closed
    straight away, so only the name is handed back. The file itself is left
    on disk; the caller is responsible for moving or removing it.
    """
    handle, temp_path = tempfile.mkstemp()
    # Only the path is needed; release the OS-level descriptor immediately.
    os.close(handle)
    return temp_path
def same_files(file1, file2):
    """
    Compare two files by content. Return True if their SHA-256 digests are
    identical, False otherwise.

    Files of different size are reported as different without being read.
    Otherwise both files are hashed in fixed-size chunks, so that large
    files are never loaded into memory as a whole.

    Raises OSError (e.g. FileNotFoundError) if either file cannot be
    accessed.
    """
    # Cheap early exit: content cannot match if the sizes differ.
    if os.path.getsize(file1) != os.path.getsize(file2):
        return False

    chunk_size = 64 * 1024
    digests = []
    for fname in (file1, file2):
        hasher = hashlib.sha256()
        with open(fname, 'rb') as fobj:
            while True:
                chunk = fobj.read(chunk_size)
                if not chunk:
                    break
                hasher.update(chunk)
        digests.append(hasher.digest())

    return digests[0] == digests[1]

View File

@@ -1,5 +1,5 @@
from unittest import TestCase from unittest import TestCase
from unittest.mock import MagicMock from unittest import mock
from slack_backup import client from slack_backup import client
from slack_backup import objects as o from slack_backup import objects as o
@@ -365,27 +365,27 @@ class TestApiCalls(TestCase):
def test_channels_list(self): def test_channels_list(self):
cl = client.Client(FakeArgs()) cl = client.Client(FakeArgs())
cl.slack.api_call = MagicMock(return_value=CHANNELS) cl.slack.api_call = mock.MagicMock(return_value=CHANNELS)
channels = cl._channels_list() channels = cl._channels_list()
self.assertListEqual(CHANNELS['channels'], channels) self.assertListEqual(CHANNELS['channels'], channels)
def test_users_list(self): def test_users_list(self):
cl = client.Client(FakeArgs()) cl = client.Client(FakeArgs())
cl.slack.api_call = MagicMock(return_value=USERS) cl.slack.api_call = mock.MagicMock(return_value=USERS)
users = cl._users_list() users = cl._users_list()
self.assertListEqual(USERS['members'], users) self.assertListEqual(USERS['members'], users)
def test_channels_history(self): def test_channels_history(self):
cl = client.Client(FakeArgs()) cl = client.Client(FakeArgs())
cl.slack.api_call = MagicMock(return_value=USERS) cl.slack.api_call = mock.MagicMock(return_value=USERS)
cl.downloader._download = MagicMock(return_value=None) cl.downloader._download = mock.MagicMock(return_value=None)
cl.update_users() cl.update_users()
cl.slack.api_call = MagicMock(return_value=CHANNELS) cl.slack.api_call = mock.MagicMock(return_value=CHANNELS)
cl.update_channels() cl.update_channels()
cl.slack.api_call = MagicMock() cl.slack.api_call = mock.MagicMock()
cl.slack.api_call.side_effect = [MSGS, MSG2, MSG3] cl.slack.api_call.side_effect = [MSGS, MSG2, MSG3]
channel = cl.q(o.Channel).filter(o.Channel.slackid == channel = cl.q(o.Channel).filter(o.Channel.slackid ==
@@ -408,8 +408,8 @@ class TestClient(TestCase):
def test_update_users(self): def test_update_users(self):
cl = client.Client(FakeArgs()) cl = client.Client(FakeArgs())
cl.slack.api_call = MagicMock(return_value=USERS) cl.slack.api_call = mock.MagicMock(return_value=USERS)
cl.downloader._download = MagicMock(return_value=None) cl.downloader._download = mock.MagicMock(return_value=None)
cl.update_users() cl.update_users()
users = cl.session.query(o.User).all() users = cl.session.query(o.User).all()
self.assertEqual(len(users), 4) self.assertEqual(len(users), 4)
@@ -429,19 +429,22 @@ class TestMessage(TestCase):
args.channels = ['general'] args.channels = ['general']
self.cl = client.Client(args) self.cl = client.Client(args)
self.cl.downloader.authorize = MagicMock() self.cl.downloader.authorize = mock.MagicMock()
self.cl.slack.api_call = MagicMock(return_value=USERS) self.cl.slack.api_call = mock.MagicMock(return_value=USERS)
self.cl.downloader._download = MagicMock(return_value=None) self.cl.downloader._download = mock.MagicMock(return_value=None)
self.cl.update_users() self.cl.update_users()
self.cl.slack.api_call = MagicMock(return_value=CHANNELS) self.cl.slack.api_call = mock.MagicMock(return_value=CHANNELS)
self.cl.update_channels() self.cl.update_channels()
self.cl.slack.api_call = MagicMock() self.cl.slack.api_call = mock.MagicMock()
def test_update_history(self): @mock.patch('slack_backup.download.Download.download')
def test_update_history(self, download):
self.cl.downloader._download = MagicMock(return_value=None) download.return_value = 'foo'
self.cl.downloader._download = mock.MagicMock(return_value=None)
self.cl.slack.api_call.side_effect = [MSGS, MSG3] self.cl.slack.api_call.side_effect = [MSGS, MSG3]
self.cl.update_history() self.cl.update_history()
self.assertEqual(len(self.cl.q(o.Message).all()), 5) self.assertEqual(len(self.cl.q(o.Message).all()), 5)