1
0
mirror of https://github.com/gryf/slack-backup.git synced 2025-12-17 11:30:25 +01:00

11 Commits
v0.4.3 ... v0.6

Author SHA1 Message Date
9a3c80333d Version bump 2017-11-01 18:46:33 +01:00
b2048b03e0 Changed behaviour for duplicates
Until now, when a file was downloaded (for example one attached to a
conversation) and a file with the same name already existed, a number in
'%03d' format was added just before the extension. As a result, the very
same file could be downloaded and stored multiple times, like:

file.png
file.001.png
file.002.png
...

This commit prevents that by comparing the file we already have with the
file downloaded from slack. Another file with an additional number is
created only when the stored file and the downloaded one have different
content.
2017-11-01 18:40:52 +01:00
a077317cb4 Added retry mechanism for getting assets 2017-11-01 18:38:31 +01:00
ce2888d441 Added colors for loglevels 2017-11-01 12:45:35 +01:00
f2a78f4a52 Add message body to log 2017-11-01 11:27:01 +01:00
64d4b09468 Fix for handling messages of different types than 'message' 2017-08-06 09:22:38 +02:00
5f9f290ba4 Fix for message comment.
If a comment is sent by the user, the data has a different structure.
The type of such a message is still "message", but it contains a
dictionary under the 'comment' key (which can be confusing) that holds
the needed data (like the user id). For this kind of message, when 'user'
is missing from the main dict, dict['comment']['user'] is used to get the
user identifier, while dict['text'] remains the message text.
2017-02-13 19:57:31 +01:00
Roman Dobosz
08a0a82435 Changed absolute to relative for filepaths stored in File objects 2016-12-03 18:43:49 +01:00
Roman Dobosz
a42506dff9 Fix for new fnames in case of already existing ones 2016-12-03 18:14:28 +01:00
Roman Dobosz
0d7607cf3c Added log for updating specific channel messages 2016-12-02 17:46:27 +01:00
9ddd470b54 Move commands functions to its own module 2016-11-28 19:05:26 +01:00
7 changed files with 277 additions and 157 deletions

View File

@@ -1,122 +1,10 @@
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Create backup for certain date for specified channel in slack
Execute commands for slack-backup
"""
import argparse
import logging
from slack_backup import client
from slack_backup import config
def setup_logger(args):
    """Configure root logging from the ``quiet``/``verbose`` counters.

    ``args.verbose`` wins over ``args.quiet``; a counter above one selects
    the more extreme level. With neither set the level is WARNING.
    """
    if args.verbose:
        chosen = logging.DEBUG if args.verbose > 1 else logging.INFO
    elif args.quiet:
        chosen = logging.CRITICAL if args.quiet > 1 else logging.ERROR
    else:
        chosen = logging.WARNING

    logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s",
                        level=chosen)
def generate_raport(args):
    """Create a ``Client`` for ``args`` and call its ``generate_history()``."""
    client.Client(args).generate_history()
def fetch_data(args):
    """Create a ``Client`` for ``args`` and call its ``update()``."""
    client.Client(args).update()
def main():
    """Parse the command line, merge it with the config and run the command.

    Two sub-commands are registered, ``fetch`` and ``generate``. Each one
    stores its handler under ``func``, which is finally called with the
    parsed namespace.
    """
    def add_shared_options(cmd, channels_default):
        """Attach the -v/-q/-c/-d/-i options accepted by both commands."""
        cmd.add_argument('-v', '--verbose', action="count", default=None,
                         help='Be verbose. Adding more "v" will increase '
                         'verbosity')
        cmd.add_argument('-q', '--quiet', action="count", default=None,
                         help='Be quiet. Adding more "q" will decrease '
                         'verbosity')
        cmd.add_argument('-c', '--channels', nargs='+',
                         default=channels_default,
                         help='List of channels to perform actions on. '
                         'Default is all channels.')
        cmd.add_argument('-d', '--database', default=None,
                         help='Path to the database file.')
        cmd.add_argument('-i', '--config', default=None,
                         help='Use specific config file.')

    top = argparse.ArgumentParser()
    commands = top.add_subparsers(dest='parser')
    commands.required = True

    # "fetch" sub-command
    fetch_cmd = commands.add_parser('fetch',
                                    help='Update local db with Slack data')
    fetch_cmd.add_argument('-t', '--token', default=None,
                           help='Slack token - a string, which can be '
                           'generated/obtained via '
                           'https://api.slack.com/docs/oauth-test-tokens '
                           'page.')
    fetch_cmd.add_argument('-u', '--user', default=None,
                           help='Username for your Slack account')
    fetch_cmd.add_argument('-p', '--password', default=None,
                           help='Password for your Slack account.')
    fetch_cmd.add_argument('-e', '--team', default=None,
                           help='Team name, which is part of slack url, for '
                           'example: if url is "https://team.slack.com" '
                           'than "team" is a name of the team.')
    add_shared_options(fetch_cmd, None)
    fetch_cmd.set_defaults(func=fetch_data)

    # "generate" sub-command
    generate_cmd = commands.add_parser('generate',
                                       help='Generate logs out of data in '
                                       'provided database')
    generate_cmd.add_argument('-o', '--output', default=None,
                              help="Output directory for store logs. All "
                              "logs are organised per channel. By default "
                              "it's `logs' directory")
    generate_cmd.add_argument('-f', '--format', default=None,
                              choices=('text', 'none'),
                              help='Output format. Default is none; only '
                              'database is updated by latest messages for '
                              'all/selected channels.')
    generate_cmd.add_argument('-t', '--theme', default=None,
                              choices=('plain', 'unicode'),
                              help="Choose theme for text output. It "
                              "doesn't affect other output formats.")
    add_shared_options(generate_cmd, [])
    generate_cmd.set_defaults(func=generate_raport)

    namespace = top.parse_args()

    # The config update runs before logging is configured; its message is
    # logged afterwards.
    summary = config.Config().update(namespace)
    setup_logger(namespace)
    logging.info(summary)

    namespace.func(namespace)
from slack_backup import command
if __name__ == "__main__":
main()
command.main()

View File

@@ -10,7 +10,7 @@ except ImportError:
setup(name="slack-backup",
packages=["slack_backup"],
version="0.4.3",
version="0.6",
description="Make copy of slack converstaions",
author="Roman Dobosz",
author_email="gryf73@gmail.com",

View File

@@ -3,6 +3,7 @@ Create backup for certain date for specified channel in slack
"""
from datetime import datetime
import getpass
import json
import logging
import os
@@ -112,6 +113,7 @@ class Client(object):
channels = all_channels
for channel in channels:
logging.info("Getting messages for channel `%s'", channel.name)
latest = self.q(o.Message).\
filter(o.Message.channel == channel).\
order_by(o.Message.ts.desc()).first()
@@ -146,10 +148,20 @@ class Client(object):
Create message with corresponding possible metadata, like reactions,
files etc.
"""
user = self.q(o.User).\
filter(o.User.slackid == data['user']).one()
if data['type'] != 'message':
logging.info("Skipping message of type `%s'.", data['type'])
return
if data['type'] == 'message' and not data['text'].strip():
logging.debug('Message data: %s', json.dumps(data))
try:
user = self.q(o.User).\
filter(o.User.slackid == data['user']).one()
except KeyError:
user = self.q(o.User).\
filter(o.User.slackid == data['comment']['user']).one()
if not data['text'].strip():
logging.info("Skipping message from `%s' since it's empty",
user.name)
return
@@ -255,7 +267,7 @@ class Client(object):
if not database:
return 'assets'
path = os.path.dirname(os.path.abspath(database))
path = os.path.dirname(database)
return os.path.join(path, 'assets')
def _channels_list(self):

141
slack_backup/command.py Normal file
View File

@@ -0,0 +1,141 @@
"""
Create backup for certain date for specified channel in slack
"""
import argparse
import logging
import platform
from slack_backup import client
from slack_backup import config
def setup_logger(args):
    """Configure root logging from the ``quiet``/``verbose`` counters.

    On every platform except Windows the level names are first wrapped in
    ANSI colour escapes. ``args.verbose`` wins over ``args.quiet``; a counter
    above one selects the more extreme level, and with neither set the level
    is WARNING.
    """
    if platform.system() != "Windows":
        # hack to have colors in terminal
        colour_templates = (
            (logging.DEBUG, "\033[1;30m%s\033[1;0m"),
            (logging.INFO, "\033[1;32m%s\033[1;0m"),
            (logging.WARNING, "\033[1;33m%s\033[1;0m"),
            (logging.ERROR, "\033[1;31m%s\033[1;0m"),
            (logging.CRITICAL, "\033[7;31m%s\033[1;0m"),
        )
        for levelno, template in colour_templates:
            logging.addLevelName(levelno,
                                 template % logging.getLevelName(levelno))

    if args.verbose:
        chosen = logging.DEBUG if args.verbose > 1 else logging.INFO
    elif args.quiet:
        chosen = logging.CRITICAL if args.quiet > 1 else logging.ERROR
    else:
        chosen = logging.WARNING

    logging.basicConfig(format="%(asctime)s %(levelname)s: %(message)s",
                        level=chosen)
def generate_raport(args):
    """Create a ``Client`` for ``args`` and call its ``generate_history()``."""
    client.Client(args).generate_history()
def fetch_data(args):
    """Create a ``Client`` for ``args`` and call its ``update()``."""
    client.Client(args).update()
def main():
    """Parse the command line, merge it with the config and run the command.

    Two sub-commands are registered, ``fetch`` and ``generate``. Each one
    stores its handler under ``func``, which is finally called with the
    parsed namespace.
    """
    def add_shared_options(cmd, channels_default):
        """Attach the -v/-q/-c/-d/-i options accepted by both commands."""
        cmd.add_argument('-v', '--verbose', action="count", default=None,
                         help='Be verbose. Adding more "v" will increase '
                         'verbosity')
        cmd.add_argument('-q', '--quiet', action="count", default=None,
                         help='Be quiet. Adding more "q" will decrease '
                         'verbosity')
        cmd.add_argument('-c', '--channels', nargs='+',
                         default=channels_default,
                         help='List of channels to perform actions on. '
                         'Default is all channels.')
        cmd.add_argument('-d', '--database', default=None,
                         help='Path to the database file.')
        cmd.add_argument('-i', '--config', default=None,
                         help='Use specific config file.')

    top = argparse.ArgumentParser()
    commands = top.add_subparsers(dest='parser')
    commands.required = True

    # "fetch" sub-command
    fetch_cmd = commands.add_parser('fetch',
                                    help='Update local db with Slack data')
    fetch_cmd.add_argument('-t', '--token', default=None,
                           help='Slack token - a string, which can be '
                           'generated/obtained via '
                           'https://api.slack.com/docs/oauth-test-tokens '
                           'page.')
    fetch_cmd.add_argument('-u', '--user', default=None,
                           help='Username for your Slack account')
    fetch_cmd.add_argument('-p', '--password', default=None,
                           help='Password for your Slack account.')
    fetch_cmd.add_argument('-e', '--team', default=None,
                           help='Team name, which is part of slack url, for '
                           'example: if url is "https://team.slack.com" '
                           'than "team" is a name of the team.')
    add_shared_options(fetch_cmd, None)
    fetch_cmd.set_defaults(func=fetch_data)

    # "generate" sub-command
    generate_cmd = commands.add_parser('generate',
                                       help='Generate logs out of data in '
                                       'provided database')
    generate_cmd.add_argument('-o', '--output', default=None,
                              help="Output directory for store logs. All "
                              "logs are organised per channel. By default "
                              "it's `logs' directory")
    generate_cmd.add_argument('-f', '--format', default=None,
                              choices=('text', 'none'),
                              help='Output format. Default is none; only '
                              'database is updated by latest messages for '
                              'all/selected channels.')
    generate_cmd.add_argument('-t', '--theme', default=None,
                              choices=('plain', 'unicode'),
                              help="Choose theme for text output. It "
                              "doesn't affect other output formats.")
    add_shared_options(generate_cmd, [])
    generate_cmd.set_defaults(func=generate_raport)

    namespace = top.parse_args()

    # The config update runs before logging is configured; its message is
    # logged afterwards.
    summary = config.Config().update(namespace)
    setup_logger(namespace)
    logging.info(summary)

    namespace.func(namespace)

View File

@@ -2,14 +2,45 @@
Module for download files, store them in local filesystem and convert the URLs
to local ones, so that sophisticated writers can make a use of it
"""
import functools
import logging
import os
import shutil
import requests
from slack_backup import utils
def retry(count):
    """
    Decorator for a case, when there is some network hiccup, or slack servers
    are too busy to respond or on connection timeout.

    The decorated callable must be a method whose owner provides
    ``authorize()``; it is called before every retry to renew the session.

    :param count: how many times the request should be attempted. Values
                  below one are treated as one, so the wrapped method is
                  always called at least once.
    :raises requests.exceptions.RequestException: re-raised unchanged when
                  the last attempt fails.
    """
    attempts = max(1, count)

    def wrapper(func):
        @functools.wraps(func)
        def inner(obj, *args, **kwargs):
            # Label used only in log messages. The URL may come positionally
            # or as the ``url`` keyword; never let building the label raise
            # and mask the real request error.
            target = args[0] if args else kwargs.get('url', func.__name__)

            for remaining in range(attempts - 1, -1, -1):
                try:
                    return func(obj, *args, **kwargs)
                except requests.exceptions.RequestException as exc:
                    if not remaining:
                        logging.error('Request for %s failed. Reported '
                                      'reason: %s', target, exc.__doc__)
                        raise

                    logging.warning('Request for %s failed. Reported '
                                    'reason: %s. Retrying.', target,
                                    exc.__doc__)
                    # Renew the session before retry
                    obj.authorize()
        return inner
    return wrapper
class NotAuthorizedError(requests.HTTPError):
    """HTTP error subclass, by its name meant for authorization failures.

    NOTE(review): where it is raised is not visible in this hunk - confirm.
    """
@@ -18,7 +49,7 @@ class Download(object):
"""Download class for taking care of Slack internally uploaded files"""
def __init__(self, args, assets_dir):
self.session = requests.session()
self.session = None
self.team = args.team
self.user = args.user
self.password = args.password
@@ -37,10 +68,26 @@ class Download(object):
if not self._hier_created:
self._create_assets_dir()
fname = self.prepare_filepath(url, filetype)
filepath = self.get_filepath(url, filetype)
temp_file = utils.get_temp_name()
self._download(url, fname)
return fname
self._download(url, temp_file)
if filepath and os.path.exists(filepath) and filetype != 'avatar':
if not utils.same_files(filepath, temp_file):
logging.warning("File `%s' already exist, renamed to `%s'",
filepath,
self.calculate_new_filename(filepath,
filetype))
filepath = self.calculate_new_filename(filepath, filetype)
shutil.move(temp_file, filepath)
else:
logging.debug("File `%s' already exist, skipping", filepath)
os.unlink(filepath)
else:
shutil.move(temp_file, filepath)
return filepath
def _create_assets_dir(self):
for path in (self._files, self._images):
@@ -48,20 +95,21 @@ class Download(object):
self._hier_created = True
def prepare_filepath(self, url, filetype):
"""Prepare directory where to download file into"""
def get_filepath(self, url, filetype):
"""Get full path and filename for the file"""
typemap = {'avatar': self._images,
'file': self._files}
if filetype == 'file' and not self._authorized:
logging.info("There was no (valid) credentials passed, therefore "
"file `%s' cannot be downloaded", url)
logging.warning("There was no (valid) credentials passed, "
"therefore file `%s' cannot be downloaded", url)
return
splitted = url.split('/')
if len(splitted) == 7 and 'slack.com' in splitted[2]:
if len(splitted) == 7 and ('slack.com' in splitted[2] or
'slack-edge.com' in splitted[2]):
part = url.split('/')[-3]
fname = url.split('/')[-1]
else:
@@ -75,24 +123,24 @@ class Download(object):
utils.makedirs(os.path.join(path, part))
path = os.path.join(path, part)
path = os.path.join(path, fname)
return os.path.join(path, fname)
def calculate_new_filename(self, path, filetype):
count = 1
while filetype != 'avatar' and os.path.exists(path):
base, ext = os.path.splitext(path)
path = base + "%0.3d" % count + ext
if count == 1:
base, ext = os.path.splitext(path)
path = base + ".%0.3d" % count + ext
count += 1
return path
@retry(3)
def _download(self, url, local):
"""Download file"""
try:
res = self.session.get(url, stream=True)
except requests.exceptions.RequestException as exc:
logging.error('Request for %s failed. Reported reason: %s',
url, exc.__doc__)
raise
res = self.session.get(url, stream=True)
with open(local, 'wb') as fobj:
for chunk in res.iter_content(chunk_size=5120):
@@ -104,7 +152,9 @@ class Download(object):
"""
Authenticate and gather session for Slack
"""
self.session = requests.session() # new session
res = self.session.get('https://%s.slack.com/' % self.team)
if not all((self.team, self.password, self.user)):
logging.warning('There is neither username, password or team name'
' provided. Downloading will not be performed.')
@@ -112,7 +162,7 @@ class Download(object):
crumb = ''
for line in res.text.split('\n'):
if 'crumb' in line:
if 'crumb' in line and 'value' in line:
crumb = line.split('value=')[1].split('"')[1]
break
else:

View File

@@ -4,6 +4,8 @@ Some utils functions. Jsut to not copypaste the code around
import errno
import os
import logging
import tempfile
import hashlib
def makedirs(path):
@@ -19,3 +21,24 @@ def makedirs(path):
logging.error("Cannot create `%s'. There is some file on the "
"way; cannot proceed.", path)
raise
def get_temp_name():
    """Create an empty temporary file and return its path.

    The descriptor handed back by ``tempfile.mkstemp`` is closed right away;
    the file itself is left on disk for the caller to use.
    """
    handle, path = tempfile.mkstemp()
    os.close(handle)
    return path
def same_files(file1, file2):
    """
    Compare files by calculating hash for each of them. Return True if hash is
    identical, False otherwise.

    Files of different size are reported as different without being read.
    Otherwise both are hashed in fixed-size chunks, so the memory needed
    does not grow with the file size.

    :param file1: path to the first file
    :param file2: path to the second file
    :raises OSError: when either file is missing or cannot be read
    """
    if os.path.getsize(file1) != os.path.getsize(file2):
        return False

    def digest(path):
        """Return SHA-256 hex digest of the file, read chunk by chunk."""
        hasher = hashlib.sha256()
        with open(path, 'rb') as fobj:
            for chunk in iter(lambda: fobj.read(65536), b''):
                hasher.update(chunk)
        return hasher.hexdigest()

    return digest(file1) == digest(file2)

View File

@@ -1,5 +1,5 @@
from unittest import TestCase
from unittest.mock import MagicMock
from unittest import mock
from slack_backup import client
from slack_backup import objects as o
@@ -324,7 +324,10 @@ MSGS = {'messages': [{"type": "message",
"<https://esm64.slack.com/files/name2/F3405RRB5/"
"screenshot.png|Screenshot.png>",
"ts": "1478107371.000052",
"upload": True}],
"upload": True},
{'type': 'something else',
'ts': '1502003415232.000001',
"wibblr": True}],
"ok": True,
"latest": "1479501075.000020",
"has_more": True}
@@ -362,34 +365,34 @@ class TestApiCalls(TestCase):
def test_channels_list(self):
cl = client.Client(FakeArgs())
cl.slack.api_call = MagicMock(return_value=CHANNELS)
cl.slack.api_call = mock.MagicMock(return_value=CHANNELS)
channels = cl._channels_list()
self.assertListEqual(CHANNELS['channels'], channels)
def test_users_list(self):
cl = client.Client(FakeArgs())
cl.slack.api_call = MagicMock(return_value=USERS)
cl.slack.api_call = mock.MagicMock(return_value=USERS)
users = cl._users_list()
self.assertListEqual(USERS['members'], users)
def test_channels_history(self):
cl = client.Client(FakeArgs())
cl.slack.api_call = MagicMock(return_value=USERS)
cl.downloader._download = MagicMock(return_value=None)
cl.slack.api_call = mock.MagicMock(return_value=USERS)
cl.downloader._download = mock.MagicMock(return_value=None)
cl.update_users()
cl.slack.api_call = MagicMock(return_value=CHANNELS)
cl.slack.api_call = mock.MagicMock(return_value=CHANNELS)
cl.update_channels()
cl.slack.api_call = MagicMock()
cl.slack.api_call = mock.MagicMock()
cl.slack.api_call.side_effect = [MSGS, MSG2, MSG3]
channel = cl.q(o.Channel).filter(o.Channel.slackid ==
"C00000001").one()
msg, ts = cl._channels_history(channel, 0)
self.assertEqual(len(msg), 5)
self.assertEqual(len(msg), 6)
self.assertEqual(ts, '1479501074.000032')
msg, ts = cl._channels_history(channel, ts)
@@ -405,8 +408,8 @@ class TestClient(TestCase):
def test_update_users(self):
cl = client.Client(FakeArgs())
cl.slack.api_call = MagicMock(return_value=USERS)
cl.downloader._download = MagicMock(return_value=None)
cl.slack.api_call = mock.MagicMock(return_value=USERS)
cl.downloader._download = mock.MagicMock(return_value=None)
cl.update_users()
users = cl.session.query(o.User).all()
self.assertEqual(len(users), 4)
@@ -426,19 +429,22 @@ class TestMessage(TestCase):
args.channels = ['general']
self.cl = client.Client(args)
self.cl.downloader.authorize = MagicMock()
self.cl.slack.api_call = MagicMock(return_value=USERS)
self.cl.downloader._download = MagicMock(return_value=None)
self.cl.downloader.authorize = mock.MagicMock()
self.cl.slack.api_call = mock.MagicMock(return_value=USERS)
self.cl.downloader._download = mock.MagicMock(return_value=None)
self.cl.update_users()
self.cl.slack.api_call = MagicMock(return_value=CHANNELS)
self.cl.slack.api_call = mock.MagicMock(return_value=CHANNELS)
self.cl.update_channels()
self.cl.slack.api_call = MagicMock()
self.cl.slack.api_call = mock.MagicMock()
def test_update_history(self):
@mock.patch('slack_backup.download.Download.download')
def test_update_history(self, download):
self.cl.downloader._download = MagicMock(return_value=None)
download.return_value = 'foo'
self.cl.downloader._download = mock.MagicMock(return_value=None)
self.cl.slack.api_call.side_effect = [MSGS, MSG3]
self.cl.update_history()
self.assertEqual(len(self.cl.q(o.Message).all()), 5)