mirror of
https://github.com/gryf/slack-backup.git
synced 2025-12-17 19:40:21 +01:00
Using url_file_attachment user can specify if he like to change objects like 'file_share' marked as external (in Slack servers point of view) to attachments, so the only value would be remembered URL for the "uploaded" document as an attachment. Or, treat it as is, and let the slack-backup to produce file which contain the URLs and corresponding local file names for such files.
448 lines
14 KiB
Python
448 lines
14 KiB
Python
"""
|
|
Reporters module.
|
|
|
|
There are several classes for specific format reporting, and also some of the
|
|
slack conversation/convention parsers.
|
|
"""
|
|
import os
|
|
import errno
|
|
import html.parser
|
|
import logging
|
|
import pathlib
|
|
import re
|
|
|
|
from slack_backup import objects as o
|
|
from slack_backup import utils
|
|
from slack_backup import emoji
|
|
|
|
|
|
class Reporter(object):
|
|
"""Base reporter class"""
|
|
ext = ''
|
|
symbols = {'plain': {'join': '->',
|
|
'leave': '<-',
|
|
'me': '*',
|
|
'file': '-',
|
|
'topic': '+',
|
|
'separator': '|'},
|
|
'unicode': {'join': '⮊',
|
|
'leave': '⮈',
|
|
'me': '🟊',
|
|
'file': '📂',
|
|
'topic': '🟅',
|
|
'separator': '│'}}
|
|
literal_url_pat = re.compile(r'(?P<replace>(?P<url>https?[^\s\|]+))')
|
|
url_pat = re.compile(r'(?P<replace><(?P<url>http[^\|>]+)'
|
|
r'(\|(?P<title>[^>]+))?>)')
|
|
slackid_pat = re.compile(r'(?P<replace><@'
|
|
r'(?P<slackid>U[A-Z,0-9]+)(\|[^>]+)?[^>]*>)')
|
|
|
|
def __init__(self, args, query):
|
|
self.out = args.output
|
|
self.theme = args.theme
|
|
self.q = query
|
|
self.types = {"channel_join": self._msg_join,
|
|
"channel_leave": self._msg_leave,
|
|
"channel_topic": self._msg_topic,
|
|
"file_share": self._msg_file,
|
|
"me_message": self._msg_me}
|
|
|
|
self.emoji = emoji.EMOJI.get(args.theme, {})
|
|
|
|
self.channels = self._get_channels(args.channels)
|
|
self.users = self.q(o.User).all()
|
|
|
|
def generate(self):
|
|
"""Generate raport for each channel"""
|
|
for channel in self.channels:
|
|
messages = []
|
|
log_path = self.get_log_path(channel.name)
|
|
try:
|
|
os.unlink(log_path)
|
|
except IOError as err:
|
|
if err.errno != errno.ENOENT:
|
|
raise
|
|
for message in self.q(o.Message).\
|
|
filter(o.Message.channel == channel).\
|
|
order_by(o.Message.ts).all():
|
|
messages.append(message)
|
|
self.write_msg(messages, log_path)
|
|
|
|
def get_log_path(self, name):
|
|
"""Return relative log file name """
|
|
return os.path.join(self.out, name + self.ext)
|
|
|
|
def write_msg(self, messages, log):
|
|
"""Write message to file"""
|
|
with open(log, "a", encoding='utf8') as fobj:
|
|
for message in messages:
|
|
data = self._process_message(message)
|
|
fobj.write(data['tpl'].format(**data))
|
|
|
|
def _get_symbol(self, item):
|
|
"""Return appropriate item depending on the selected theme"""
|
|
return self.symbols[self.theme][item]
|
|
|
|
def _get_channels(self, selected_channels):
|
|
"""
|
|
Retrieve channels from db and return those which names matched from
|
|
selected_channels list
|
|
"""
|
|
all_channels = self.q(o.Channel).all()
|
|
if not selected_channels:
|
|
return all_channels
|
|
|
|
result = []
|
|
for channel in all_channels:
|
|
if channel.name in selected_channels:
|
|
result.append(channel)
|
|
return result
|
|
|
|
def _process_message(self, msg):
|
|
"""
|
|
Make changes to the text (replace slack ids, replace representation of
|
|
urls, substitute images etc) and return dict with data suitable to
|
|
display.
|
|
"""
|
|
processor = self.types.get(msg.type, self._msg)
|
|
data = processor(msg)
|
|
data.update({'date': msg.datetime().strftime("%Y-%m-%d %H:%M:%S"),
|
|
'tpl': "{date} {nick} {msg}"})
|
|
|
|
for emoticon in self.emoji:
|
|
data['msg'] = data['msg'].replace(emoticon, self.emoji[emoticon])
|
|
|
|
return data
|
|
|
|
def _msg_join(self, msg):
|
|
"""return data for join"""
|
|
return {'msg': msg.text,
|
|
'nick': self._get_symbol('join')}
|
|
|
|
def _msg_leave(self, msg):
|
|
"""return data for leave"""
|
|
return {'msg': msg.text,
|
|
'nick': self._get_symbol('leave')}
|
|
|
|
def _msg_topic(self, msg):
|
|
"""return data for set topic"""
|
|
return {'msg': msg.text,
|
|
'nick': self._get_symbol('topic')}
|
|
|
|
def _msg_me(self, msg):
|
|
"""return data for /me"""
|
|
return {'msg': msg.user.name + ' ' + msg.text,
|
|
'nick': self._get_symbol('me')}
|
|
|
|
def _msg_file(self, msg):
|
|
"""return data for file"""
|
|
return {'msg': msg.text,
|
|
'nick': self._get_symbol('file')}
|
|
|
|
def _msg(self, msg):
|
|
"""return data for all other message types"""
|
|
return {'msg': msg.text,
|
|
'nick': msg.user.name}
|
|
|
|
def _filter_slackid(self, text):
|
|
"""filter out all of the id from slack"""
|
|
match = True
|
|
while match:
|
|
match = self.slackid_pat.search(text)
|
|
if not match:
|
|
return text
|
|
|
|
match = match.groupdict()
|
|
user = self.q(o.User).filter(o.User.slackid ==
|
|
match['slackid']).one()
|
|
text = text.replace(match['replace'], user.name)
|
|
|
|
return text
|
|
|
|
|
|
class NoneReporter(Reporter):
|
|
"""Dummy reporter used for fallback"""
|
|
|
|
def generate(self):
|
|
"""Generate raport it's a dummmy one - for use with none reporter"""
|
|
return
|
|
|
|
|
|
class TextReporter(Reporter):
|
|
"""Text aka IRC reporter"""
|
|
ext = '.log'
|
|
tpl = '{date} {nick:>{max_len}} {separator} {msg}\n'
|
|
|
|
def __init__(self, args, query):
|
|
super(TextReporter, self).__init__(args, query)
|
|
utils.makedirs(self.out)
|
|
self._max_len = 0
|
|
|
|
def generate(self):
|
|
"""Generate raport"""
|
|
for channel in self.channels:
|
|
messages = []
|
|
log_path = self.get_log_path(channel.name)
|
|
self._set_max_len(channel)
|
|
try:
|
|
os.unlink(log_path)
|
|
except IOError as err:
|
|
if err.errno != errno.ENOENT:
|
|
raise
|
|
for message in self.q(o.Message).\
|
|
filter(o.Message.channel == channel).\
|
|
order_by(o.Message.ts).all():
|
|
messages.append(message)
|
|
|
|
self.write_msg(messages, log_path)
|
|
|
|
def _set_max_len(self, channel):
|
|
"""calculate max_len for sepcified channel"""
|
|
users = [m.user for m in channel.messages]
|
|
users = set([u.name for u in users])
|
|
|
|
self._max_len = 0
|
|
for user_name in users:
|
|
if len(user_name) > self._max_len:
|
|
self._max_len = len(user_name)
|
|
|
|
def _process_message(self, msg):
|
|
"""
|
|
Check what kind of message we are dealing with and do appropriate
|
|
formatting
|
|
"""
|
|
data = super(TextReporter, self)._process_message(msg)
|
|
data['msg'] = self._filter_slackid(data['msg'])
|
|
data['msg'] = self._fix_newlines(data['msg'])
|
|
data['msg'] = self._remove_entities(data['msg'])
|
|
data.update({'date': msg.datetime().strftime("%Y-%m-%d %H:%M:%S"),
|
|
'max_len': self._max_len,
|
|
'separator': self._get_symbol('separator'),
|
|
'tpl': self.tpl})
|
|
return data
|
|
|
|
def _msg_file(self, msg):
|
|
"""return data for file"""
|
|
if msg.file.filepath:
|
|
fpath = os.path.abspath(msg.file.filepath)
|
|
fpath = pathlib.PurePath(fpath).as_uri()
|
|
else:
|
|
fpath = 'does_not_exists'
|
|
|
|
return {'msg': self.url_pat.sub('(' + fpath + ') ' + msg.file.title,
|
|
msg.text),
|
|
'nick': self._get_symbol('file')}
|
|
|
|
def _msg(self, msg):
|
|
"""return data for all other message types"""
|
|
|
|
data = super(TextReporter, self)._msg(msg)
|
|
result = ''
|
|
|
|
if msg.attachments:
|
|
for att in msg.attachments:
|
|
if att.title:
|
|
att_text = att.title + '\n'
|
|
else:
|
|
att_text = self._fix_newlines(att.fallback) + '\n'
|
|
|
|
if att.text:
|
|
att_text += att.text
|
|
|
|
result += att_text + '\n'
|
|
|
|
data['msg'] += result.strip()
|
|
return data
|
|
|
|
def _remove_entities(self, text):
|
|
"""replace html entites into appropriate chars"""
|
|
return html.parser.HTMLParser().unescape(text)
|
|
|
|
def _fix_newlines(self, text):
|
|
"""Shift text with new lines to the right with separator"""
|
|
shift = 19 # length of the date
|
|
shift += 1 # separator space
|
|
shift += self._max_len # length reserved for the nicks
|
|
shift += 1 # separator space
|
|
return text.replace('\n', '\n' + shift * ' ' +
|
|
self._get_symbol('separator') + ' ')
|
|
|
|
|
|
class StaticHtmlReporter(Reporter):
|
|
"""Text-like, but with browsable, clickable links"""
|
|
ext = '.html'
|
|
index_templ = """<!DOCTYPE html>
|
|
<html>
|
|
<head>
|
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
|
|
<title>%(title)s</title>
|
|
</head>
|
|
<body>
|
|
<div id="container">
|
|
%(msgs)s
|
|
</div>
|
|
</body>
|
|
</html>
|
|
"""
|
|
index_list = """
|
|
<ul>
|
|
%s
|
|
</ul>
|
|
"""
|
|
msg_head = """<!DOCTYPE html>
|
|
<html>
|
|
<head>
|
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
|
|
<title>Bla</title>
|
|
<style>
|
|
* {
|
|
font-family: sans-serif;
|
|
}
|
|
.log {
|
|
width: 100%;
|
|
}
|
|
.log tr:nth-child(even) {
|
|
background-color: #efefef;
|
|
}
|
|
.nick {
|
|
text-align: right;
|
|
white-space: nowrap;
|
|
}
|
|
.date {
|
|
white-space: nowrap;
|
|
}
|
|
td {
|
|
padding: 2px;
|
|
}
|
|
</style>
|
|
</head>
|
|
<body>
|
|
<div id="container">
|
|
<table class="log">
|
|
"""
|
|
msg_foot = """
|
|
</table>
|
|
</div>
|
|
</body>
|
|
</html>
|
|
"""
|
|
msg_line = """
|
|
<tr>
|
|
<td class="date">{date}</td>
|
|
<td class="nick">{nick}</td>
|
|
<td>{msg}</td>
|
|
</tr>
|
|
"""
|
|
|
|
def __init__(self, args, query):
|
|
super(StaticHtmlReporter, self).__init__(args, query)
|
|
utils.makedirs(self.out)
|
|
self._max_len = 0
|
|
|
|
def generate(self):
|
|
"""Generate raport"""
|
|
super(StaticHtmlReporter, self).generate()
|
|
|
|
with open(os.path.join(self.out, "index.html"), "w",
|
|
encoding='utf8') as fobj:
|
|
content = {'title': 'index',
|
|
'msgs': self.index_list % self._get_index_list()}
|
|
fobj.write(self.index_templ % content)
|
|
|
|
def write_msg(self, messages, log):
|
|
"""Write message to file"""
|
|
with open(log, "w", encoding='utf8') as fobj:
|
|
fobj.write(self.msg_head)
|
|
|
|
super(StaticHtmlReporter, self).write_msg(messages, log)
|
|
|
|
with open(log, "a", encoding='utf8') as fobj:
|
|
fobj.write(self.msg_foot)
|
|
|
|
def _get_index_list(self):
|
|
_list = []
|
|
for channel in sorted([c.name for c in self.channels]):
|
|
_list.append('<li><a href="%s">%s</a></li>' % (channel + '.html',
|
|
channel))
|
|
return '\n'.join(_list)
|
|
|
|
def _process_message(self, msg):
|
|
"""
|
|
Check what kind of message we are dealing with and do appropriate
|
|
formatting
|
|
"""
|
|
data = super(StaticHtmlReporter, self)._process_message(msg)
|
|
data['msg'] = self._filter_slackid(data['msg'])
|
|
data.update({'date': msg.datetime().strftime("%Y-%m-%d %H:%M:%S"),
|
|
'tpl': self.msg_line})
|
|
return data
|
|
|
|
def _msg_file(self, msg):
|
|
"""return data for file"""
|
|
if msg.file.filepath:
|
|
fpath = os.path.abspath(msg.file.filepath)
|
|
fpath = pathlib.PurePath(fpath).as_uri()
|
|
else:
|
|
fpath = 'does_not_exists'
|
|
|
|
_, ext = os.path.splitext(fpath)
|
|
if ext.lower() in ('.png', '.jpg', '.jpeg', '.gif'):
|
|
url = ('<img src="' + fpath + '" height="300" alt="' +
|
|
msg.file.title + '">')
|
|
else:
|
|
url = ('<a href="' + fpath + '">' + msg.file.title + '</a>')
|
|
|
|
return {'msg': self.url_pat.sub(url, msg.text),
|
|
'nick': self._get_symbol('file')}
|
|
|
|
def _msg(self, msg):
|
|
"""return processor for all other message types"""
|
|
|
|
data = {'date': msg.datetime().strftime("%Y-%m-%d %H:%M:%S"),
|
|
'msg': msg.text,
|
|
'nick': msg.user.name}
|
|
|
|
link = '<a href="{url}">{title}</a>'
|
|
attachment_msg = []
|
|
|
|
if msg.attachments:
|
|
for att in msg.attachments:
|
|
if 'http' in att.fallback:
|
|
match = self.url_pat.search(att.fallback)
|
|
if not match:
|
|
match = self.literal_url_pat.search(att.fallback)
|
|
match = match.groupdict()
|
|
|
|
if 'title' not in match:
|
|
match['title'] = match['url']
|
|
if att.title:
|
|
match['title'] = att.title
|
|
|
|
att_text = att.fallback.replace(match['replace'],
|
|
link.format(**match))
|
|
else:
|
|
match = self.url_pat.search(msg.text)
|
|
if match:
|
|
match = match.groupdict()
|
|
match['title'] = att.fallback
|
|
att_text = msg.text.replace(match['replace'],
|
|
link.format(**match))
|
|
else:
|
|
att_text = att.fallback
|
|
attachment_msg.append(att_text)
|
|
|
|
data['msg'] += '<br>'.join(attachment_msg)
|
|
return data
|
|
|
|
|
|
def get_reporter(args, query):
|
|
"""Return object of right reporter class"""
|
|
reporters = {'text': TextReporter,
|
|
'html': StaticHtmlReporter}
|
|
|
|
klass = reporters.get(args.format, NoneReporter)
|
|
if klass.__name__ == 'Reporter':
|
|
logging.warning('None, or wrong (%s) formatter selected, falling to'
|
|
' None Reporter', args.format)
|
|
return klass(args, query)
|