mirror of
https://github.com/gryf/slack-backup.git
synced 2025-12-17 11:30:25 +01:00
447 lines
14 KiB
Python
447 lines
14 KiB
Python
"""
|
|
Reporters module.
|
|
|
|
There are several classes for specific format reporting, and also some of the
|
|
slack conversation/convention parsers.
|
|
"""
|
|
import os
|
|
import errno
|
|
import html.parser
|
|
import logging
|
|
import pathlib
|
|
import re
|
|
|
|
from slack_backup import objects as o
|
|
from slack_backup import utils
|
|
from slack_backup import emoji
|
|
|
|
|
|
class Reporter(object):
|
|
"""Base reporter class"""
|
|
ext = ''
|
|
symbols = {'plain': {'join': '->',
|
|
'leave': '<-',
|
|
'me': '*',
|
|
'file': '-',
|
|
'topic': '+',
|
|
'separator': '|'},
|
|
'unicode': {'join': '⮊',
|
|
'leave': '⮈',
|
|
'me': '🟊',
|
|
'file': '📂',
|
|
'topic': '🟅',
|
|
'separator': '│'}}
|
|
literal_url_pat = re.compile(r'(?P<replace>(?P<url>https?[^\s\|]+))')
|
|
url_pat = re.compile(r'(?P<replace><(?P<url>http[^\|>]+)'
|
|
r'(\|(?P<title>[^>]+))?>)')
|
|
slackid_pat = re.compile(r'(?P<replace><@'
|
|
r'(?P<slackid>U[A-Z,0-9]+)(\|[^>]+)?[^>]*>)')
|
|
|
|
def __init__(self, args, query):
|
|
self.out = args.output
|
|
self.theme = args.theme
|
|
self.q = query
|
|
self.types = {"channel_join": self._msg_join,
|
|
"channel_leave": self._msg_leave,
|
|
"channel_topic": self._msg_topic,
|
|
"file_share": self._msg_file,
|
|
"me_message": self._msg_me}
|
|
|
|
self.emoji = emoji.EMOJI.get(args.theme, {})
|
|
|
|
self.channels = self._get_channels(args.channels)
|
|
self.users = self.q(o.User).all()
|
|
|
|
def generate(self):
|
|
"""Generate raport for each channel"""
|
|
for channel in self.channels:
|
|
messages = []
|
|
log_path = self.get_log_path(channel.name)
|
|
try:
|
|
os.unlink(log_path)
|
|
except IOError as err:
|
|
if err.errno != errno.ENOENT:
|
|
raise
|
|
for message in self.q(o.Message).\
|
|
filter(o.Message.channel == channel).\
|
|
order_by(o.Message.ts).all():
|
|
messages.append(message)
|
|
self.write_msg(messages, log_path)
|
|
|
|
def get_log_path(self, name):
|
|
"""Return relative log file name """
|
|
return os.path.join(self.out, name + self.ext)
|
|
|
|
def write_msg(self, messages, log):
|
|
"""Write message to file"""
|
|
with open(log, "a") as fobj:
|
|
for message in messages:
|
|
data = self._process_message(message)
|
|
fobj.write(data['tpl'].format(**data))
|
|
|
|
def _get_symbol(self, item):
|
|
"""Return appropriate item depending on the selected theme"""
|
|
return self.symbols[self.theme][item]
|
|
|
|
def _get_channels(self, selected_channels):
|
|
"""
|
|
Retrieve channels from db and return those which names matched from
|
|
selected_channels list
|
|
"""
|
|
all_channels = self.q(o.Channel).all()
|
|
if not selected_channels:
|
|
return all_channels
|
|
|
|
result = []
|
|
for channel in all_channels:
|
|
if channel.name in selected_channels:
|
|
result.append(channel)
|
|
return result
|
|
|
|
def _process_message(self, msg):
|
|
"""
|
|
Make changes to the text (replace slack ids, replace representation of
|
|
urls, substitute images etc) and return dict with data suitable to
|
|
display.
|
|
"""
|
|
processor = self.types.get(msg.type, self._msg)
|
|
data = processor(msg)
|
|
data.update({'date': msg.datetime().strftime("%Y-%m-%d %H:%M:%S"),
|
|
'tpl': "{date} {nick} {msg}"})
|
|
|
|
for emoticon in self.emoji:
|
|
data['msg'] = data['msg'].replace(emoticon, self.emoji[emoticon])
|
|
|
|
return data
|
|
|
|
def _msg_join(self, msg):
|
|
"""return data for join"""
|
|
return {'msg': msg.text,
|
|
'nick': self._get_symbol('join')}
|
|
|
|
def _msg_leave(self, msg):
|
|
"""return data for leave"""
|
|
return {'msg': msg.text,
|
|
'nick': self._get_symbol('leave')}
|
|
|
|
def _msg_topic(self, msg):
|
|
"""return data for set topic"""
|
|
return {'msg': msg.text,
|
|
'nick': self._get_symbol('topic')}
|
|
|
|
def _msg_me(self, msg):
|
|
"""return data for /me"""
|
|
return {'msg': msg.user.name + ' ' + msg.text,
|
|
'nick': self._get_symbol('me')}
|
|
|
|
def _msg_file(self, msg):
|
|
"""return data for file"""
|
|
return {'msg': msg.text,
|
|
'nick': self._get_symbol('file')}
|
|
|
|
def _msg(self, msg):
|
|
"""return data for all other message types"""
|
|
return {'msg': msg.text,
|
|
'nick': msg.user.name}
|
|
|
|
def _filter_slackid(self, text):
|
|
"""filter out all of the id from slack"""
|
|
match = True
|
|
while match:
|
|
match = self.slackid_pat.search(text)
|
|
if not match:
|
|
return text
|
|
|
|
match = match.groupdict()
|
|
user = self.q(o.User).filter(o.User.slackid ==
|
|
match['slackid']).one()
|
|
text = text.replace(match['replace'], user.name)
|
|
|
|
return text
|
|
|
|
|
|
class NoneReporter(Reporter):
|
|
"""Dummy reporter used for fallback"""
|
|
|
|
def generate(self):
|
|
"""Generate raport it's a dummmy one - for use with none reporter"""
|
|
return
|
|
|
|
|
|
class TextReporter(Reporter):
|
|
"""Text aka IRC reporter"""
|
|
ext = '.log'
|
|
tpl = '{date} {nick:>{max_len}} {separator} {msg}\n'
|
|
|
|
def __init__(self, args, query):
|
|
super(TextReporter, self).__init__(args, query)
|
|
utils.makedirs(self.out)
|
|
self._max_len = 0
|
|
|
|
def generate(self):
|
|
"""Generate raport"""
|
|
for channel in self.channels:
|
|
messages = []
|
|
log_path = self.get_log_path(channel.name)
|
|
self._set_max_len(channel)
|
|
try:
|
|
os.unlink(log_path)
|
|
except IOError as err:
|
|
if err.errno != errno.ENOENT:
|
|
raise
|
|
for message in self.q(o.Message).\
|
|
filter(o.Message.channel == channel).\
|
|
order_by(o.Message.ts).all():
|
|
messages.append(message)
|
|
|
|
self.write_msg(messages, log_path)
|
|
|
|
def _set_max_len(self, channel):
|
|
"""calculate max_len for sepcified channel"""
|
|
users = [m.user for m in channel.messages]
|
|
users = set([u.name for u in users])
|
|
|
|
self._max_len = 0
|
|
for user_name in users:
|
|
if len(user_name) > self._max_len:
|
|
self._max_len = len(user_name)
|
|
|
|
def _process_message(self, msg):
|
|
"""
|
|
Check what kind of message we are dealing with and do appropriate
|
|
formatting
|
|
"""
|
|
data = super(TextReporter, self)._process_message(msg)
|
|
data['msg'] = self._filter_slackid(data['msg'])
|
|
data['msg'] = self._fix_newlines(data['msg'])
|
|
data['msg'] = self._remove_entities(data['msg'])
|
|
data.update({'date': msg.datetime().strftime("%Y-%m-%d %H:%M:%S"),
|
|
'max_len': self._max_len,
|
|
'separator': self._get_symbol('separator'),
|
|
'tpl': self.tpl})
|
|
return data
|
|
|
|
def _msg_file(self, msg):
|
|
"""return data for file"""
|
|
if msg.file.filepath:
|
|
fpath = os.path.abspath(msg.file.filepath)
|
|
else:
|
|
fpath = 'does_not_exists'
|
|
|
|
return {'msg': self.url_pat.sub('(' +
|
|
pathlib.PurePath(fpath).as_uri() +
|
|
') ' + msg.file.title, msg.text),
|
|
'nick': self._get_symbol('file')}
|
|
|
|
def _msg(self, msg):
|
|
"""return data for all other message types"""
|
|
|
|
data = super(TextReporter, self)._msg(msg)
|
|
result = ''
|
|
|
|
if msg.attachments:
|
|
for att in msg.attachments:
|
|
if att.title:
|
|
att_text = att.title + '\n'
|
|
else:
|
|
att_text = self._fix_newlines(att.fallback) + '\n'
|
|
|
|
if att.text:
|
|
att_text += att.text
|
|
|
|
result += att_text + '\n'
|
|
|
|
data['msg'] += result.strip()
|
|
return data
|
|
|
|
def _remove_entities(self, text):
|
|
"""replace html entites into appropriate chars"""
|
|
return html.parser.HTMLParser().unescape(text)
|
|
|
|
def _fix_newlines(self, text):
|
|
"""Shift text with new lines to the right with separator"""
|
|
shift = 19 # length of the date
|
|
shift += 1 # separator space
|
|
shift += self._max_len # length reserved for the nicks
|
|
shift += 1 # separator space
|
|
return text.replace('\n', '\n' + shift * ' ' +
|
|
self._get_symbol('separator') + ' ')
|
|
|
|
|
|
class StaticHtmlReporter(Reporter):
|
|
"""Text-like, but with browsable, clickable links"""
|
|
ext = '.html'
|
|
index_templ = """<!DOCTYPE html>
|
|
<html>
|
|
<head>
|
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
|
|
<title>%(title)s</title>
|
|
</head>
|
|
<body>
|
|
<div id="container">
|
|
%(msgs)s
|
|
</div>
|
|
</body>
|
|
</html>
|
|
"""
|
|
index_list = """
|
|
<ul>
|
|
%s
|
|
</ul>
|
|
"""
|
|
msg_head = """<!DOCTYPE html>
|
|
<html>
|
|
<head>
|
|
<meta content="text/html; charset=utf-8" http-equiv="Content-Type"/>
|
|
<title>Bla</title>
|
|
<style>
|
|
* {
|
|
font-family: sans-serif;
|
|
}
|
|
.log {
|
|
width: 100%;
|
|
}
|
|
.log tr:nth-child(even) {
|
|
background-color: #efefef;
|
|
}
|
|
.nick {
|
|
text-align: right;
|
|
white-space: nowrap;
|
|
}
|
|
.date {
|
|
white-space: nowrap;
|
|
}
|
|
td {
|
|
padding: 2px;
|
|
}
|
|
</style>
|
|
</head>
|
|
<body>
|
|
<div id="container">
|
|
<table class="log">
|
|
"""
|
|
msg_foot = """
|
|
</table>
|
|
</div>
|
|
</body>
|
|
</html>
|
|
"""
|
|
msg_line = """
|
|
<tr>
|
|
<td class="date">{date}</td>
|
|
<td class="nick">{nick}</td>
|
|
<td>{msg}</td>
|
|
</tr>
|
|
"""
|
|
|
|
def __init__(self, args, query):
|
|
super(StaticHtmlReporter, self).__init__(args, query)
|
|
utils.makedirs(self.out)
|
|
self._max_len = 0
|
|
|
|
def generate(self):
|
|
"""Generate raport"""
|
|
super(StaticHtmlReporter, self).generate()
|
|
|
|
with open(os.path.join(self.out, "index.html"), "w") as fobj:
|
|
content = {'title': 'index',
|
|
'msgs': self.index_list % self._get_index_list()}
|
|
fobj.write(self.index_templ % content)
|
|
|
|
def write_msg(self, messages, log):
|
|
"""Write message to file"""
|
|
with open(log, "w") as fobj:
|
|
fobj.write(self.msg_head)
|
|
|
|
super(StaticHtmlReporter, self).write_msg(messages, log)
|
|
|
|
with open(log, "a") as fobj:
|
|
fobj.write(self.msg_foot)
|
|
|
|
def _get_index_list(self):
|
|
_list = []
|
|
for channel in sorted([c.name for c in self.channels]):
|
|
_list.append('<li><a href="%s">%s</a></li>' % (channel + '.html',
|
|
channel))
|
|
return '\n'.join(_list)
|
|
|
|
def _process_message(self, msg):
|
|
"""
|
|
Check what kind of message we are dealing with and do appropriate
|
|
formatting
|
|
"""
|
|
data = super(StaticHtmlReporter, self)._process_message(msg)
|
|
data['msg'] = self._filter_slackid(data['msg'])
|
|
data.update({'date': msg.datetime().strftime("%Y-%m-%d %H:%M:%S"),
|
|
'tpl': self.msg_line})
|
|
return data
|
|
|
|
def _msg_file(self, msg):
|
|
"""return data for file"""
|
|
if msg.file.filepath:
|
|
fpath = os.path.abspath(msg.file.filepath)
|
|
else:
|
|
fpath = 'does_not_exists'
|
|
|
|
_, ext = os.path.splitext(fpath)
|
|
if ext.lower() in ('.png', '.jpg', '.jpeg', '.gif'):
|
|
url = ('<img src="' + pathlib.PurePath(fpath).as_uri() +
|
|
'" height="300" alt="' + msg.file.title + '">')
|
|
else:
|
|
url = ('<a href="' + pathlib.PurePath(fpath).as_uri() +
|
|
'">' + msg.file.title + '</a>')
|
|
|
|
return {'msg': self.url_pat.sub(url, msg.text),
|
|
'nick': self._get_symbol('file')}
|
|
|
|
def _msg(self, msg):
|
|
"""return processor for all other message types"""
|
|
|
|
data = {'date': msg.datetime().strftime("%Y-%m-%d %H:%M:%S"),
|
|
'msg': msg.text,
|
|
'nick': msg.user.name}
|
|
|
|
link = '<a href="{url}">{title}</a>'
|
|
attachement_msg = []
|
|
|
|
if msg.attachments:
|
|
for att in msg.attachments:
|
|
if 'http' in att.fallback:
|
|
match = self.url_pat.search(att.fallback)
|
|
if not match:
|
|
match = self.literal_url_pat.search(att.fallback)
|
|
match = match.groupdict()
|
|
|
|
if 'title' not in match:
|
|
match['title'] = match['url']
|
|
if att.title:
|
|
match['title'] = att.title
|
|
|
|
att_text = att.fallback.replace(match['replace'],
|
|
link.format(**match))
|
|
else:
|
|
match = self.url_pat.search(msg.text)
|
|
if match:
|
|
match = match.groupdict()
|
|
match['title'] = att.fallback
|
|
att_text = msg.text.replace(match['replace'],
|
|
link.format(**match))
|
|
else:
|
|
att_text = att.fallback
|
|
attachement_msg.append(att_text)
|
|
|
|
data['msg'] += '<br>'.join(attachement_msg)
|
|
return data
|
|
|
|
|
|
def get_reporter(args, query):
|
|
"""Return object of right reporter class"""
|
|
reporters = {'text': TextReporter,
|
|
'html': StaticHtmlReporter}
|
|
|
|
klass = reporters.get(args.format, NoneReporter)
|
|
if klass.__name__ == 'Reporter':
|
|
logging.warning('None, or wrong (%s) formatter selected, falling to'
|
|
' None Reporter', args.format)
|
|
return klass(args, query)
|