class TwitchMIMEParser(BaseMIMEParser):
    """
    Non-streaming videos hosted by twitch.tv

    The clip page is downloaded and its ``twitter:image`` meta tag is read.
    When that thumbnail URL ends in ``-preview.jpg``, the suffix is swapped
    for ``.mp4`` and the result is returned as the video URL.
    """
    # NOTE(review): the dot after "clips" is optional in this pattern, so a
    # host spelled "clipstwitch.tv" also matches -- confirm that is intended.
    pattern = re.compile(r'https?://clips\.?twitch\.tv/[^.]+$')

    @staticmethod
    def get_mimetype(url):
        """
        Resolve a clip page URL to a video URL and mimetype.

        Args:
            url: The clip page URL to fetch.

        Returns:
            A ``(video_url, 'video/mp4')`` tuple when the page exposes a
            ``twitter:image`` thumbnail ending in ``-preview.jpg``;
            otherwise ``(url, None)`` with the URL unchanged.
        """
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')

        # find() returns None when no matching tag exists, and the tag may
        # lack a "content" attribute; treat both as "no thumbnail" instead
        # of raising AttributeError.
        tag = soup.find('meta', attrs={'name': 'twitter:image'})
        thumbnail = tag.get('content') if tag is not None else None

        suffix = '-preview.jpg'
        if not thumbnail or not thumbnail.endswith(suffix):
            return url, None

        # Swap only the trailing suffix; str.replace would also rewrite any
        # earlier occurrence of the same text inside the URL.
        return thumbnail[:-len(suffix)] + '.mp4', 'video/mp4'
+ + +'} + headers: + Cache-Control: ['public, max-age=300, s-maxage=300'] + Connection: [keep-alive] + Content-Length: ['95'] + Content-Type: [text/html; charset=utf-8] + Date: ['Sun, 20 Aug 2017 13:25:05 GMT'] + Location: ['https://clips.twitch.tv/RelentlessDifficultYogurtArsonNoSexy'] + Server: [nginx/1.8.1] + Via: [1.1 cbefa4574158a548775db8beda8075d2.cloudfront.net (CloudFront)] + X-Amz-Cf-Id: [KdIDFnr7QRO0YsFiDVc1bekNw6m2yIAWP_V0AwYNk3M_ppJqFxRiRQ==] + X-Cache: [Miss from cloudfront] + status: {code: 301, message: Moved Permanently} +- request: + body: null + headers: + Accept: ['*/*'] + Accept-Encoding: ['gzip, deflate'] + Connection: [keep-alive] + User-Agent: [python-requests/2.18.3] + method: GET + uri: https://clips.twitch.tv/RelentlessDifficultYogurtArsonNoSexy + response: + body: {string: "\n\n \n \n TAKING + EVERY GYM BY STORM. IRL IDIOT POKEMON GO. Level 25. @Snowboardjacket\n + \ \n \n + \ \n \n \n\n \n \n \n + \ \n \n \n + \ \n \n \n \n \n \n\n \n \n \n + \ \n + \ \n + \ \n \n \n \n \n \n + \ \n + \ \n\n \n \n \n \n + \ \n \n \n \n \n \n \n + \ \n + \ \n \n \n + \ \n + \ \n \n \n + \ \n \n + \ \n \n + \ \n + \ \n \n + \
\n
\n
\n + \
\n
\n + \
\n
\n
\n
\n + \
\n
\n + \
\n
\n + \
\n
\n \n + \ \n \n\n"} + headers: + Cache-Control: ['public, max-age=300, s-maxage=300'] + Connection: [keep-alive] + Content-Type: [text/html; charset=utf-8] + Date: ['Sun, 20 Aug 2017 13:25:06 GMT'] + Server: [nginx/1.8.1] + Via: [1.1 cbefa4574158a548775db8beda8075d2.cloudfront.net (CloudFront)] + X-Amz-Cf-Id: [2xcFsZt-awZHRQ3ML7EDWx4ediZDRAQ8R84SfBXB25J92uBiSuDBcw==] + X-Cache: [Miss from cloudfront] + status: {code: 200, message: OK} +version: 1 diff --git a/tests/test_mime_parsers.py b/tests/test_mime_parsers.py index 08d29b0..70773e6 100644 --- a/tests/test_mime_parsers.py +++ b/tests/test_mime_parsers.py @@ -81,6 +81,10 @@ URLS = OrderedDict([ 'https://v.redd.it/zv89llsvexdz', 'https://v.redd.it/zv89llsvexdz/DASHPlaylist.mpd', 'video/x-youtube')), + ('twitch_clip', ( + 'https://clips.twitch.tv/avaail/ExpensiveFishBCouch', + 'https://clips-media-assets.twitch.tv/22467338656-index-0000000111.mp4', + 'video/mp4')), ])