class RedditVideoMIMEParser(BaseMIMEParser):
    """
    Reddit hosted videos/gifs.
    Media uses MPEG-DASH format (.mpd)

    The DASH manifest for a ``v.redd.it`` link is downloaded and inspected.
    Silent clips (no ``audio/mp4`` representation) are resolved to the
    highest-bandwidth ``video/mp4`` file so they can be opened directly.
    Anything else is returned as the manifest URL itself, tagged with the
    ``video/x-youtube`` type.
    """
    pattern = re.compile(r'https://v\.redd\.it/.+$')

    @staticmethod
    def get_mimetype(url):
        """
        Resolve a v.redd.it link to a playable URL and its mimetype.

        Args:
            url: The v.redd.it media URL (a trailing slash is tolerated).

        Returns:
            A ``(url, mimetype)`` tuple. Either the direct link to the best
            quality video file with ``'video/mp4'``, or the DASH manifest
            URL with ``'video/x-youtube'`` when the media has an audio track
            or when no usable video representation could be found.
        """
        # The pattern accepts a trailing slash; strip it so the manifest
        # URL does not end up with a double slash.
        base_url = url.rstrip('/')
        request_url = base_url + '/DASHPlaylist.mpd'
        manifest_fallback = (request_url, 'video/x-youtube')

        page = requests.get(request_url)
        # html.parser lower-cases tag and attribute names, which is why the
        # lookups below use 'representation', 'mimetype' and 'baseurl'.
        soup = BeautifulSoup(page.content, 'html.parser')

        if soup.find('representation', attrs={'mimetype': 'audio/mp4'}):
            # Audio and video are separate streams; hand over the manifest
            # instead of a single (silent) video file.
            return manifest_fallback

        reps = soup.find_all('representation',
                             attrs={'mimetype': 'video/mp4'})
        if not reps:
            # Error page or unexpected manifest: nothing to pick from, so
            # fall back to the manifest URL rather than raising IndexError.
            return manifest_fallback

        def bandwidth(tag):
            # A missing or malformed attribute ranks lowest instead of
            # aborting the whole lookup.
            try:
                return int(tag.get('bandwidth'))
            except (TypeError, ValueError):
                return 0

        # max() returns the first of equally ranked items, matching the
        # previous stable descending sort followed by [0].
        best = max(reps, key=bandwidth)
        base = best.find('baseurl')
        filename = base.text.strip() if base is not None else ''
        if not filename:
            return manifest_fallback
        return base_url + '/' + filename, 'video/mp4'
Each Imgur @@ -335,6 +357,7 @@ parsers = [ GfycatMIMEParser, ImgurApiMIMEParser, RedditUploadsMIMEParser, + RedditVideoMIMEParser, YoutubeMIMEParser, LiveleakMIMEParser, GifvMIMEParser, diff --git a/tests/cassettes/test_parser[reddit_gif].yaml b/tests/cassettes/test_parser[reddit_gif].yaml new file mode 100644 index 0000000..a88a7a8 --- /dev/null +++ b/tests/cassettes/test_parser[reddit_gif].yaml @@ -0,0 +1,40 @@ +interactions: +- request: + body: null + headers: + Accept: ['*/*'] + Accept-Encoding: ['gzip, deflate'] + Connection: [keep-alive] + User-Agent: [python-requests/2.12.4] + method: GET + uri: https://v.redd.it/wkm9zol8c6fz/DASHPlaylist.mpd + response: + body: + string: !!binary | + H4sIAAAAAAAAA4VSXY+bMBB8z6+w/E4whKYJCjnllKt6aquikGsfTz68gCVskG3SXH99F8ippDmp + vAD7MTM7u5u7s6rJCYyVjU5oMGeUgM4bIXWZ0KfjJ29FiXVcC143GhL6CpbebWebb+meKBCSpwYs + aMcdAuw7M7wTmh6DaB4uoowSJfV9VxRgjlLBkJl/YAwTrWkKWYNNaGd0rFooY8FtFV/isbRNUXiN + 9gQoFBCHLAgoca8totieMKcE1esbAJtX2IGBS892RvDZpGBkI4h4R+RYMVTtBG/HaTJwxEKpcLpd + LUvdfyTUmQ7Qku7l/6nMcePsT+mqbJeiuROageoA7cQ88oJD/pLCVQldspCtF5TgIiDH+fgpD+aR + iFiABIXhCg7coQ0LXFcFsqyQPYzwR4qE/njcP3z3gt55BcfBrpMU0PiqjYZlGjfRRC6Uffu1vkHj + PbfwdPi63e+yz89Lxp6/bPy32G11Ng7eFxCpBZwPXJfIv1p/9AIWIv3f6MOZ52+m3UINcI9aOslr + +Xs0yIxgzFutl9R/h96f8P/jtX9t9mTh/tXGL7fij8eCh+7jpW9nfwDvbQVhKAMAAA== + headers: + Accept-Ranges: [bytes] + Age: ['1814'] + Connection: [keep-alive] + Content-Encoding: [gzip] + Content-Length: ['448'] + Content-Type: [application/dash+xml] + Date: ['Sat, 12 Aug 2017 06:02:01 GMT'] + ETag: ['"981cd6c498c30ab3c4f1c743be7e6f60"'] + Last-Modified: ['Fri, 11 Aug 2017 21:10:49 GMT'] + Server: [snooserv] + Vary: ['Accept-Encoding,Origin'] + Via: [1.1 varnish] + X-Cache: [HIT] + X-Cache-Hits: ['2'] + X-Served-By: [cache-mdw17337-MDW] + X-Timer: ['S1502517721.219943,VS0,VE0'] + status: {code: 200, message: OK} +version: 1 diff --git a/tests/cassettes/test_parser[reddit_video].yaml b/tests/cassettes/test_parser[reddit_video].yaml new file mode 
100644 index 0000000..5fe4621 --- /dev/null +++ b/tests/cassettes/test_parser[reddit_video].yaml @@ -0,0 +1,44 @@ +interactions: +- request: + body: null + headers: + Accept: ['*/*'] + Accept-Encoding: ['gzip, deflate'] + Connection: [keep-alive] + User-Agent: [python-requests/2.12.4] + method: GET + uri: https://v.redd.it/zv89llsvexdz/DASHPlaylist.mpd + response: + body: + string: !!binary | + H4sIAAAAAAAAA+WWXW+bMBSG7/srLN8TbKAUUEiVNp0WbdWikGyXkYsNWAKD+Mja/fodIJFImq5L + lV0tN0ns4/PxPuYcxrfPWYq2oqxkrnxMRwQjocKcSxX7eL36pDkYVTVTnKW5Ej5+ERW+nVyNHxcz + lAku2aIUlVA1q8HBrCm7bx8vVpSOblw7wCiT6q6JIlGuZCa6ndE1IbBRlHkkU1H5uCmVlxUi9jir + Em+37skqjyItVxoXGSTgGYRSjOqXArxUbcAQI8hevXJQhQmcgIXdmckVgs94IUqZc8RPJNlbdFZT + zoq+mkDUqBJxBtVNUxmr9oeP67IRIEnz9P5WULOyrn7IOgmmCxB3EKYLtRTFQDz0BEX+lLxOfGxY + 1KaOiRGQECEUyLYhHVncIjTCKCpZJpasBh1M4JUIGScQ3nLgj+Q+/j6fPXzTaCt9JladXlvJRa5n + hdXRLOtBUmgf07aOEuySvGOVWC+/TmbT4PPG2Fibx7G+X3ttHfSVtwZIKi6el0zFEN8lpkYJsfFg + 9eGZhXvVXrvq3M2VrCVL5a9eobJ3RjSXGFg/EV4fxD8SWz9U+69RUIM4N8bNKRTiTRSmfYDCOBMF + dZ13UVCA8Z+hsInr0uvzSBjWAQnzXBKm8S4Jm5DNl4+RMFoS1xcjQS9GYqwf9MG3+uOf0bGGyzxg + WZHCNOnRQI8iAGT4fJmUtn19DxVwsJFFRkaPbbqezY96Wef2NLYT1U9b6/uEKSXS+1xFMt71f9SN + CTHn61IeTxDDJMT0TK8LtQn705tweHw3jrYsbSCn08/A/kp0bj5yQxzT0FzzUhfEMf/xBRnr/YyF + 9wMdXhAmV78BYdjQZV8IAAA= + headers: + Accept-Ranges: [bytes] + Age: ['3435'] + Connection: [keep-alive] + Content-Encoding: [gzip] + Content-Length: ['644'] + Content-Type: [application/dash+xml] + Date: ['Sat, 12 Aug 2017 06:02:01 GMT'] + ETag: ['"c8a71f31b45dbcdb30f8c03bc08e444b"'] + Last-Modified: ['Sat, 05 Aug 2017 14:05:39 GMT'] + Server: [snooserv] + Vary: ['Accept-Encoding,Origin'] + Via: [1.1 varnish] + X-Cache: [HIT] + X-Cache-Hits: ['2'] + X-Served-By: [cache-mdw17335-MDW] + X-Timer: ['S1502517721.351252,VS0,VE0'] + status: {code: 200, message: OK} +version: 1 diff --git a/tests/test_mime_parsers.py b/tests/test_mime_parsers.py index 1fe25ad..08d29b0 100644 --- a/tests/test_mime_parsers.py +++ 
b/tests/test_mime_parsers.py @@ -73,6 +73,14 @@ URLS = OrderedDict([ 'https://www.liveleak.com/view?i=08b_1499296574', re.compile('https://cdn.liveleak.com/80281E/ll_a_s/2017/Jul/5/LiveLeak-dot-com-08b_1499296574-NMHH8690_1499296571.mov.h264_720p.mp4(.*)'), 'video/mp4')), + ('reddit_gif', ( + 'https://v.redd.it/wkm9zol8c6fz', + 'https://v.redd.it/wkm9zol8c6fz/DASH_600_K', + 'video/mp4')), + ('reddit_video', ( + 'https://v.redd.it/zv89llsvexdz', + 'https://v.redd.it/zv89llsvexdz/DASHPlaylist.mpd', + 'video/x-youtube')), ]) @@ -121,4 +129,4 @@ def test_imgur_fallback(reddit): parsed_url, parsed_type = ImgurApiMIMEParser.get_mimetype(url) # Not sure why, but http://imgur.com/gallery/yjP1v4B (a .gif) # appears to incorrectly return as a JPG type from the scraper - assert parsed_type is not None \ No newline at end of file + assert parsed_type is not None