mime parser for reddit hosted video

This commit is contained in:
woorst
2017-08-12 01:03:17 -05:00
parent c5936f4253
commit a72aa8ea1e
4 changed files with 116 additions and 1 deletions

View File

@@ -128,6 +128,28 @@ class RedditUploadsMIMEParser(BaseMIMEParser):
return url, content_type
class RedditVideoMIMEParser(BaseMIMEParser):
    """
    Reddit hosted videos/gifs.
    Media uses MPEG-DASH format (.mpd)

    The DASH playlist for a post is fetched from
    ``<url>/DASHPlaylist.mpd``. If it has no audio track, the
    highest-bandwidth mp4 video representation is linked to directly.
    Otherwise the playlist URL itself is returned.
    """
    pattern = re.compile(r'https://v\.redd\.it/.+$')

    @staticmethod
    def get_mimetype(url):
        """
        Resolve a v.redd.it link to a playable URL and its mimetype.

        Args:
            url: A ``https://v.redd.it/<id>`` link.

        Returns:
            A ``(url, mimetype)`` tuple. ``(<direct file url>, 'video/mp4')``
            when the playlist has no audio representation and a usable
            video representation is found; otherwise
            ``(<playlist url>, 'video/x-youtube')``.
        """
        request_url = url + '/DASHPlaylist.mpd'
        page = requests.get(request_url)

        # The playlist is XML, but it is parsed with the HTML parser, which
        # normalises tag and attribute names to lowercase. That is why the
        # lookups below use 'representation', 'mimetype' and 'baseurl'.
        soup = BeautifulSoup(page.content, 'html.parser')

        has_audio = soup.find(
            'representation', attrs={'mimetype': 'audio/mp4'})
        if not has_audio:
            reps = soup.find_all(
                'representation', attrs={'mimetype': 'video/mp4'})
            if reps:
                # Only the best-quality stream is needed, so take the max
                # instead of sorting. A missing bandwidth attribute is
                # treated as 0 rather than raising.
                best = max(
                    reps, key=lambda tag: int(tag.get('bandwidth') or 0))
                base_url = best.find('baseurl')
                if base_url is not None and base_url.text:
                    return url + '/' + base_url.text, 'video/mp4'

        # Either the media has a separate audio track or no direct video
        # file could be determined (empty or unexpected playlist). Return
        # the playlist itself.
        # NOTE(review): 'video/x-youtube' is presumably mapped to a
        # stream-capable player elsewhere in the project -- confirm.
        return request_url, 'video/x-youtube'
class ImgurApiMIMEParser(BaseMIMEParser):
"""
Imgur now provides a json API exposing its entire infrastructure. Each Imgur
@@ -335,6 +357,7 @@ parsers = [
GfycatMIMEParser,
ImgurApiMIMEParser,
RedditUploadsMIMEParser,
RedditVideoMIMEParser,
YoutubeMIMEParser,
LiveleakMIMEParser,
GifvMIMEParser,

View File

@@ -0,0 +1,40 @@
interactions:
- request:
body: null
headers:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [python-requests/2.12.4]
method: GET
uri: https://v.redd.it/wkm9zol8c6fz/DASHPlaylist.mpd
response:
body:
string: !!binary |
H4sIAAAAAAAAA4VSXY+bMBB8z6+w/E4whKYJCjnllKt6aquikGsfTz68gCVskG3SXH99F8ippDmp
vAD7MTM7u5u7s6rJCYyVjU5oMGeUgM4bIXWZ0KfjJ29FiXVcC143GhL6CpbebWebb+meKBCSpwYs
aMcdAuw7M7wTmh6DaB4uoowSJfV9VxRgjlLBkJl/YAwTrWkKWYNNaGd0rFooY8FtFV/isbRNUXiN
9gQoFBCHLAgoca8totieMKcE1esbAJtX2IGBS892RvDZpGBkI4h4R+RYMVTtBG/HaTJwxEKpcLpd
LUvdfyTUmQ7Qku7l/6nMcePsT+mqbJeiuROageoA7cQ88oJD/pLCVQldspCtF5TgIiDH+fgpD+aR
iFiABIXhCg7coQ0LXFcFsqyQPYzwR4qE/njcP3z3gt55BcfBrpMU0PiqjYZlGjfRRC6Uffu1vkHj
PbfwdPi63e+yz89Lxp6/bPy32G11Ng7eFxCpBZwPXJfIv1p/9AIWIv3f6MOZ52+m3UINcI9aOslr
+Xs0yIxgzFutl9R/h96f8P/jtX9t9mTh/tXGL7fij8eCh+7jpW9nfwDvbQVhKAMAAA==
headers:
Accept-Ranges: [bytes]
Age: ['1814']
Connection: [keep-alive]
Content-Encoding: [gzip]
Content-Length: ['448']
Content-Type: [application/dash+xml]
Date: ['Sat, 12 Aug 2017 06:02:01 GMT']
ETag: ['"981cd6c498c30ab3c4f1c743be7e6f60"']
Last-Modified: ['Fri, 11 Aug 2017 21:10:49 GMT']
Server: [snooserv]
Vary: ['Accept-Encoding,Origin']
Via: [1.1 varnish]
X-Cache: [HIT]
X-Cache-Hits: ['2']
X-Served-By: [cache-mdw17337-MDW]
X-Timer: ['S1502517721.219943,VS0,VE0']
status: {code: 200, message: OK}
version: 1

View File

@@ -0,0 +1,44 @@
interactions:
- request:
body: null
headers:
Accept: ['*/*']
Accept-Encoding: ['gzip, deflate']
Connection: [keep-alive]
User-Agent: [python-requests/2.12.4]
method: GET
uri: https://v.redd.it/zv89llsvexdz/DASHPlaylist.mpd
response:
body:
string: !!binary |
H4sIAAAAAAAAA+WWXW+bMBSG7/srLN8TbKAUUEiVNp0WbdWikGyXkYsNWAKD+Mja/fodIJFImq5L
lV0tN0ns4/PxPuYcxrfPWYq2oqxkrnxMRwQjocKcSxX7eL36pDkYVTVTnKW5Ej5+ERW+nVyNHxcz
lAku2aIUlVA1q8HBrCm7bx8vVpSOblw7wCiT6q6JIlGuZCa6ndE1IbBRlHkkU1H5uCmVlxUi9jir
Em+37skqjyItVxoXGSTgGYRSjOqXArxUbcAQI8hevXJQhQmcgIXdmckVgs94IUqZc8RPJNlbdFZT
zoq+mkDUqBJxBtVNUxmr9oeP67IRIEnz9P5WULOyrn7IOgmmCxB3EKYLtRTFQDz0BEX+lLxOfGxY
1KaOiRGQECEUyLYhHVncIjTCKCpZJpasBh1M4JUIGScQ3nLgj+Q+/j6fPXzTaCt9JladXlvJRa5n
hdXRLOtBUmgf07aOEuySvGOVWC+/TmbT4PPG2Fibx7G+X3ttHfSVtwZIKi6el0zFEN8lpkYJsfFg
9eGZhXvVXrvq3M2VrCVL5a9eobJ3RjSXGFg/EV4fxD8SWz9U+69RUIM4N8bNKRTiTRSmfYDCOBMF
dZ13UVCA8Z+hsInr0uvzSBjWAQnzXBKm8S4Jm5DNl4+RMFoS1xcjQS9GYqwf9MG3+uOf0bGGyzxg
WZHCNOnRQI8iAGT4fJmUtn19DxVwsJFFRkaPbbqezY96Wef2NLYT1U9b6/uEKSXS+1xFMt71f9SN
CTHn61IeTxDDJMT0TK8LtQn705tweHw3jrYsbSCn08/A/kp0bj5yQxzT0FzzUhfEMf/xBRnr/YyF
9wMdXhAmV78BYdjQZV8IAAA=
headers:
Accept-Ranges: [bytes]
Age: ['3435']
Connection: [keep-alive]
Content-Encoding: [gzip]
Content-Length: ['644']
Content-Type: [application/dash+xml]
Date: ['Sat, 12 Aug 2017 06:02:01 GMT']
ETag: ['"c8a71f31b45dbcdb30f8c03bc08e444b"']
Last-Modified: ['Sat, 05 Aug 2017 14:05:39 GMT']
Server: [snooserv]
Vary: ['Accept-Encoding,Origin']
Via: [1.1 varnish]
X-Cache: [HIT]
X-Cache-Hits: ['2']
X-Served-By: [cache-mdw17335-MDW]
X-Timer: ['S1502517721.351252,VS0,VE0']
status: {code: 200, message: OK}
version: 1

View File

@@ -73,6 +73,14 @@ URLS = OrderedDict([
'https://www.liveleak.com/view?i=08b_1499296574',
re.compile('https://cdn.liveleak.com/80281E/ll_a_s/2017/Jul/5/LiveLeak-dot-com-08b_1499296574-NMHH8690_1499296571.mov.h264_720p.mp4(.*)'),
'video/mp4')),
('reddit_gif', (
'https://v.redd.it/wkm9zol8c6fz',
'https://v.redd.it/wkm9zol8c6fz/DASH_600_K',
'video/mp4')),
('reddit_video', (
'https://v.redd.it/zv89llsvexdz',
'https://v.redd.it/zv89llsvexdz/DASHPlaylist.mpd',
'video/x-youtube')),
])