Add MIME parser for Reddit-hosted video
This commit is contained in:
@@ -128,6 +128,28 @@ class RedditUploadsMIMEParser(BaseMIMEParser):
|
||||
return url, content_type
|
||||
|
||||
|
||||
class RedditVideoMIMEParser(BaseMIMEParser):
    """
    Reddit hosted videos/gifs.

    Media uses MPEG-DASH format (.mpd). The manifest at
    ``<url>/DASHPlaylist.mpd`` is downloaded and inspected to decide whether
    the media can be opened as a single mp4 file or must be handed off as a
    playlist.
    """
    pattern = re.compile(r'https://v\.redd\.it/.+$')

    @staticmethod
    def get_mimetype(url):
        """
        Resolve a v.redd.it link to a playable URL and its MIME type.

        Params:
            url (text): Link matching ``pattern``, without the playlist
                suffix.

        Returns:
            (text, text): A ``(url, mimetype)`` pair.

            * When the manifest lists no ``audio/mp4`` representation, the
              URL of the ``video/mp4`` representation with the highest
              ``bandwidth`` is returned with type ``'video/mp4'``.
            * Otherwise (an audio track exists, or no usable video
              representation could be found) the manifest URL itself is
              returned with type ``'video/x-youtube'``.
              NOTE(review): presumably this routes the playlist to a player
              that understands DASH manifests -- confirm against the
              mailcap configuration.
        """
        request_url = url + '/DASHPlaylist.mpd'
        page = requests.get(request_url)

        # 'html.parser' lower-cases tag and attribute names, which is why the
        # lookups below use 'representation' / 'mimetype' / 'baseurl' rather
        # than the manifest's own mixed-case spelling.
        soup = BeautifulSoup(page.content, 'html.parser')

        if not soup.find('representation', attrs={'mimetype': 'audio/mp4'}):
            reps = soup.find_all('representation',
                                 attrs={'mimetype': 'video/mp4'})
            # Guard against a manifest without any video representation (for
            # example an error page); indexing an empty list here would raise
            # IndexError instead of falling back gracefully.
            if reps:
                # max() picks the first representation with the greatest
                # bandwidth, matching a stable descending sort followed by
                # [0]. A missing bandwidth attribute is ranked lowest.
                rep = max(reps, key=lambda t: int(t.get('bandwidth') or 0))
                base_url = rep.find('baseurl')
                if base_url is not None:
                    return url + '/' + base_url.text, 'video/mp4'

        return request_url, 'video/x-youtube'
|
||||
|
||||
|
||||
class ImgurApiMIMEParser(BaseMIMEParser):
|
||||
"""
|
||||
Imgur now provides a json API exposing its entire infrastructure. Each Imgur
|
||||
@@ -335,6 +357,7 @@ parsers = [
|
||||
GfycatMIMEParser,
|
||||
ImgurApiMIMEParser,
|
||||
RedditUploadsMIMEParser,
|
||||
RedditVideoMIMEParser,
|
||||
YoutubeMIMEParser,
|
||||
LiveleakMIMEParser,
|
||||
GifvMIMEParser,
|
||||
|
||||
40
tests/cassettes/test_parser[reddit_gif].yaml
Normal file
40
tests/cassettes/test_parser[reddit_gif].yaml
Normal file
@@ -0,0 +1,40 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: null
|
||||
headers:
|
||||
Accept: ['*/*']
|
||||
Accept-Encoding: ['gzip, deflate']
|
||||
Connection: [keep-alive]
|
||||
User-Agent: [python-requests/2.12.4]
|
||||
method: GET
|
||||
uri: https://v.redd.it/wkm9zol8c6fz/DASHPlaylist.mpd
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAA4VSXY+bMBB8z6+w/E4whKYJCjnllKt6aquikGsfTz68gCVskG3SXH99F8ippDmp
|
||||
vAD7MTM7u5u7s6rJCYyVjU5oMGeUgM4bIXWZ0KfjJ29FiXVcC143GhL6CpbebWebb+meKBCSpwYs
|
||||
aMcdAuw7M7wTmh6DaB4uoowSJfV9VxRgjlLBkJl/YAwTrWkKWYNNaGd0rFooY8FtFV/isbRNUXiN
|
||||
9gQoFBCHLAgoca8totieMKcE1esbAJtX2IGBS892RvDZpGBkI4h4R+RYMVTtBG/HaTJwxEKpcLpd
|
||||
LUvdfyTUmQ7Qku7l/6nMcePsT+mqbJeiuROageoA7cQ88oJD/pLCVQldspCtF5TgIiDH+fgpD+aR
|
||||
iFiABIXhCg7coQ0LXFcFsqyQPYzwR4qE/njcP3z3gt55BcfBrpMU0PiqjYZlGjfRRC6Uffu1vkHj
|
||||
PbfwdPi63e+yz89Lxp6/bPy32G11Ng7eFxCpBZwPXJfIv1p/9AIWIv3f6MOZ52+m3UINcI9aOslr
|
||||
+Xs0yIxgzFutl9R/h96f8P/jtX9t9mTh/tXGL7fij8eCh+7jpW9nfwDvbQVhKAMAAA==
|
||||
headers:
|
||||
Accept-Ranges: [bytes]
|
||||
Age: ['1814']
|
||||
Connection: [keep-alive]
|
||||
Content-Encoding: [gzip]
|
||||
Content-Length: ['448']
|
||||
Content-Type: [application/dash+xml]
|
||||
Date: ['Sat, 12 Aug 2017 06:02:01 GMT']
|
||||
ETag: ['"981cd6c498c30ab3c4f1c743be7e6f60"']
|
||||
Last-Modified: ['Fri, 11 Aug 2017 21:10:49 GMT']
|
||||
Server: [snooserv]
|
||||
Vary: ['Accept-Encoding,Origin']
|
||||
Via: [1.1 varnish]
|
||||
X-Cache: [HIT]
|
||||
X-Cache-Hits: ['2']
|
||||
X-Served-By: [cache-mdw17337-MDW]
|
||||
X-Timer: ['S1502517721.219943,VS0,VE0']
|
||||
status: {code: 200, message: OK}
|
||||
version: 1
|
||||
44
tests/cassettes/test_parser[reddit_video].yaml
Normal file
44
tests/cassettes/test_parser[reddit_video].yaml
Normal file
@@ -0,0 +1,44 @@
|
||||
interactions:
|
||||
- request:
|
||||
body: null
|
||||
headers:
|
||||
Accept: ['*/*']
|
||||
Accept-Encoding: ['gzip, deflate']
|
||||
Connection: [keep-alive]
|
||||
User-Agent: [python-requests/2.12.4]
|
||||
method: GET
|
||||
uri: https://v.redd.it/zv89llsvexdz/DASHPlaylist.mpd
|
||||
response:
|
||||
body:
|
||||
string: !!binary |
|
||||
H4sIAAAAAAAAA+WWXW+bMBSG7/srLN8TbKAUUEiVNp0WbdWikGyXkYsNWAKD+Mja/fodIJFImq5L
|
||||
lV0tN0ns4/PxPuYcxrfPWYq2oqxkrnxMRwQjocKcSxX7eL36pDkYVTVTnKW5Ej5+ERW+nVyNHxcz
|
||||
lAku2aIUlVA1q8HBrCm7bx8vVpSOblw7wCiT6q6JIlGuZCa6ndE1IbBRlHkkU1H5uCmVlxUi9jir
|
||||
Em+37skqjyItVxoXGSTgGYRSjOqXArxUbcAQI8hevXJQhQmcgIXdmckVgs94IUqZc8RPJNlbdFZT
|
||||
zoq+mkDUqBJxBtVNUxmr9oeP67IRIEnz9P5WULOyrn7IOgmmCxB3EKYLtRTFQDz0BEX+lLxOfGxY
|
||||
1KaOiRGQECEUyLYhHVncIjTCKCpZJpasBh1M4JUIGScQ3nLgj+Q+/j6fPXzTaCt9JladXlvJRa5n
|
||||
hdXRLOtBUmgf07aOEuySvGOVWC+/TmbT4PPG2Fibx7G+X3ttHfSVtwZIKi6el0zFEN8lpkYJsfFg
|
||||
9eGZhXvVXrvq3M2VrCVL5a9eobJ3RjSXGFg/EV4fxD8SWz9U+69RUIM4N8bNKRTiTRSmfYDCOBMF
|
||||
dZ13UVCA8Z+hsInr0uvzSBjWAQnzXBKm8S4Jm5DNl4+RMFoS1xcjQS9GYqwf9MG3+uOf0bGGyzxg
|
||||
WZHCNOnRQI8iAGT4fJmUtn19DxVwsJFFRkaPbbqezY96Wef2NLYT1U9b6/uEKSXS+1xFMt71f9SN
|
||||
CTHn61IeTxDDJMT0TK8LtQn705tweHw3jrYsbSCn08/A/kp0bj5yQxzT0FzzUhfEMf/xBRnr/YyF
|
||||
9wMdXhAmV78BYdjQZV8IAAA=
|
||||
headers:
|
||||
Accept-Ranges: [bytes]
|
||||
Age: ['3435']
|
||||
Connection: [keep-alive]
|
||||
Content-Encoding: [gzip]
|
||||
Content-Length: ['644']
|
||||
Content-Type: [application/dash+xml]
|
||||
Date: ['Sat, 12 Aug 2017 06:02:01 GMT']
|
||||
ETag: ['"c8a71f31b45dbcdb30f8c03bc08e444b"']
|
||||
Last-Modified: ['Sat, 05 Aug 2017 14:05:39 GMT']
|
||||
Server: [snooserv]
|
||||
Vary: ['Accept-Encoding,Origin']
|
||||
Via: [1.1 varnish]
|
||||
X-Cache: [HIT]
|
||||
X-Cache-Hits: ['2']
|
||||
X-Served-By: [cache-mdw17335-MDW]
|
||||
X-Timer: ['S1502517721.351252,VS0,VE0']
|
||||
status: {code: 200, message: OK}
|
||||
version: 1
|
||||
@@ -73,6 +73,14 @@ URLS = OrderedDict([
|
||||
'https://www.liveleak.com/view?i=08b_1499296574',
|
||||
re.compile('https://cdn.liveleak.com/80281E/ll_a_s/2017/Jul/5/LiveLeak-dot-com-08b_1499296574-NMHH8690_1499296571.mov.h264_720p.mp4(.*)'),
|
||||
'video/mp4')),
|
||||
('reddit_gif', (
|
||||
'https://v.redd.it/wkm9zol8c6fz',
|
||||
'https://v.redd.it/wkm9zol8c6fz/DASH_600_K',
|
||||
'video/mp4')),
|
||||
('reddit_video', (
|
||||
'https://v.redd.it/zv89llsvexdz',
|
||||
'https://v.redd.it/zv89llsvexdz/DASHPlaylist.mpd',
|
||||
'video/x-youtube')),
|
||||
])
|
||||
|
||||
|
||||
@@ -121,4 +129,4 @@ def test_imgur_fallback(reddit):
|
||||
parsed_url, parsed_type = ImgurApiMIMEParser.get_mimetype(url)
|
||||
# Not sure why, but http://imgur.com/gallery/yjP1v4B (a .gif)
|
||||
# appears to incorrectly return as a JPG type from the scraper
|
||||
assert parsed_type is not None
|
||||
assert parsed_type is not None
|
||||
|
||||
Reference in New Issue
Block a user