Add instagram parser

This commit is contained in:
woorst
2016-08-06 01:52:57 -05:00
parent ddba5aab06
commit cc1d208b37

View File

@@ -164,8 +164,33 @@ class ImgurAlbumMIMEParser(BaseMIMEParser):
return url, None return url, None
class InstagramMIMEParser(BaseMIMEParser):
    """
    Resolve an Instagram post page to the media URL it embeds.

    Instagram pages can contain either an embedded image or video. The <meta>
    tags below provide the relevant info.

    <meta property="og:image" content="https://xxxx.jpg?ig_cache_key=xxxxx" />
    <meta property="og:video:secure_url" content="https://xxxxx.mp4" />

    If the page is a video page both of the above tags will be present, so
    the video tag is always checked before the image tag.
    """
    pattern = re.compile(r'https?://(www\.)?instagr((am\.com)|\.am)/p/[^.]+$')

    @staticmethod
    def get_mimetype(url):
        """
        Download the page at ``url`` and hand its embedded media URL to
        ``BaseMIMEParser.get_mimetype``.

        Returns the result of ``BaseMIMEParser.get_mimetype`` for the first
        usable Open Graph media URL. If the page exposes neither tag (or the
        tag has no ``content`` attribute), returns ``(url, None)`` instead of
        raising -- the same fallback shape the preceding Imgur album parser
        uses (presumably meaning "no MIME type could be determined").
        """
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')

        # Order matters: video pages carry an og:image tag as well, so the
        # video property has to win when both are present.
        for prop in ('og:video:secure_url', 'og:image'):
            tag = soup.find('meta', attrs={'property': prop})
            media_url = tag.get('content') if tag is not None else None
            if media_url:
                return BaseMIMEParser.get_mimetype(media_url)

        # Private/removed posts, login walls and error pages may carry no
        # media tags at all; previously this raised IndexError on tags[0].
        return url, None
# Parsers should be listed in the order they will be checked
parsers = [ parsers = [
InstagramMIMEParser,
GfycatMIMEParser, GfycatMIMEParser,
ImgurAlbumMIMEParser, ImgurAlbumMIMEParser,
ImgurMIMEParser, ImgurMIMEParser,