From cc1d208b37fe63ca1c3c3527ab473444ae0f1002 Mon Sep 17 00:00:00 2001
From: woorst
Date: Sat, 6 Aug 2016 01:52:57 -0500
Subject: [PATCH] Add instagram parser

---
 rtv/mime_parsers.py | 32 ++++++++++++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/rtv/mime_parsers.py b/rtv/mime_parsers.py
index a33b33c..799aaca 100644
--- a/rtv/mime_parsers.py
+++ b/rtv/mime_parsers.py
@@ -164,8 +164,40 @@ class ImgurAlbumMIMEParser(BaseMIMEParser):
             return url, None
 
 
+class InstagramMIMEParser(BaseMIMEParser):
+    """
+    Instagram pages can contain either an embedded image or video. The
+    <meta> tags below provide the relevant info.
+
+    <meta property="og:image" content="*.jpg" />
+    <meta property="og:video:secure_url" content="*.mp4" />
+
+    If the page is a video page both of the above tags will be present.
+    """
+    pattern = re.compile(r'https?://(www\.)?instagr((am\.com)|\.am)/p/[^.]+$')
+
+    @staticmethod
+    def get_mimetype(url):
+        """
+        Look up the media url in the page's Open Graph <meta> tags and
+        pass it to BaseMIMEParser.get_mimetype. Falls back to
+        ``(url, None)`` when neither tag has a usable content value.
+        """
+        page = requests.get(url)
+        soup = BeautifulSoup(page.content, 'html.parser')
+        # Video pages carry both tags, so the video tag is checked first.
+        for prop in ('og:video:secure_url', 'og:image'):
+            tag = soup.find('meta', attrs={'property': prop})
+            content = tag.get('content') if tag else None
+            if content:
+                return BaseMIMEParser.get_mimetype(content)
+        # Neither tag was usable: report an unknown type instead of raising.
+        return url, None
+
+
 # Parsers should be listed in the order they will be checked
 parsers = [
+    InstagramMIMEParser,
     GfycatMIMEParser,
     ImgurAlbumMIMEParser,
     ImgurMIMEParser,