From cc1d208b37fe63ca1c3c3527ab473444ae0f1002 Mon Sep 17 00:00:00 2001
From: woorst <woorst@users.noreply.github.com>
Date: Sat, 6 Aug 2016 01:52:57 -0500
Subject: [PATCH] Add instagram parser

---
 rtv/mime_parsers.py | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)
diff --git a/rtv/mime_parsers.py b/rtv/mime_parsers.py
index a33b33c..799aaca 100644
--- a/rtv/mime_parsers.py
+++ b/rtv/mime_parsers.py
@@ -164,8 +164,33 @@ class ImgurAlbumMIMEParser(BaseMIMEParser):
             return url, None
 
 
+class InstagramMIMEParser(BaseMIMEParser):
+    """
+    Instagram pages can contain either an embedded image or video. The <meta>
+    tags below provide the relevant info.
+
+    <meta property="og:image" content="https://xxxx.jpg?ig_cache_key=xxxxx" />
+    <meta property="og:video:secure_url" content="https://xxxxx.mp4" />
+
+    If the page is a video page both tags are present, so the video tag is
+    checked first. If neither tag is usable, (url, None) is returned.
+    """
+    pattern = re.compile(r'https?://(www\.)?instagr((am\.com)|\.am)/p/[^.]+$')
+
+    @staticmethod
+    def get_mimetype(url):
+        page = requests.get(url)
+        soup = BeautifulSoup(page.content, 'html.parser')
+        for prop in ('og:video:secure_url', 'og:image'):
+            tag = soup.find('meta', attrs={'property': prop})
+            if tag and tag.get('content'):
+                return BaseMIMEParser.get_mimetype(tag.get('content'))
+        return url, None
+
+
 # Parsers should be listed in the order they will be checked
 parsers = [
+    InstagramMIMEParser,
     GfycatMIMEParser,
     ImgurAlbumMIMEParser,
     ImgurMIMEParser,