Add Instagram parser

2016-08-06 01:52:57 -05:00
parent ddba5aab06
commit cc1d208b37
1 changed files with 25 additions and 0 deletions
--- a/rtv/mime_parsers.py
+++ b/rtv/mime_parsers.py
@@ -164,8 +164,33 @@ class ImgurAlbumMIMEParser(BaseMIMEParser):
            return url, None
 class InstagramMIMEParser(BaseMIMEParser):
    """
    Resolve an Instagram post page to the media file embedded in it.

    Instagram pages can contain either an embedded image or video. The <meta>
    tags below provide the relevant info.

    <meta property="og:image" content="https://xxxx.jpg?ig_cache_key=xxxxx" />
    <meta property="og:video:secure_url" content="https://xxxxx.mp4" />

    If the page is a video page both of the above tags will be present, so
    the video tag is checked first and the image tag is only a fallback.
    """
    pattern = re.compile(r'https?://(www\.)?instagr((am\.com)|\.am)/p/[^.]+$')

    @staticmethod
    def get_mimetype(url):
        """
        Fetch the page at *url* and delegate to BaseMIMEParser.get_mimetype
        with the media URL advertised in its Open Graph <meta> tags.

        Returns the result of BaseMIMEParser.get_mimetype for the first
        usable media URL found. If the page exposes neither tag (or the tags
        carry no ``content`` attribute), returns ``(url, None)`` instead of
        raising IndexError, matching the ``url, None`` fallback used by the
        Imgur album parser in this file.
        """
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')

        # Order matters: video pages also carry an og:image (the thumbnail),
        # so the video property has to win when both are present.
        for prop in ('og:video:secure_url', 'og:image'):
            for tag in soup.find_all('meta', attrs={'property': prop}):
                media_url = tag.get('content')
                if media_url:
                    return BaseMIMEParser.get_mimetype(media_url)

        # No usable media tag on the page.
        return url, None
 # Parsers should be listed in the order they will be checked
 parsers = [
    InstagramMIMEParser,
    GfycatMIMEParser,
    ImgurAlbumMIMEParser,
    ImgurMIMEParser,