Create mime parser super class. Add streamable mime parser.

2017-05-28 01:33:36 -05:00
parent 84824f0352
commit 02a6a4e064
1 changed files with 35 additions and 18 deletions
--- a/rtv/mime_parsers.py
+++ b/rtv/mime_parsers.py
@@ -36,6 +36,32 @@ class BaseMIMEParser(object):
        return url, content_type
 class OpenGraphMIMEParser(BaseMIMEParser):
    """
    Resolve a page's media URL from its Open Graph <meta> tags.

    The Open Graph protocol is used on many web pages. Pages that follow it
    advertise their media with tags such as:

    <meta property="og:image" content="https://xxxx.jpg?ig_cache_key=xxxxx" />
    <meta property="og:video:secure_url" content="https://xxxxx.mp4" />

    If the page is a video page both of the above tags will be present and
    priority is given to video content.

    see http://ogp.me
    """
    # '.*$' matches any string.
    # NOTE(review): presumably this makes the class a catch-all that
    # subclasses narrow by overriding `pattern` -- confirm against the code
    # that consumes `pattern`.
    pattern = re.compile(r'.*$')
    @staticmethod
    def get_mimetype(url):
        """
        Fetch the page at ``url`` and look up its Open Graph media tag.

        Returns the result of ``BaseMIMEParser.get_mimetype`` called on the
        ``content`` attribute of the first matching <meta> tag, or the tuple
        ``(url, None)`` when the page has neither an ``og:video:secure_url``
        nor an ``og:image`` tag.
        """
        page = requests.get(url)
        soup = BeautifulSoup(page.content, 'html.parser')
        # Prefer the video tag; the image tag is only consulted when no
        # video tag was found.
        tag = soup.find('meta', attrs={'property': 'og:video:secure_url'})
        tag = tag or soup.find('meta', attrs={'property':  'og:image'})
        if tag:
            # Delegate to the base parser using the media URL the tag points at.
            return BaseMIMEParser.get_mimetype(tag.get('content'))
        else:
            return url, None
 class GfycatMIMEParser(BaseMIMEParser):
    """
    Gfycat provides a primitive json api to generate image links. URLs can be
@@ -164,32 +190,23 @@ class ImgurAlbumMIMEParser(BaseMIMEParser):
            return url, None
-class InstagramMIMEParser(BaseMIMEParser):
+class InstagramMIMEParser(OpenGraphMIMEParser):
    """
-    Instagram pages can contain either an embedded image or video. The <meta>
+    Instagram uses the Open Graph protocol. The <meta>
    tags below provide the relevant info.
    <meta property="og:image" content="https://xxxx.jpg?ig_cache_key=xxxxx" />
    <meta property="og:video:secure_url" content="https://xxxxx.mp4" />
    If the page is a video page both of the above tags will be present.
    """
    pattern = re.compile(r'https?://(www\.)?instagr((am\.com)|\.am)/p/[^.]+$')
-    @staticmethod
+
-    def get_mimetype(url):
+class StreamableMIMEParser(OpenGraphMIMEParser):
-        page = requests.get(url)
+    """
-        soup = BeautifulSoup(page.content, 'html.parser')
+    Streamable uses the Open Graph protocol
-        tag = soup.find('meta', attrs={'property': 'og:video:secure_url'})
+    """
-        tag = tag or soup.find('meta', attrs={'property':  'og:image'})
+    pattern = re.compile(r'https?://(www\.)?streamable\.com/[^.]+$')
        if tag:
            return BaseMIMEParser.get_mimetype(tag.get('content'))
        else:
            return url, None
 # Parsers should be listed in the order they will be checked
 parsers = [
    StreamableMIMEParser,
    InstagramMIMEParser,
    GfycatMIMEParser,
    ImgurAlbumMIMEParser,