diff --git a/rtv/mime_parsers.py b/rtv/mime_parsers.py index 59b978d..770d753 100644 --- a/rtv/mime_parsers.py +++ b/rtv/mime_parsers.py @@ -36,6 +36,32 @@ class BaseMIMEParser(object): return url, content_type +class OpenGraphMIMEParser(BaseMIMEParser): + """ + Open graph protocol is used on many web pages. + + <meta property="og:image" content="https://xxxxx.jpg?ig_cache_key=xxxxx" /> + <meta property="og:video:secure_url" content="https://xxxxx.mp4" /> + + If the page is a video page both of the above tags will be present and + priority is given to video content. + + see http://ogp.me + """ + pattern = re.compile(r'.*$') + + @staticmethod + def get_mimetype(url): + page = requests.get(url) + soup = BeautifulSoup(page.content, 'html.parser') + tag = soup.find('meta', attrs={'property': 'og:video:secure_url'}) + tag = tag or soup.find('meta', attrs={'property': 'og:image'}) + if tag: + return BaseMIMEParser.get_mimetype(tag.get('content')) + else: + return url, None + + class GfycatMIMEParser(BaseMIMEParser): """ Gfycat provides a primitive json api to generate image links. URLs can be @@ -164,32 +190,23 @@ class ImgurAlbumMIMEParser(BaseMIMEParser): return url, None -class InstagramMIMEParser(BaseMIMEParser): +class InstagramMIMEParser(OpenGraphMIMEParser): """ - Instagram pages can contain either an embedded image or video. The - tags below provide the relevant info. - - <meta property="og:image" content="https://xxxxx.jpg?ig_cache_key=xxxxx" /> - <meta property="og:video:secure_url" content="https://xxxxx.mp4" /> - - If the page is a video page both of the above tags will be present. 
+ Instagram uses the Open Graph protocol """ pattern = re.compile(r'https?://(www\.)?instagr((am\.com)|\.am)/p/[^.]+$') - @staticmethod - def get_mimetype(url): - page = requests.get(url) - soup = BeautifulSoup(page.content, 'html.parser') - tag = soup.find('meta', attrs={'property': 'og:video:secure_url'}) - tag = tag or soup.find('meta', attrs={'property': 'og:image'}) - if tag: - return BaseMIMEParser.get_mimetype(tag.get('content')) - else: - return url, None + +class StreamableMIMEParser(OpenGraphMIMEParser): + """ + Streamable uses the Open Graph protocol + """ + pattern = re.compile(r'https?://(www\.)?streamable\.com/[^.]+$') # Parsers should be listed in the order they will be checked parsers = [ + StreamableMIMEParser, InstagramMIMEParser, GfycatMIMEParser, ImgurAlbumMIMEParser,