From 4abc0bb6d0e3d7871723d39ddb7498a7b0045532 Mon Sep 17 00:00:00 2001
From: woorst
Date: Wed, 20 Jun 2018 22:44:31 -0400
Subject: [PATCH 1/2] Add mime parser: streamja.com

---
 rtv/mime_parsers.py                        |  42 ++++++
 tests/cassettes/test_parser[streamja].yaml | 138 +++++++++++++++++++++
 tests/test_mime_parsers.py                 |   4 +
 3 files changed, 184 insertions(+)
 create mode 100644 tests/cassettes/test_parser[streamja].yaml

diff --git a/rtv/mime_parsers.py b/rtv/mime_parsers.py
index 79583a5..eee99cb 100644
--- a/rtv/mime_parsers.py
+++ b/rtv/mime_parsers.py
@@ -67,6 +67,40 @@ class OpenGraphMIMEParser(BaseMIMEParser):
         return url, None
 
 
+class VideoTagMIMEParser(BaseMIMEParser):
+    """
+    Generic parser for pages that embed an HTML5 <video> element.
+
+    Only the first <video> on the page is inspected. Among its <source>
+    children the preference order is: one tagged res="HD", then one with
+    type="video/mp4", then whichever <source> comes first.
+    """
+    pattern = re.compile(r'.*$')
+
+    @staticmethod
+    def get_mimetype(url):
+        """
+        Fetch the page at `url` and return a (src, type) tuple taken from
+        the chosen <source> tag, or (url, None) if the page has no
+        <video> element or the <video> has no <source> child.
+        """
+        page = requests.get(url)
+        soup = BeautifulSoup(page.content, 'html.parser')
+
+        # TODO: Handle pages with multiple videos
+        video = soup.find('video')
+        source = None
+        if video:
+            # Prefer the HD rendition, then any mp4, then the first <source>
+            source = (video.find('source', attrs={'res': 'HD'})
+                      or video.find('source', attrs={'type': 'video/mp4'})
+                      or video.find('source'))
+        if source:
+            return source.get('src'), source.get('type')
+        else:
+            return url, None
+
+
 class GfycatMIMEParser(BaseMIMEParser):
     """
     Gfycat provides a primitive json api to generate image links. URLs can be
@@ -458,6 +492,13 @@ class FlickrMIMEParser(OpenGraphMIMEParser):
     pattern = re.compile(r'https?://(www\.)?flickr\.com/photos/[^/]+/[^/]+/?$')
 
 
+class StreamjaMIMEParser(VideoTagMIMEParser):
+    """
+    Embedded HTML5 video element
+    """
+    pattern = re.compile(r'https?://(www\.)?streamja\.com/[^/]+/?$')
+
+
 class WorldStarHipHopMIMEParser(BaseMIMEParser):
     """