diff --git a/rtv/mime_handlers.py b/rtv/mime_handlers.py
new file mode 100644
index 0000000..b5e2dc6
--- /dev/null
+++ b/rtv/mime_handlers.py
@@ -0,0 +1,135 @@
+import re
+import mimetypes
+from html.parser import HTMLParser
+
+import requests
+
+
+class HTMLParsed(Exception):
+ def __init__(self, data):
+ self.data = data
+
+
+class BaseHandler(object):
+ """
+ BaseHandler can be sub-classed to define custom handlers for determining
+ the MIME type of external urls.
+ """
+
+ # URL regex pattern that the handler will be triggered on
+ pattern = re.compile(r'.*$')
+
+ @staticmethod
+ def get_mimetype(url):
+ """
+ Args:
+ url (text): Web url that was linked to by a reddit submission.
+
+ Returns:
+ modified_url (text): The url (or filename) that will be used when
+ constructing the command to run.
+ content_type (text): The mime-type that will be used when
+ constructing the command to run. If the mime-type is unknown,
+ return None and the program will fallback to using the web
+ browser.
+ """
+
+ # Guess based on the file extension
+ filename = url.split('?')[0]
+ content_type, _ = mimetypes.guess_type(filename)
+ return url, content_type
+
+
+class YoutubeHandler(BaseHandler):
+ """
+ Youtube videos can be streamed with vlc or downloaded with youtube-dl.
+ Assign a custom mime-type so they can be referenced in mailcap.
+ """
+
+ pattern = re.compile(
+ r'(?:https?://)?(m\.)?(?:youtu\.be/|(?:www\.)?youtube\.com/watch'
+ r'(?:\.php)?\'?.*v=)([a-zA-Z0-9\-_]+)')
+
+ @staticmethod
+ def get_mimetype(url):
+ return url, 'video/x-youtube'
+
+
+class GifvHandler(BaseHandler):
+ """
+ Special case for .gifv, which is a custom video format for imgur that is
+ incorrectly (or on purpose?) returned with a Content-Type of text/html.
+ """
+ pattern = re.compile(r'.*[.]gifv$')
+
+ @staticmethod
+ def get_mimetype(url):
+ modified_url = url[:-4] + 'webm'
+ return modified_url, 'video/webm'
+
+
+class RedditUploadsHandler(BaseHandler):
+ """
+ Reddit uploads do not have a file extension, but we can grab the mime-type
+ from the page header.
+ """
+ pattern = re.compile(r'https://i.reddituploads.com/.+$')
+
+ @staticmethod
+ def get_mimetype(url):
+ page = requests.head(url)
+ content_type = page.headers.get('Content-Type', '')
+ content_type = content_type.split(';')[0] # Strip out the encoding
+ return url, content_type
+
+
+class ImgurHTMLParser(HTMLParser):
+ """
+ Scrape the actual image url from an imgur landing page. Imgur intentionally
+ obscures this on most reddit links in order to draw more traffic for their
+ advertisements.
+
+ There are a couple of tags that supply the relevant info:
+
+
+
+
+ Note:
+ BeautifulSoup or lxml would be faster here but I wanted to skip adding
+ an extra dependency for something as trivial as this.
+ """
+
+ def handle_starttag(self, tag, attr):
+ if tag == 'meta' and attr[0] == ('name', 'twitter:image'):
+ raise HTMLParsed(attr[1][1])
+
+
+class ImgurHandler(BaseHandler):
+ """
+ The majority of imgur links don't point directly to the image, so we need
+ to open the provided url and scrape the page for the link. For galleries,
+ this method only returns the first image.
+ """
+ pattern = re.compile(r'https?://(w+\.)?(m\.)?imgur\.com/[^.]+$')
+
+ @staticmethod
+ def get_mimetype(url):
+ imgur_page = requests.get(url)
+ try:
+ ImgurHTMLParser().feed(imgur_page.text)
+ except HTMLParsed as data:
+ # We found a link
+ url = data.data
+ if GifvHandler.pattern.match(url):
+ return GifvHandler.get_mimetype(url)
+
+ return BaseHandler.get_mimetype(url)
+
+
+# Handlers should be defined in the order they will be checked
+handlers = [
+ ImgurHandler,
+ RedditUploadsHandler,
+ YoutubeHandler,
+ GifvHandler,
+ BaseHandler]