Merge remote-tracking branch 'upstream/master'

2016-07-30 12:52:37 +02:00
parent 94dc86f915 06ac46ccce
commit d141182333
28 changed files with 1791 additions and 154 deletions
@@ -5,12 +5,14 @@ import os
 import sys
 import locale
 import logging
+import warnings

+import six
 import praw
 import tornado

 from . import docs
-from .config import Config, copy_default_config
+from .config import Config, copy_default_config, copy_default_mailcap
 from .oauth import OAuthHelper
 from .terminal import Terminal
 from .objects import curses_session, Color
@@ -34,6 +36,10 @@ def main():

    # Squelch SSL warnings
    logging.captureWarnings(True)
+    if six.PY3:
+        # These ones get triggered even when capturing warnings is turned on
+        warnings.simplefilter('ignore', ResourceWarning)  #pylint:disable=E0602
+
    locale.setlocale(locale.LC_ALL, '')

    # Set the terminal title
@@ -59,6 +65,10 @@ def main():
        copy_default_config()
        return

+    if config['copy_mailcap']:
+        copy_default_mailcap()
+        return
+
    # Load the browsing history from previous sessions
    config.load_history()

@@ -103,7 +113,7 @@ def main():
            if not config['monochrome']:
                Color.init()

-            term = Terminal(stdscr, config['ascii'])
+            term = Terminal(stdscr, config)
            with term.loader('Initializing', catch_exception=False):
                reddit = praw.Reddit(user_agent=user_agent,
                                     decode_html_entities=False,
@@ -16,9 +16,11 @@ from .objects import KeyMap
 PACKAGE = os.path.dirname(__file__)
 HOME = os.path.expanduser('~')
 TEMPLATE = os.path.join(PACKAGE, 'templates')
-DEFAULT_CONFIG = os.path.join(PACKAGE, 'rtv.cfg')
+DEFAULT_CONFIG = os.path.join(TEMPLATE, 'rtv.cfg')
+DEFAULT_MAILCAP = os.path.join(TEMPLATE, 'mailcap')
 XDG_HOME = os.getenv('XDG_CONFIG_HOME', os.path.join(HOME, '.config'))
 CONFIG = os.path.join(XDG_HOME, 'rtv', 'rtv.cfg')
+MAILCAP = os.path.join(HOME, '.mailcap')
 TOKEN = os.path.join(XDG_HOME, 'rtv', 'refresh-token')
 HISTORY = os.path.join(XDG_HOME, 'rtv', 'history.log')

@@ -59,30 +61,50 @@ def build_parser():
    parser.add_argument(
        '--copy-config', dest='copy_config', action='store_const', const=True,
        help='Copy the default configuration to {HOME}/.config/rtv/rtv.cfg')
+    parser.add_argument(
+        '--copy-mailcap', dest='copy_mailcap', action='store_const', const=True,
+        help='Copy an example mailcap configuration to {HOME}/.mailcap')
+    parser.add_argument(
+        '--enable-media', dest='enable_media', action='store_const', const=True,
+        help='Open external links using programs defined in the mailcap config')
    return parser


+def copy_default_mailcap(filename=MAILCAP):
+    """
+    Copy the example mailcap configuration to the specified file.
+    """
+    return _copy_settings_file(DEFAULT_MAILCAP, filename, 'mailcap')
+
+
 def copy_default_config(filename=CONFIG):
    """
-    Copy the default configuration file to the user's {HOME}/.config/rtv
+    Copy the default rtv user configuration to the specified file.
+    """
+    return _copy_settings_file(DEFAULT_CONFIG, filename, 'config')
+
+
+def _copy_settings_file(source, destination, name):
+    """
+    Copy a file from the repo to the user's home directory.
    """

-    if os.path.exists(filename):
+    if os.path.exists(destination):
        try:
            ch = six.moves.input(
-                'File %s already exists, overwrite? y/[n]):' % filename)
+                'File %s already exists, overwrite? y/[n]):' % destination)
            if ch not in ('Y', 'y'):
                return
        except KeyboardInterrupt:
            return

-    filepath = os.path.dirname(filename)
+    filepath = os.path.dirname(destination)
    if not os.path.exists(filepath):
        os.makedirs(filepath)

-    print('Copying default settings to %s' % filename)
-    shutil.copy(DEFAULT_CONFIG, filename)
-    os.chmod(filename, 0o664)
+    print('Copying default %s to %s' % (name, destination))
+    shutil.copy(source, destination)
+    os.chmod(destination, 0o664)


 class OrderedSet(object):
@@ -215,6 +237,7 @@ class Config(object):
            'monochrome': partial(config.getboolean, 'rtv'),
            'clear_auth': partial(config.getboolean, 'rtv'),
            'persistent': partial(config.getboolean, 'rtv'),
+            'enable_media': partial(config.getboolean, 'rtv'),
            'history_size': partial(config.getint, 'rtv'),
            'oauth_redirect_port': partial(config.getint, 'rtv'),
            'oauth_scope': lambda x: rtv[x].split(',')
@@ -240,4 +263,4 @@ class Config(object):

        filepath = os.path.dirname(filename)
        if not os.path.exists(filepath):
-            os.makedirs(filepath)
+            os.makedirs(filepath)
@@ -40,3 +40,7 @@ class BrowserError(RTVError):

 class TemporaryFileError(RTVError):
    "Indicates that an error has occurred and the file should not be deleted"
+
+
+class MailcapEntryNotFound(RTVError):
+    "No usable mailcap command could be determined for the given url"
@@ -0,0 +1,175 @@
+import re
+import logging
+import mimetypes
+
+import requests
+from bs4 import BeautifulSoup
+
+_logger = logging.getLogger(__name__)
+
+
+class BaseMIMEParser(object):
+    """
+    BaseMIMEParser can be sub-classed to define custom handlers for determining
+    the MIME type of external urls.
+    """
+    pattern = re.compile(r'.*$')
+
+    @staticmethod
+    def get_mimetype(url):
+        """
+        Guess based on the file extension.
+
+        Args:
+            url (text): Web url that was linked to by a reddit submission.
+
+        Returns:
+            modified_url (text): The url (or filename) that will be used when
+                constructing the command to run.
+            content_type (text): The mime-type that will be used when
+                constructing the command to run. If the mime-type is unknown,
+                return None and the program will fallback to using the web
+                browser.
+        """
+        filename = url.split('?')[0]
+        content_type, _ = mimetypes.guess_type(filename)
+        return url, content_type
+
+
+class GfycatMIMEParser(BaseMIMEParser):
+    """
+    Gfycat provides a primitive json api to generate image links. URLs can be
+    downloaded as either gif, webm, or mjpg. Webm was selected because it's
+    fast and works with VLC.
+
+        https://gfycat.com/api
+
+        https://gfycat.com/UntidyAcidicIberianemeraldlizard -->
+        https://giant.gfycat.com/UntidyAcidicIberianemeraldlizard.webm
+    """
+    pattern = re.compile(r'https?://(www\.)?gfycat\.com/[^.]+$')
+
+    @staticmethod
+    def get_mimetype(url):
+        parts = url.split('/')
+        api_url = '/'.join(parts[:-1] + ['cajax', 'get'] + parts[-1:])
+        resp = requests.get(api_url)
+        image_url = resp.json()['gfyItem']['webmUrl']
+        return image_url, 'video/webm'
+
+
+class YoutubeMIMEParser(BaseMIMEParser):
+    """
+    Youtube videos can be streamed with vlc or downloaded with youtube-dl.
+    Assign a custom mime-type so they can be referenced in mailcap.
+    """
+    pattern = re.compile(
+        r'(?:https?://)?(m\.)?(?:youtu\.be/|(?:www\.)?youtube\.com/watch'
+        r'(?:\.php)?\'?.*v=)([a-zA-Z0-9\-_]+)')
+
+    @staticmethod
+    def get_mimetype(url):
+        return url, 'video/x-youtube'
+
+
+class GifvMIMEParser(BaseMIMEParser):
+    """
+    Special case for .gifv, which is a custom video format for imgur serves
+    as html with a special <video> frame. Note that attempting for download as
+    .webm also returns this html page. However, .mp4 appears to return the raw
+    video file.
+    """
+    pattern = re.compile(r'.*[.]gifv$')
+
+    @staticmethod
+    def get_mimetype(url):
+        modified_url = url[:-4] + 'mp4'
+        return modified_url, 'video/mp4'
+
+
+class RedditUploadsMIMEParser(BaseMIMEParser):
+    """
+    Reddit uploads do not have a file extension, but we can grab the mime-type
+    from the page header.
+    """
+    pattern = re.compile(r'https://i\.reddituploads\.com/.+$')
+
+    @staticmethod
+    def get_mimetype(url):
+        page = requests.head(url)
+        content_type = page.headers.get('Content-Type', '')
+        content_type = content_type.split(';')[0]  # Strip out the encoding
+        return url, content_type
+
+
+class ImgurMIMEParser(BaseMIMEParser):
+    """
+    The majority of imgur links don't point directly to the image, so we need
+    to open the provided url and scrape the page for the link.
+
+    Scrape the actual image url from an imgur landing page. Imgur intentionally
+    obscures this on most reddit links in order to draw more traffic for their
+    advertisements.
+
+    There are a couple of <meta> tags that supply the relevant info:
+        <meta name="twitter:image" content="https://i.imgur.com/xrqQ4LEh.jpg">
+        <meta property="og:image" content="http://i.imgur.com/xrqQ4LE.jpg?fb">
+        <link rel="image_src" href="http://i.imgur.com/xrqQ4LE.jpg">
+    """
+    pattern = re.compile(r'https?://(w+\.)?(m\.)?imgur\.com/[^.]+$')
+
+    @staticmethod
+    def get_mimetype(url):
+        page = requests.get(url)
+        soup = BeautifulSoup(page.content, 'html.parser')
+        tag = soup.find('meta', attrs={'name': 'twitter:image'})
+        if tag:
+            url = tag.get('content')
+            if GifvMIMEParser.pattern.match(url):
+                return GifvMIMEParser.get_mimetype(url)
+        return BaseMIMEParser.get_mimetype(url)
+
+
+class ImgurAlbumMIMEParser(BaseMIMEParser):
+    """
+    Imgur albums can contain several images, which need to be scraped from the
+    landing page. Assumes the following html structure:
+
+        <div class="post-image">
+            <a href="//i.imgur.com/L3Lfp1O.jpg" class="zoom">
+                <img class="post-image-placeholder"
+                     src="//i.imgur.com/L3Lfp1Og.jpg" alt="Close up">
+                <img class="js-post-image-thumb"
+                     src="//i.imgur.com/L3Lfp1Og.jpg" alt="Close up">
+            </a>
+        </div>
+    """
+    pattern = re.compile(r'https?://(w+\.)?(m\.)?imgur\.com/a(lbum)?/[^.]+$')
+
+    @staticmethod
+    def get_mimetype(url):
+        page = requests.get(url)
+        soup = BeautifulSoup(page.content, 'html.parser')
+
+        urls = []
+        for div in soup.find_all('div', class_='post-image'):
+            img = div.find('img')
+            src = img.get('src') if img else None
+            if src:
+                urls.append('http:{0}'.format(src))
+
+        if urls:
+            return " ".join(urls), 'image/x-imgur-album'
+        else:
+            return url, None
+
+
+# Parsers should be listed in the order they will be checked. The album
+# parser has to come before the general imgur parser, whose pattern also
+# matches album urls, and BaseMIMEParser has to stay last because its
+# pattern matches every url.
+parsers = [
+    GfycatMIMEParser,
+    ImgurAlbumMIMEParser,
+    ImgurMIMEParser,
+    RedditUploadsMIMEParser,
+    YoutubeMIMEParser,
+    GifvMIMEParser,
+    BaseMIMEParser]
@@ -190,7 +190,7 @@ class LoadScreen(object):

        self.exception = e
        exc_name = type(e).__name__
-        _logger.info('Loader caught: {0} - {1}'.format(exc_name, e))
+        _logger.info('Loader caught: %s - %s', exc_name, e)

        if isinstance(e, KeyboardInterrupt):
            # Don't need to print anything for this one, just swallow it
@@ -81,7 +81,7 @@ class SubredditPage(Page):
        If this was pressed on the front page, go back to the last subreddit.
        """

-        if not self.content.name == '/r/front':
+        if self.content.name != '/r/front':
            target = '/r/front'
            self._toggled_subreddit = self.content.name
        else:
@@ -124,7 +124,7 @@ class SubredditPage(Page):
            self.open_submission(url=data['url_full'])
            self.config.history.add(data['url_full'])
        else:
-            self.term.open_browser(data['url_full'])
+            self.term.open_link(data['url_full'])
            self.config.history.add(data['url_full'])

    @SubredditController.register(Command('SUBREDDIT_POST'))
@@ -0,0 +1,62 @@
+# Example mailcap file for Reddit Terminal Viewer
+# https://github.com/michael-lazar/rtv/
+#
+# Copy the contents of this file to {HOME}/.mailcap, or point to it using $MAILCAPS
+# Then launch RTV using the --enable-media flag. All shell commands defined in
+# this file depend on external programs that must be installed on your system.
+#
+# HELP REQUESTED! If you come up with your own commands (especially for OS X)
+# and would like to share, please post an issue on the GitHub tracker and we
+# can get them added to this file as references.
+#
+#
+#                              Mailcap 101
+# - The first entry with a matching MIME type will be executed, * is a wildcard
+# - %s will be replaced with the image or video url
+# - Add ``test=test -n "$DISPLAY"`` if your command opens a new window
+# - Add ``needsterminal`` for commands that use the terminal
+# - Add ``copiousoutput`` for commands that dump text to stdout
+
+###############################################################################
+# Commands below this point will open media in a separate window without
+# pausing execution of RTV.
+###############################################################################
+
+# Feh is a simple and effective image viewer
+# Note that rtv returns a list of urls for imgur albums, so we don't put quotes
+# around the `%s`
+image/x-imgur-album; feh -g 640x480 %s; test=test -n "$DISPLAY"
+image/*; feh -g 640x480 '%s'; test=test -n "$DISPLAY"
+
+# Youtube videos are assigned a custom mime-type, which can be streamed with
+# vlc or youtube-dl.
+video/x-youtube; vlc '%s' --width 640 --height 480; test=test -n "$DISPLAY"
+video/x-youtube; youtube-dl -q -o - '%s' | mpv - --autofit 640x480; test=test -n "$DISPLAY"
+
+# Mpv is a simple and effective video streamer
+video/webm; mpv '%s' --autofit 640x480; test=test -n "$DISPLAY"
+video/*; mpv '%s' --autofit 640x480; test=test -n "$DISPLAY"
+
+###############################################################################
+# Commands below this point will attempt to display media directly in the
+# terminal when X is not available.
+###############################################################################
+
+# View true images in the terminal, supported by rxvt-unicode, xterm and st
+# Requires the w3m-img package
+# image/*; w3m -o 'ext_image_viewer=off' '%s'; needsterminal
+
+# Don't have a solution for albums yet
+image/x-imgur-album; echo
+
+# 256 color images using half-width unicode characters
+# Much higher quality than img2txt, but must be built from source
+# https://github.com/rossy/img2xterm
+image/*; curl -s '%s' | convert -resize 80x80 - jpg:/tmp/rtv.jpg && img2xterm /tmp/rtv.jpg; needsterminal; copiousoutput
+
+# Display images in classic ascii using img2txt and lib-caca
+image/*; curl -s '%s' | convert - jpg:/tmp/rtv.jpg && img2txt -f utf8 /tmp/rtv.jpg; needsterminal; copiousoutput
+
+# Ascii videos
+video/x-youtube; youtube-dl -q -o - '%s' | mplayer -cache 8192 -vo caca -quiet -; needsterminal
+video/*; wget '%s' -O - | mplayer -cache 8192 -vo caca -quiet -; needsterminal
@@ -34,6 +34,9 @@ clear_auth = False
 ; Maximum number of opened links that will be saved in the history file.
 history_size = 200

+; Open external links using programs defined in the mailcap config.
+enable_media = True
+
 ################
 # OAuth Settings
 ################
@@ -17,10 +17,13 @@ from contextlib import contextmanager

 import six
 from kitchen.text.display import textual_width_chop
+from mailcap_fix import mailcap

 from . import exceptions
+from . import mime_parsers
 from .objects import LoadScreen, Color

+
 try:
    # Added in python 3.4+
    from html import unescape
@@ -42,28 +45,29 @@ class Terminal(object):
    RETURN = 10
    SPACE = 32

-    def __init__(self, stdscr, ascii=False):
+    def __init__(self, stdscr, config):

        self.stdscr = stdscr
-        self.ascii = ascii
+        self.config = config
        self.loader = LoadScreen(self)
        self._display = None
+        self._mailcap_dict = mailcap.getcaps()

    @property
    def up_arrow(self):
-        symbol = '^' if self.ascii else '▲'
+        symbol = '^' if self.config['ascii'] else '▲'
        attr = curses.A_BOLD | Color.GREEN
        return symbol, attr

    @property
    def down_arrow(self):
-        symbol = 'v' if self.ascii else '▼'
+        symbol = 'v' if self.config['ascii'] else '▼'
        attr = curses.A_BOLD | Color.RED
        return symbol, attr

    @property
    def neutral_arrow(self):
-        symbol = 'o' if self.ascii else '•'
+        symbol = 'o' if self.config['ascii'] else '•'
        attr = curses.A_BOLD
        return symbol, attr

@@ -75,7 +79,7 @@ class Terminal(object):

    @property
    def guilded(self):
-        symbol = '*' if self.ascii else '✪'
+        symbol = '*' if self.config['ascii'] else '✪'
        attr = curses.A_BOLD | Color.YELLOW
        return symbol, attr

@@ -228,7 +232,7 @@ class Terminal(object):
        if isinstance(string, six.text_type):
            string = unescape(string)

-        if self.ascii:
+        if self.config['ascii']:
            if isinstance(string, six.binary_type):
                string = string.decode('utf-8')
            string = string.encode('ascii', 'replace')
@@ -279,7 +283,7 @@ class Terminal(object):
        """

        if isinstance(message, six.string_types):
-            message = [message]
+            message = message.splitlines()

        n_rows, n_cols = self.stdscr.getmaxyx()

@@ -317,6 +321,128 @@ class Terminal(object):

        return ch

+    def open_link(self, url):
+        """
+        Open a media link using the definitions from the user's mailcap file.
+
+        Most urls are parsed using their file extension, but special cases
+        exist for websites that are prevalent on reddit such as Imgur and
+        Gfycat. If there are no valid mailcap definitions, RTV will fall back
+        to using the default webbrowser.
+
+        RTV checks for certain mailcap fields to determine how to open a link:
+            - If ``copiousoutput`` is specified, the curses application will
+              be paused and stdout will be piped to the system pager.
+            - If `needsterminal`` is specified, the curses application will
+              yield terminal control to the subprocess until it has exited.
+            - Otherwise, we assume that the subprocess is meant to open a new
+              x-window, and we swallow all stdout output.
+
+        Examples:
+            Stream youtube videos with VLC
+            Browse images and imgur albums with feh
+            Watch .webm videos through your terminal with mplayer
+            View images directly in your terminal with fbi or w3m
+            Play .mp3 files with sox player
+            Send HTML pages your pager using to html2text
+            ...anything is possible!
+        """
+
+        if not self.config['enable_media']:
+            return self.open_browser(url)
+
+        try:
+            with self.loader('Checking link', catch_exception=False):
+                command, entry = self.get_mailcap_entry(url)
+        except exceptions.MailcapEntryNotFound:
+            return self.open_browser(url)
+
+        _logger.info('Executing command: %s', command)
+        needs_terminal = 'needsterminal' in entry
+        copious_output = 'copiousoutput' in entry
+
+        if needs_terminal or copious_output:
+            # Blocking, pause rtv until the process returns
+            with self.suspend():
+                os.system('clear')
+                p = subprocess.Popen(
+                    [command], stderr=subprocess.PIPE,
+                    universal_newlines=True, shell=True)
+                code = p.wait()
+                if copious_output:
+                    six.moves.input('Press any key to continue')
+            if code != 0:
+                _, stderr = p.communicate()
+                _logger.warning(stderr)
+                self.show_notification(
+                    'Program exited with status={0}\n{1}'.format(
+                        code, stderr.strip()))
+
+        else:
+            # Non-blocking, open a background process
+            with self.loader('Opening page', delay=0):
+                p = subprocess.Popen(
+                    [command], shell=True, universal_newlines=True,
+                    stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+                # Wait a little while to make sure that the command doesn't
+                # exit with an error. This isn't perfect, but it should be good
+                # enough to catch invalid commands.
+                time.sleep(1.0)
+                code = p.poll()
+                if code is not None and code != 0:
+                    _, stderr = p.communicate()
+                    raise exceptions.BrowserError(
+                        'Program exited with status={0}\n{1}'.format(
+                            code, stderr.strip()))
+
+    def get_mailcap_entry(self, url):
+        """
+        Search through the mime handlers list and attempt to find the
+        appropriate command to open the provided url with.
+
+        Will raise a MailcapEntryNotFound exception if no valid command exists.
+
+        Params:
+            url (text): URL that will be checked
+
+        Returns:
+            command (text): The string of the command that should be executed
+                in a subprocess to open the resource.
+            entry (dict): The full mailcap entry for the corresponding command
+        """
+
+        for parser in mime_parsers.parsers:
+            if parser.pattern.match(url):
+                # modified_url may be the same as the original url, but it
+                # could also be updated to point to a different page, or it
+                # could refer to the location of a temporary file with the
+                # page's downloaded content.
+                try:
+                    modified_url, content_type = parser.get_mimetype(url)
+                except Exception as e:
+                    # If Imgur decides to change its html layout, let it fail
+                    # silently in the background instead of crashing.
+                    _logger.warn('parser %s raised an exception', parser)
+                    _logger.exception(e)
+                    raise exceptions.MailcapEntryNotFound()
+                if not content_type:
+                    _logger.info('Content type could not be determined')
+                    raise exceptions.MailcapEntryNotFound()
+                elif content_type == 'text/html':
+                    _logger.info('Content type text/html, deferring to browser')
+                    raise exceptions.MailcapEntryNotFound()
+
+                command, entry = mailcap.findmatch(
+                    self._mailcap_dict, content_type, filename=modified_url)
+                if not entry:
+                    _logger.info('Could not find a valid mailcap entry')
+                    raise exceptions.MailcapEntryNotFound()
+
+                return command, entry
+
+        # No parsers matched the url
+        raise exceptions.MailcapEntryNotFound()
+
    def open_browser(self, url):
        """
        Open the given url using the default webbrowser. The preferred browser
@@ -359,7 +485,7 @@ class Terminal(object):
                            break  # Success
                        elif code is not None:
                            raise exceptions.BrowserError(
-                                'Browser exited with status=%s' % code)
+                                'Program exited with status=%s' % code)
                        time.sleep(0.01)
                    else:
                        raise exceptions.BrowserError(
@@ -453,6 +579,12 @@ class Terminal(object):
                _logger.info('File deleted: %s', filepath)

    def open_urlview(self, data):
+        """
+        Pipe a block of text to urlview, which displays a list of urls
+        contained in the text and allows the user to open them with their
+        web browser.
+        """
+
        urlview = os.getenv('RTV_URLVIEWER') or 'urlview'
        try:
            with self.suspend():
@@ -461,6 +593,16 @@ class Terminal(object):
                    p.communicate(input=data.encode('utf-8'))
                except KeyboardInterrupt:
                    p.terminate()
+
+                code = p.poll()
+                if code == 1:
+                    # Clear the "No URLs found." message from stdout
+                    sys.stdout.write("\033[F")
+                    sys.stdout.flush()
+
+            if code == 1:
+                self.show_notification('No URLs found')
+
        except OSError:
            self.show_notification(
                'Failed to open {0}'.format(urlview))