From cd9617b0d86e21c05ed47100315108d9474b8f33 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Fri, 8 Jul 2016 20:48:42 -0700 Subject: [PATCH 01/36] Pass config object to terminal. --- rtv/__main__.py | 2 +- rtv/terminal.py | 14 +++++------ tests/test_objects.py | 54 +++++++++++++++++++++--------------------- tests/test_terminal.py | 42 ++++++++++++++++---------------- 4 files changed, 56 insertions(+), 56 deletions(-) diff --git a/rtv/__main__.py b/rtv/__main__.py index 03e5fc3..8be25d4 100644 --- a/rtv/__main__.py +++ b/rtv/__main__.py @@ -93,7 +93,7 @@ def main(): if not config['monochrome']: Color.init() - term = Terminal(stdscr, config['ascii']) + term = Terminal(stdscr, config) with term.loader('Initializing', catch_exception=False): reddit = praw.Reddit(user_agent=user_agent, decode_html_entities=False, diff --git a/rtv/terminal.py b/rtv/terminal.py index 90dc698..94bf45c 100644 --- a/rtv/terminal.py +++ b/rtv/terminal.py @@ -42,28 +42,28 @@ class Terminal(object): RETURN = 10 SPACE = 32 - def __init__(self, stdscr, ascii=False): + def __init__(self, stdscr, config): self.stdscr = stdscr - self.ascii = ascii + self.config = config self.loader = LoadScreen(self) self._display = None @property def up_arrow(self): - symbol = '^' if self.ascii else '▲' + symbol = '^' if self.config['ascii'] else '▲' attr = curses.A_BOLD | Color.GREEN return symbol, attr @property def down_arrow(self): - symbol = 'v' if self.ascii else '▼' + symbol = 'v' if self.config['ascii'] else '▼' attr = curses.A_BOLD | Color.RED return symbol, attr @property def neutral_arrow(self): - symbol = 'o' if self.ascii else '•' + symbol = 'o' if self.config['ascii'] else '•' attr = curses.A_BOLD return symbol, attr @@ -75,7 +75,7 @@ class Terminal(object): @property def guilded(self): - symbol = '*' if self.ascii else '✪' + symbol = '*' if self.config['ascii'] else '✪' attr = curses.A_BOLD | Color.YELLOW return symbol, attr @@ -215,7 +215,7 @@ class Terminal(object): if isinstance(string, six.text_type): string = unescape(string) - if self.ascii: + if self.config['ascii']: if isinstance(string, six.binary_type): string = string.decode('utf-8') string = string.encode('ascii', 'replace') diff --git a/tests/test_objects.py b/tests/test_objects.py index 1d2b6f7..f745b3e 100644 --- a/tests/test_objects.py +++ b/tests/test_objects.py @@ -18,9 +18,9 @@ except ImportError: import mock -@pytest.mark.parametrize('ascii', [True, False]) -def test_objects_load_screen(terminal, stdscr, ascii): - terminal.ascii = ascii +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_objects_load_screen(terminal, stdscr, use_ascii): + terminal.config['ascii'] = use_ascii # Ensure the thread is properly started/stopped with terminal.loader(delay=0, message=u'Hello', trail=u'...'): @@ -32,9 +32,9 @@ def test_objects_load_screen(terminal, stdscr, ascii): assert stdscr.subwin.nlines == 3 -@pytest.mark.parametrize('ascii', [True, False]) -def test_objects_load_screen_exception_unhandled(terminal, stdscr, ascii): - terminal.ascii = ascii +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_objects_load_screen_exception_unhandled(terminal, stdscr, use_ascii): + terminal.config['ascii'] = use_ascii # Raising an exception should clean up the loader properly with pytest.raises(Exception): @@ -45,9 +45,9 @@ def test_objects_load_screen_exception_unhandled(terminal, stdscr, ascii): assert not terminal.loader._animator.is_alive() -@pytest.mark.parametrize('ascii', [True, False]) -def test_objects_load_screen_exception_handled(terminal, stdscr, ascii): - terminal.ascii = ascii +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_objects_load_screen_exception_handled(terminal, stdscr, use_ascii): + terminal.config['ascii'] = use_ascii # Raising a handled exception should get stored on the loaders with terminal.loader(delay=0): @@ -60,9 +60,9 @@ def test_objects_load_screen_exception_handled(terminal, stdscr, ascii): stdscr.subwin.addstr.assert_called_with(1, 1, error_message) -@pytest.mark.parametrize('ascii', [True, False]) -def test_objects_load_screen_exception_not_caught(terminal, stdscr, ascii): - terminal.ascii = ascii +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_objects_load_screen_exception_not_caught(terminal, stdscr, use_ascii): + terminal.config['ascii'] = use_ascii with pytest.raises(KeyboardInterrupt): with terminal.loader(delay=0, catch_exception=False): @@ -73,9 +73,9 @@ def test_objects_load_screen_exception_not_caught(terminal, stdscr, ascii): assert terminal.loader.exception is None -@pytest.mark.parametrize('ascii', [True, False]) -def test_objects_load_screen_keyboard_interrupt(terminal, stdscr, ascii): - terminal.ascii = ascii +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_objects_load_screen_keyboard_interrupt(terminal, stdscr, use_ascii): + terminal.config['ascii'] = use_ascii # Raising a KeyboardInterrupt should be also be stored with terminal.loader(delay=0): @@ -86,9 +86,9 @@ def test_objects_load_screen_keyboard_interrupt(terminal, stdscr, ascii): assert isinstance(terminal.loader.exception, KeyboardInterrupt) -@pytest.mark.parametrize('ascii', [True, False]) -def test_objects_load_screen_escape(terminal, stdscr, ascii): - terminal.ascii = ascii +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_objects_load_screen_escape(terminal, stdscr, use_ascii): + terminal.config['ascii'] = use_ascii stdscr.getch.return_value = terminal.ESCAPE @@ -109,9 +109,9 @@ def test_objects_load_screen_escape(terminal, stdscr, ascii): assert kill.called -@pytest.mark.parametrize('ascii', [True, False]) -def test_objects_load_screen_initial_delay(terminal, stdscr, ascii): - terminal.ascii = ascii +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_objects_load_screen_initial_delay(terminal, stdscr, use_ascii): + terminal.config['ascii'] = use_ascii # If we don't reach the initial delay nothing should be drawn with terminal.loader(delay=0.1): @@ -119,9 +119,9 @@ def test_objects_load_screen_initial_delay(terminal, stdscr, ascii): assert not stdscr.subwin.addstr.called -@pytest.mark.parametrize('ascii', [True, False]) -def test_objects_load_screen_nested(terminal, ascii): - terminal.ascii = ascii +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_objects_load_screen_nested(terminal, use_ascii): + terminal.config['ascii'] = use_ascii with terminal.loader(message='Outer'): with terminal.loader(message='Inner'): @@ -134,9 +134,9 @@ def test_objects_load_screen_nested(terminal, ascii): assert not terminal.loader._animator.is_alive() -@pytest.mark.parametrize('ascii', [True, False]) -def test_objects_load_screen_nested_complex(terminal, stdscr, ascii): - terminal.ascii = ascii +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_objects_load_screen_nested_complex(terminal, stdscr, use_ascii): + terminal.config['ascii'] = use_ascii with terminal.loader(message='Outer') as outer_loader: assert outer_loader.depth == 1 diff --git a/tests/test_terminal.py b/tests/test_terminal.py index ef3808b..c0f4812 100644 --- a/tests/test_terminal.py +++ b/tests/test_terminal.py @@ -51,7 +51,7 @@ def test_terminal_properties(terminal, config): assert terminal.get_arrow(None) is not None assert terminal.get_arrow(True) is not None assert terminal.get_arrow(False) is not None - assert terminal.ascii == config['ascii'] + assert terminal.config == config assert terminal.loader is not None assert terminal.MIN_HEIGHT is not None @@ -93,7 +93,7 @@ def test_terminal_functions(terminal): def test_terminal_clean_ascii(terminal): - terminal.ascii = True + terminal.config['ascii'] = True # unicode returns ascii text = terminal.clean('hello ❤') @@ -113,7 +113,7 @@ def test_terminal_clean_ascii(terminal): def test_terminal_clean_unicode(terminal): - terminal.ascii = False + terminal.config['ascii'] = False # unicode returns utf-8 text = terminal.clean('hello ❤') @@ -146,20 +146,20 @@ def test_terminal_clean_ncols(terminal): assert text.decode('utf-8') == 'hell' -@pytest.mark.parametrize('ascii', [True, False]) -def test_terminal_clean_unescape_html(terminal, ascii): +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_terminal_clean_unescape_html(terminal, use_ascii): # HTML characters get decoded - terminal.ascii = ascii + terminal.config['ascii'] = use_ascii text = terminal.clean('<') assert isinstance(text, six.binary_type) assert text.decode('ascii' if ascii else 'utf-8') == '<' -@pytest.mark.parametrize('ascii', [True, False]) -def test_terminal_add_line(terminal, stdscr, ascii): +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_terminal_add_line(terminal, stdscr, use_ascii): - terminal.ascii = ascii + terminal.config['ascii'] = use_ascii terminal.add_line(stdscr, 'hello') assert stdscr.addstr.called_with(0, 0, 'hello'.encode('ascii')) @@ -176,10 +176,10 @@ def test_terminal_add_line(terminal, stdscr, ascii): stdscr.reset_mock() -@pytest.mark.parametrize('ascii', [True, False]) -def test_show_notification(terminal, stdscr, ascii): +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_show_notification(terminal, stdscr, use_ascii): - terminal.ascii = ascii + terminal.config['ascii'] = use_ascii # The whole message should fit in 40x80 text = HELP.strip().splitlines() @@ -198,10 +198,10 @@ def test_show_notification(terminal, stdscr, ascii): assert stdscr.subwin.addstr.call_count == 13 -@pytest.mark.parametrize('ascii', [True, False]) -def test_text_input(terminal, stdscr, ascii): +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_text_input(terminal, stdscr, use_ascii): - terminal.ascii = ascii + terminal.config['ascii'] = use_ascii stdscr.nlines = 1 # Text will be wrong because stdscr.inch() is not implemented @@ -219,10 +219,10 @@ def test_text_input(terminal, stdscr, ascii): assert terminal.text_input(stdscr, allow_resize=False) is None -@pytest.mark.parametrize('ascii', [True, False]) -def test_prompt_input(terminal, stdscr, ascii): +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_prompt_input(terminal, stdscr, use_ascii): - terminal.ascii = ascii + terminal.config['ascii'] = use_ascii window = stdscr.derwin() window.getch.side_effect = [ord('h'), ord('i'), terminal.RETURN] @@ -270,10 +270,10 @@ def test_prompt_y_or_n(terminal, stdscr): assert curses.flash.called -@pytest.mark.parametrize('ascii', [True, False]) -def test_open_editor(terminal, ascii): +@pytest.mark.parametrize('use_ascii', [True, False]) +def test_open_editor(terminal, use_ascii): - terminal.ascii = ascii + terminal.config['ascii'] = use_ascii comment = COMMENT_EDIT_FILE.format(content='#| This is a comment! ❤') data = {'filename': None} From 1f6ced1f7473107a8d3d3beade52dac530d9bc82 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Fri, 8 Jul 2016 20:55:04 -0700 Subject: [PATCH 02/36] Fixed tests. --- tests/conftest.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/conftest.py b/tests/conftest.py index 02ee9ee..7c01b3d 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -175,7 +175,7 @@ def reddit(vcr, request): @pytest.fixture() def terminal(stdscr, config): - term = Terminal(stdscr, ascii=config['ascii']) + term = Terminal(stdscr, config=config) # Disable the python 3.4 addch patch so that the mock stdscr calls are # always made the same way term.addch = lambda window, *args: window.addch(*args) From 06ac572b209127278955a4d314c0c93a6702b923 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Fri, 8 Jul 2016 20:57:33 -0700 Subject: [PATCH 03/36] Added mime_handlers file. --- rtv/mime_handlers.py | 135 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 135 insertions(+) create mode 100644 rtv/mime_handlers.py diff --git a/rtv/mime_handlers.py b/rtv/mime_handlers.py new file mode 100644 index 0000000..b5e2dc6 --- /dev/null +++ b/rtv/mime_handlers.py @@ -0,0 +1,135 @@ +import re +import mimetypes +from html.parser import HTMLParser + +import requests + + +class HTMLParsed(Exception): + def __init__(self, data): + self.data = data + + +class BaseHandler(object): + """ + BaseHandler can be sub-classed to define custom handlers for determining + the MIME type of external urls. + """ + + # URL regex pattern that the handler will be triggered on + pattern = re.compile(r'.*$') + + @staticmethod + def get_mimetype(url): + """ + Args: + url (text): Web url that was linked to by a reddit submission. + + Returns: + modified_url (text): The url (or filename) that will be used when + constructing the command to run. + content_type (text): The mime-type that will be used when + constructing the command to run. If the mime-type is unknown, + return None and the program will fallback to using the web + browser. + """ + + # Guess based on the file extension + filename = url.split('?')[0] + content_type, _ = mimetypes.guess_type(filename) + return url, content_type + + +class YoutubeHandler(BaseHandler): + """ + Youtube videos can be streamed with vlc or downloaded with youtube-dl. + Assign a custom mime-type so they can be referenced in mailcap. + """ + + pattern = re.compile( + r'(?:https?://)?(m\.)?(?:youtu\.be/|(?:www\.)?youtube\.com/watch' + r'(?:\.php)?\'?.*v=)([a-zA-Z0-9\-_]+)') + + @staticmethod + def get_mimetype(url): + return url, 'video/x-youtube' + + +class GifvHandler(BaseHandler): + """ + Special case for .gifv, which is a custom video format for imgur that is + incorrectly (or on purpose?) returned with a Content-Type of text/html. + """ + pattern = re.compile(r'.*[.]gifv$') + + @staticmethod + def get_mimetype(url): + modified_url = url[:-4] + 'webm' + return modified_url, 'video/webm' + + +class RedditUploadsHandler(BaseHandler): + """ + Reddit uploads do not have a file extension, but we can grab the mime-type + from the page header. + """ + pattern = re.compile(r'https://i.reddituploads.com/.+$') + + @staticmethod + def get_mimetype(url): + page = requests.head(url) + content_type = page.headers.get('Content-Type', '') + content_type = content_type.split(';')[0] # Strip out the encoding + return url, content_type + + +class ImgurHTMLParser(HTMLParser): + """ + Scrape the actual image url from an imgur landing page. Imgur intentionally + obscures this on most reddit links in order to draw more traffic for their + advertisements. + + There are a couple of tags that supply the relevant info: + + + + + Note: + BeautifulSoup or lxml would be faster here but I wanted to skip adding + an extra dependency for something as trivial as this. + """ + + def handle_starttag(self, tag, attr): + if tag == 'meta' and attr[0] == ('name', 'twitter:image'): + raise HTMLParsed(attr[1][1]) + + +class ImgurHandler(BaseHandler): + """ + The majority of imgur links don't point directly to the image, so we need + to open the provided url and scrape the page for the link. For galleries, + this method only returns the first image. + """ + pattern = re.compile(r'https?://(w+\.)?(m\.)?imgur\.com/[^.]+$') + + @staticmethod + def get_mimetype(url): + imgur_page = requests.get(url) + try: + ImgurHTMLParser().feed(imgur_page.text) + except HTMLParsed as data: + # We found a link + url = data.data + if GifvHandler.pattern.match(url): + return GifvHandler.get_mimetype(url) + + return BaseHandler.get_mimetype(url) + + +# Handlers should be defined in the order they will be checked +handlers = [ + ImgurHandler, + RedditUploadsHandler, + YoutubeHandler, + GifvHandler, + BaseHandler] From 1d8c555c27483eb26d72c974a6294190c2bf480f Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Fri, 8 Jul 2016 23:51:42 -0700 Subject: [PATCH 04/36] Fighting with mailcap --- rtv/config.py | 3 +++ rtv/mime_handlers.py | 5 +++-- rtv/rtv.cfg | 3 +++ rtv/subreddit.py | 2 +- rtv/terminal.py | 51 +++++++++++++++++++++++++++++++++++++++++++- 5 files changed, 60 insertions(+), 4 deletions(-) diff --git a/rtv/config.py b/rtv/config.py index 9fb38b6..d93c849 100644 --- a/rtv/config.py +++ b/rtv/config.py @@ -59,6 +59,9 @@ def build_parser(): parser.add_argument( '--copy-config', dest='copy_config', action='store_const', const=True, help='Copy the default configuration to {HOME}/.config/rtv/rtv.cfg') + parser.add_argument( + '--enable-media', dest='enable_media', action='store_const', const=True, + help='Open external links using programs defined in the mailcap config') return parser diff --git a/rtv/mime_handlers.py b/rtv/mime_handlers.py index b5e2dc6..839457b 100644 --- a/rtv/mime_handlers.py +++ b/rtv/mime_handlers.py @@ -1,7 +1,7 @@ import re import mimetypes -from html.parser import HTMLParser +from six.moves.html_parser import HTMLParser import requests @@ -116,7 +116,8 @@ class ImgurHandler(BaseHandler): def get_mimetype(url): imgur_page = requests.get(url) try: - ImgurHTMLParser().feed(imgur_page.text) + # convert_charrefs will be true by default in python 3.5 + ImgurHTMLParser(convert_charrefs=True).feed(imgur_page.text) except HTMLParsed as data: # We found a link url = data.data diff --git a/rtv/rtv.cfg b/rtv/rtv.cfg index cb15877..2e88205 100644 --- a/rtv/rtv.cfg +++ b/rtv/rtv.cfg @@ -34,6 +34,9 @@ clear_auth = False ; Maximum number of opened links that will be saved in the history file. history_size = 200 +; Open external links using programs defined in the mailcap config. +enable_media = True + ################ # OAuth Settings ################ diff --git a/rtv/subreddit.py b/rtv/subreddit.py index b2c9e9d..cfd6d45 100644 --- a/rtv/subreddit.py +++ b/rtv/subreddit.py @@ -106,7 +106,7 @@ class SubredditPage(Page): self.open_submission(url=data['url_full']) self.config.history.add(data['url_full']) else: - self.term.open_browser(data['url_full']) + self.term.open_link(data['url_full']) self.config.history.add(data['url_full']) @SubredditController.register(Command('SUBREDDIT_POST')) diff --git a/rtv/terminal.py b/rtv/terminal.py index 94bf45c..e4ce527 100644 --- a/rtv/terminal.py +++ b/rtv/terminal.py @@ -7,6 +7,7 @@ import time import codecs import curses import logging +import mailcap import tempfile import webbrowser import subprocess @@ -19,8 +20,10 @@ import six from kitchen.text.display import textual_width_chop from . import exceptions +from . import mime_handlers from .objects import LoadScreen, Color + try: # Added in python 3.4+ from html import unescape @@ -49,6 +52,9 @@ class Terminal(object): self.loader = LoadScreen(self) self._display = None + # TODO: Load from custom location + self._mailcap_dict = mailcap.getcaps() + @property def up_arrow(self): symbol = '^' if self.config['ascii'] else '▲' @@ -304,6 +310,49 @@ class Terminal(object): return ch + def open_link(self, url): + + _logger.info('Opening link %s', url) + if not self.config['enable_media']: + return self.open_browser(url) + + command = None + for handler in mime_handlers.handlers: + if handler.pattern.match(url): + modified_url, content_type = handler.get_mimetype(url) + _logger.info('MIME type: %s', content_type) + _logger.info('Modified url: %s', modified_url) + if not content_type or content_type == 'text/html': + # Could not figure out the Content-Type + return self.open_browser(modified_url) + + # http://bugs.python.org/issue14977 + command, entry = mailcap.findmatch( + self._mailcap_dict, content_type, filename=modified_url) + if not entry: + _logger.info('Could not find a valid mailcap entry') + return self.open_browser(modified_url) + + break + + with self.loader('Opening page in a new window', delay=0): + args = [command] + _logger.info('Running command: %s', args) + # Non-blocking, run with a full shell to support pipes + p = subprocess.Popen( + args, shell=True, universal_newlines=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + # Wait a little while to make sure that the command doesn't exit + # with an error. This isn't perfect, but it should be good enough + # to catch invalid commands. + time.sleep(1.0) + code = p.poll() + if code is not None and code != 0: + stdout, stderr = p.communicate() + _logger.warning(stderr) + raise exceptions.BrowserError( + 'Program exited with status=%s' % code) + def open_browser(self, url): """ Open the given url using the default webbrowser. The preferred browser @@ -346,7 +395,7 @@ class Terminal(object): break # Success elif code is not None: raise exceptions.BrowserError( - 'Browser exited with status=%s' % code) + 'Program exited with status=%s' % code) time.sleep(0.01) else: raise exceptions.BrowserError( From 265c4446e6cfd000a130404ea05d85f448579b12 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Mon, 11 Jul 2016 01:20:00 -0700 Subject: [PATCH 05/36] Added mailcap_fix dependency. --- rtv/terminal.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtv/terminal.py b/rtv/terminal.py index e4ce527..2d867d9 100644 --- a/rtv/terminal.py +++ b/rtv/terminal.py @@ -7,7 +7,6 @@ import time import codecs import curses import logging -import mailcap import tempfile import webbrowser import subprocess @@ -18,6 +17,7 @@ from contextlib import contextmanager import six from kitchen.text.display import textual_width_chop +from mailcap_fix import mailcap from . import exceptions from . import mime_handlers From 2ebc9552c82372281e1e56cbb6f19881259b5e65 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Mon, 11 Jul 2016 17:58:14 -0700 Subject: [PATCH 06/36] Added support for imgur albums. --- rtv/mime_handlers.py | 235 +++++++++++++++++++++++++++---------------- rtv/terminal.py | 6 +- 2 files changed, 149 insertions(+), 92 deletions(-) diff --git a/rtv/mime_handlers.py b/rtv/mime_handlers.py index 839457b..470267c 100644 --- a/rtv/mime_handlers.py +++ b/rtv/mime_handlers.py @@ -1,88 +1,20 @@ import re +import logging import mimetypes -from six.moves.html_parser import HTMLParser import requests +from six.moves.html_parser import HTMLParser +from html import parser +_logger = logging.getLogger(__name__) + +# HTML Parsers class HTMLParsed(Exception): def __init__(self, data): self.data = data -class BaseHandler(object): - """ - BaseHandler can be sub-classed to define custom handlers for determining - the MIME type of external urls. - """ - - # URL regex pattern that the handler will be triggered on - pattern = re.compile(r'.*$') - - @staticmethod - def get_mimetype(url): - """ - Args: - url (text): Web url that was linked to by a reddit submission. - - Returns: - modified_url (text): The url (or filename) that will be used when - constructing the command to run. - content_type (text): The mime-type that will be used when - constructing the command to run. If the mime-type is unknown, - return None and the program will fallback to using the web - browser. - """ - - # Guess based on the file extension - filename = url.split('?')[0] - content_type, _ = mimetypes.guess_type(filename) - return url, content_type - - -class YoutubeHandler(BaseHandler): - """ - Youtube videos can be streamed with vlc or downloaded with youtube-dl. - Assign a custom mime-type so they can be referenced in mailcap. - """ - - pattern = re.compile( - r'(?:https?://)?(m\.)?(?:youtu\.be/|(?:www\.)?youtube\.com/watch' - r'(?:\.php)?\'?.*v=)([a-zA-Z0-9\-_]+)') - - @staticmethod - def get_mimetype(url): - return url, 'video/x-youtube' - - -class GifvHandler(BaseHandler): - """ - Special case for .gifv, which is a custom video format for imgur that is - incorrectly (or on purpose?) returned with a Content-Type of text/html. - """ - pattern = re.compile(r'.*[.]gifv$') - - @staticmethod - def get_mimetype(url): - modified_url = url[:-4] + 'webm' - return modified_url, 'video/webm' - - -class RedditUploadsHandler(BaseHandler): - """ - Reddit uploads do not have a file extension, but we can grab the mime-type - from the page header. - """ - pattern = re.compile(r'https://i.reddituploads.com/.+$') - - @staticmethod - def get_mimetype(url): - page = requests.head(url) - content_type = page.headers.get('Content-Type', '') - content_type = content_type.split(';')[0] # Strip out the encoding - return url, content_type - - class ImgurHTMLParser(HTMLParser): """ Scrape the actual image url from an imgur landing page. Imgur intentionally @@ -96,19 +28,117 @@ class ImgurHTMLParser(HTMLParser): Note: BeautifulSoup or lxml would be faster here but I wanted to skip adding - an extra dependency for something as trivial as this. + an extra dependency for something this trivial. """ - def handle_starttag(self, tag, attr): if tag == 'meta' and attr[0] == ('name', 'twitter:image'): raise HTMLParsed(attr[1][1]) -class ImgurHandler(BaseHandler): +class ImgurAlbumHTMLParser(HTMLParser): + """ + Scrape the complete list of images from an imgur album. The HTML parser is + very limited, so this assumes the following html structure: + +
+ + Close up + Close up + +
+ """ + def reset(self): + super(ImgurAlbumHTMLParser, self).reset() + self.primed = False + self.hrefs = [] + + def handle_starttag(self, tag, attr): + if tag == 'div' and ('class', 'post-image') in attr: + self.primed = True + elif self.primed: + self.primed = False + if tag == 'a' and attr[0][0] == 'href': + self.hrefs.append(attr[0][1]) + + +# MIME Parsers + +class BaseMIMEParser(object): + """ + BaseMIMEParser can be sub-classed to define custom handlers for determining + the MIME type of external urls. + """ + pattern = re.compile(r'.*$') + + @staticmethod + def get_mimetype(url): + """ + Guess based on the file extension. + + Args: + url (text): Web url that was linked to by a reddit submission. + + Returns: + modified_url (text): The url (or filename) that will be used when + constructing the command to run. + content_type (text): The mime-type that will be used when + constructing the command to run. If the mime-type is unknown, + return None and the program will fallback to using the web + browser. + """ + filename = url.split('?')[0] + content_type, _ = mimetypes.guess_type(filename) + return url, content_type + + +class YoutubeMIMEParser(BaseMIMEParser): + """ + Youtube videos can be streamed with vlc or downloaded with youtube-dl. + Assign a custom mime-type so they can be referenced in mailcap. + """ + pattern = re.compile( + r'(?:https?://)?(m\.)?(?:youtu\.be/|(?:www\.)?youtube\.com/watch' + r'(?:\.php)?\'?.*v=)([a-zA-Z0-9\-_]+)') + + @staticmethod + def get_mimetype(url): + return url, 'video/x-youtube' + + +class GifvMIMEParser(BaseMIMEParser): + """ + Special case for .gifv, which is a custom video format for imgur that is + incorrectly (or on purpose?) returned with a Content-Type of text/html. + """ + pattern = re.compile(r'.*[.]gifv$') + + @staticmethod + def get_mimetype(url): + modified_url = url[:-4] + 'webm' + return modified_url, 'image/webm' + + +class RedditUploadsMIMEParser(BaseMIMEParser): + """ + Reddit uploads do not have a file extension, but we can grab the mime-type + from the page header. + """ + pattern = re.compile(r'https://i.reddituploads.com/.+$') + + @staticmethod + def get_mimetype(url): + page = requests.head(url) + content_type = page.headers.get('Content-Type', '') + content_type = content_type.split(';')[0] # Strip out the encoding + return url, content_type + + +class ImgurMIMEParser(BaseMIMEParser): """ The majority of imgur links don't point directly to the image, so we need - to open the provided url and scrape the page for the link. For galleries, - this method only returns the first image. + to open the provided url and scrape the page for the link. """ pattern = re.compile(r'https?://(w+\.)?(m\.)?imgur\.com/[^.]+$') @@ -121,16 +151,43 @@ class ImgurHandler(BaseHandler): except HTMLParsed as data: # We found a link url = data.data - if GifvHandler.pattern.match(url): - return GifvHandler.get_mimetype(url) + if GifvMIMEParser.pattern.match(url): + return GifvMIMEParser.get_mimetype(url) - return BaseHandler.get_mimetype(url) + return BaseMIMEParser.get_mimetype(url) -# Handlers should be defined in the order they will be checked -handlers = [ - ImgurHandler, - RedditUploadsHandler, - YoutubeHandler, - GifvHandler, - BaseHandler] +class ImgurAlbumMIMEParser(BaseMIMEParser): + """ + Imgur albums can contain several images, which need to be scraped from the + landing page. + """ + pattern = re.compile(r'https?://(w+\.)?(m\.)?imgur\.com/a/[^.]+$') + + @staticmethod + def get_mimetype(url): + imgur_page = requests.get(url) + parser = ImgurAlbumHTMLParser(convert_charrefs=True) + + try: + parser.feed(imgur_page.text) + except Exception as e: + _logger.warning(e) + urls = [] + else: + urls = ['http:' + href for href in parser.hrefs] + + if urls: + return "' '".join(urls), 'image/x-imgur-album' + else: + return url, None + + +# Parsers should be listed in the order they will be checked +parsers = [ + ImgurAlbumMIMEParser, + ImgurMIMEParser, + RedditUploadsMIMEParser, + YoutubeMIMEParser, + GifvMIMEParser, + BaseMIMEParser] \ No newline at end of file diff --git a/rtv/terminal.py b/rtv/terminal.py index 2d867d9..ebd048a 100644 --- a/rtv/terminal.py +++ b/rtv/terminal.py @@ -317,9 +317,9 @@ class Terminal(object): return self.open_browser(url) command = None - for handler in mime_handlers.handlers: - if handler.pattern.match(url): - modified_url, content_type = handler.get_mimetype(url) + for parser in mime_handlers.parsers: + if parser.pattern.match(url): + modified_url, content_type = parser.get_mimetype(url) _logger.info('MIME type: %s', content_type) _logger.info('Modified url: %s', modified_url) if not content_type or content_type == 'text/html': From 47b9835ccc5ac2c851c07c0348a54a73a1b05008 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Mon, 11 Jul 2016 18:02:30 -0700 Subject: [PATCH 07/36] Tweaked regex. --- rtv/mime_handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtv/mime_handlers.py b/rtv/mime_handlers.py index 470267c..9900ab2 100644 --- a/rtv/mime_handlers.py +++ b/rtv/mime_handlers.py @@ -162,7 +162,7 @@ class ImgurAlbumMIMEParser(BaseMIMEParser): Imgur albums can contain several images, which need to be scraped from the landing page. """ - pattern = re.compile(r'https?://(w+\.)?(m\.)?imgur\.com/a/[^.]+$') + pattern = re.compile(r'https?://(w+\.)?(m\.)?imgur\.com/a(lbum)?/[^.]+$') @staticmethod def get_mimetype(url): From a51e4536602c11f38bf577c9cfc1e23dfb041ce7 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Mon, 11 Jul 2016 18:03:59 -0700 Subject: [PATCH 08/36] Tweaked regex. --- rtv/mime_handlers.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtv/mime_handlers.py b/rtv/mime_handlers.py index 9900ab2..2571d5d 100644 --- a/rtv/mime_handlers.py +++ b/rtv/mime_handlers.py @@ -125,7 +125,7 @@ class RedditUploadsMIMEParser(BaseMIMEParser): Reddit uploads do not have a file extension, but we can grab the mime-type from the page header. """ - pattern = re.compile(r'https://i.reddituploads.com/.+$') + pattern = re.compile(r'https://i\.reddituploads\.com/.+$') @staticmethod def get_mimetype(url): From cfb0583541da64e5eb9bb9d71c8d11a1a80dfdbe Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Mon, 11 Jul 2016 18:11:53 -0700 Subject: [PATCH 09/36] Added docstrings for urlview. --- rtv/terminal.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/rtv/terminal.py b/rtv/terminal.py index ebd048a..115532d 100644 --- a/rtv/terminal.py +++ b/rtv/terminal.py @@ -489,11 +489,16 @@ class Terminal(object): _logger.info('File deleted: %s', filepath) def open_urlview(self, data): + """ + Pipe a block of text to urlview, which displays a list of urls + contained in the text and allows the user to open them with their + web browser. + """ + urlview = os.getenv('RTV_URLVIEWER') or 'urlview' try: with self.suspend(): - p = subprocess.Popen([urlview], - stdin=subprocess.PIPE) + p = subprocess.Popen([urlview], stdin=subprocess.PIPE) try: p.communicate(input=six.b(data)) except KeyboardInterrupt: From 43fb2a0378775fee70d6d3f9971d3cc1d5ee5a16 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Wed, 13 Jul 2016 01:19:16 -0700 Subject: [PATCH 10/36] Added terminal mode, fixed mime handler for gifv --- rtv/mime_handlers.py | 4 ++-- rtv/terminal.py | 42 ++++++++++++++++++++++++++---------------- 2 files changed, 28 insertions(+), 18 deletions(-) diff --git a/rtv/mime_handlers.py b/rtv/mime_handlers.py index 2571d5d..d3e827b 100644 --- a/rtv/mime_handlers.py +++ b/rtv/mime_handlers.py @@ -117,7 +117,7 @@ class GifvMIMEParser(BaseMIMEParser): @staticmethod def get_mimetype(url): modified_url = url[:-4] + 'webm' - return modified_url, 'image/webm' + return modified_url, 'video/webm' class RedditUploadsMIMEParser(BaseMIMEParser): @@ -190,4 +190,4 @@ parsers = [ RedditUploadsMIMEParser, YoutubeMIMEParser, GifvMIMEParser, - BaseMIMEParser] \ No newline at end of file + BaseMIMEParser] diff --git a/rtv/terminal.py b/rtv/terminal.py index 115532d..7ff2b45 100644 --- a/rtv/terminal.py +++ b/rtv/terminal.py @@ -316,7 +316,7 @@ class Terminal(object): if not self.config['enable_media']: return self.open_browser(url) - command = None + command, entry = None, None for parser in mime_handlers.parsers: if parser.pattern.match(url): modified_url, content_type = parser.get_mimetype(url) @@ -335,23 +335,33 @@ class Terminal(object): break - with self.loader('Opening page in a new window', delay=0): - args = [command] - _logger.info('Running command: %s', args) - # Non-blocking, run with a full shell to support pipes - p = subprocess.Popen( - args, shell=True, universal_newlines=True, - stdout=subprocess.PIPE, stderr=subprocess.PIPE) - # Wait a little while to make sure that the command doesn't exit - # with an error. This isn't perfect, but it should be good enough - # to catch invalid commands. - time.sleep(1.0) - code = p.poll() - if code is not None and code != 0: - stdout, stderr = p.communicate() - _logger.warning(stderr) + args = [command] + _logger.info('Running command: %s', args) + + if 'needsterminal' in entry: + with self.suspend(): + # Blocking, pause rtv until the process returns + p = subprocess.Popen(args, shell=True) + code = p.wait() + if code != 0: raise exceptions.BrowserError( 'Program exited with status=%s' % code) + else: + with self.loader('Opening page in a new window', delay=0): + # Non-blocking, run with a full shell to support pipes + p = subprocess.Popen( + args, shell=True, universal_newlines=True, + stdout=subprocess.PIPE, stderr=subprocess.PIPE) + # Wait a little while to make sure that the command doesn't + # exit with an error. This isn't perfect, but it should be good + # enough to catch invalid commands. + time.sleep(1.0) + code = p.poll() + if code is not None and code != 0: + stdout, stderr = p.communicate() + _logger.warning(stderr) + raise exceptions.BrowserError( + 'Program exited with status=%s' % code) def open_browser(self, url): """ From 5ad040596c8cc3e03c978bfb78d3f83e34aae2c5 Mon Sep 17 00:00:00 2001 From: Michael Lazar Date: Thu, 14 Jul 2016 23:38:01 -0700 Subject: [PATCH 11/36] Added handler for gfycat videos. --- rtv/mime_handlers.py | 34 +++++++++++++++++++++++++++++----- 1 file changed, 29 insertions(+), 5 deletions(-) diff --git a/rtv/mime_handlers.py b/rtv/mime_handlers.py index d3e827b..bf896b0 100644 --- a/rtv/mime_handlers.py +++ b/rtv/mime_handlers.py @@ -4,12 +4,12 @@ import mimetypes import requests from six.moves.html_parser import HTMLParser -from html import parser _logger = logging.getLogger(__name__) # HTML Parsers + class HTMLParsed(Exception): def __init__(self, data): self.data = data @@ -93,6 +93,27 @@ class BaseMIMEParser(object): return url, content_type +class GfycatMIMEParser(BaseMIMEParser): + """ + Gfycat provides a primitive json api to generate image links. URLs can be + downloaded as either gif, webm, or mjpg. Webm was selected because it's + fast and works with VLC. + + https://gfycat.com/api + https://gfycat.com/UntidyAcidicIberianemeraldlizard --> + https://giant.gfycat.com/UntidyAcidicIberianemeraldlizard.webm + """ + pattern = re.compile(r'https?://(www\.)?gfycat\.com/[^.]+$') + + @staticmethod + def get_mimetype(url): + parts = url.split('/') + api_url = '/'.join(parts[:-1] + ['cajax', 'get'] + parts[-1:]) + resp = requests.get(api_url) + image_url = resp.json()['gfyItem']['webmUrl'] + return image_url, 'video/webm' + + class YoutubeMIMEParser(BaseMIMEParser): """ Youtube videos can be streamed with vlc or downloaded with youtube-dl. @@ -109,15 +130,17 @@ class YoutubeMIMEParser(BaseMIMEParser): class GifvMIMEParser(BaseMIMEParser): """ - Special case for .gifv, which is a custom video format for imgur that is - incorrectly (or on purpose?) returned with a Content-Type of text/html. + Special case for .gifv, which is a custom video format for imgur serves + as html with a special