Run a second pass on escaped html characters.

This commit is contained in:
Michael Lazar
2015-12-09 01:23:22 -08:00
parent 52dfbe786c
commit 9de6056b04
2 changed files with 28 additions and 0 deletions

View File

@@ -19,6 +19,13 @@ from kitchen.text.display import textual_width_chop
from . import exceptions from . import exceptions
from .objects import LoadScreen, Color from .objects import LoadScreen, Color
try:
# Added in python 3.4+
from html import unescape
except ImportError:
from six.moves import html_parser
unescape = html_parser.HTMLParser().unescape
class Terminal(object): class Terminal(object):
@@ -173,11 +180,22 @@ class Terminal(object):
curses will treat each code point as one character and will not account curses will treat each code point as one character and will not account
for wide characters. If utf-8 is passed in, addnstr will treat each for wide characters. If utf-8 is passed in, addnstr will treat each
'byte' as a single character. 'byte' as a single character.
Reddit's API sometimes chokes and double-encodes some HTML characters.
PRAW handles the initial decoding, but we need to do a second pass
just to make sure. See https://github.com/michael-lazar/rtv/issues/96
Example:
&amp;amp; -> returned directly from reddit's api
&amp;     -> returned after PRAW decodes the html characters
&         -> returned after our second pass, this is the true value
""" """
if n_cols is not None and n_cols <= 0: if n_cols is not None and n_cols <= 0:
return '' return ''
string = unescape(string)
if self.ascii: if self.ascii:
if isinstance(string, six.binary_type): if isinstance(string, six.binary_type):
string = string.decode('utf-8') string = string.decode('utf-8')

View File

@@ -145,6 +145,16 @@ def test_terminal_clean_ncols(terminal):
assert text.decode('utf-8') == '' assert text.decode('utf-8') == ''
@pytest.mark.parametrize('ascii', [True, False])
def test_terminal_clean_unescape_html(terminal, ascii):
    """Escaped HTML entities passed to clean() come back decoded."""
    terminal.ascii = ascii
    # The byte encoding of the result depends on the terminal's ascii mode
    codec = 'ascii' if ascii else 'utf-8'

    cleaned = terminal.clean('&lt;')

    # The result is a byte string which decodes to the literal character
    assert isinstance(cleaned, six.binary_type)
    assert cleaned.decode(codec) == '<'
@pytest.mark.parametrize('ascii', [True, False]) @pytest.mark.parametrize('ascii', [True, False])
def test_terminal_add_line(terminal, stdscr, ascii): def test_terminal_add_line(terminal, stdscr, ascii):