Run a second pass on escaped html characters.
This commit is contained in:
@@ -19,6 +19,13 @@ from kitchen.text.display import textual_width_chop
|
|||||||
from . import exceptions
|
from . import exceptions
|
||||||
from .objects import LoadScreen, Color
|
from .objects import LoadScreen, Color
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Added in python 3.4+
|
||||||
|
from html import unescape
|
||||||
|
except ImportError:
|
||||||
|
from six.moves import html_parser
|
||||||
|
unescape = html_parser.HTMLParser().unescape
|
||||||
|
|
||||||
|
|
||||||
class Terminal(object):
|
class Terminal(object):
|
||||||
|
|
||||||
@@ -173,11 +180,22 @@ class Terminal(object):
|
|||||||
curses will treat each code point as one character and will not account
|
curses will treat each code point as one character and will not account
|
||||||
for wide characters. If utf-8 is passed in, addnstr will treat each
|
for wide characters. If utf-8 is passed in, addnstr will treat each
|
||||||
'byte' as a single character.
|
'byte' as a single character.
|
||||||
|
|
||||||
|
Reddit's api sometimes chokes and double-encodes some html characters
|
||||||
|
Praw handles the initial decoding, but we need to do a second pass
|
||||||
|
just to make sure. See https://github.com/michael-lazar/rtv/issues/96
|
||||||
|
|
||||||
|
Example:
|
||||||
|
&amp;amp; -> returned directly from reddit's api
|
||||||
|
&amp; -> returned after PRAW decodes the html characters
|
||||||
|
& -> returned after our second pass, this is the true value
|
||||||
"""
|
"""
|
||||||
|
|
||||||
if n_cols is not None and n_cols <= 0:
|
if n_cols is not None and n_cols <= 0:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
|
string = unescape(string)
|
||||||
|
|
||||||
if self.ascii:
|
if self.ascii:
|
||||||
if isinstance(string, six.binary_type):
|
if isinstance(string, six.binary_type):
|
||||||
string = string.decode('utf-8')
|
string = string.decode('utf-8')
|
||||||
|
|||||||
@@ -145,6 +145,16 @@ def test_terminal_clean_ncols(terminal):
|
|||||||
assert text.decode('utf-8') == 'hell'
|
assert text.decode('utf-8') == 'hell'
|
||||||
|
|
||||||
|
|
||||||
|
@pytest.mark.parametrize('ascii', [True, False])
|
||||||
|
def test_terminal_clean_unescape_html(terminal, ascii):
|
||||||
|
|
||||||
|
# HTML characters get decoded
|
||||||
|
terminal.ascii = ascii
|
||||||
|
text = terminal.clean('&lt;')
|
||||||
|
assert isinstance(text, six.binary_type)
|
||||||
|
assert text.decode('ascii' if ascii else 'utf-8') == '<'
|
||||||
|
|
||||||
|
|
||||||
@pytest.mark.parametrize('ascii', [True, False])
|
@pytest.mark.parametrize('ascii', [True, False])
|
||||||
def test_terminal_add_line(terminal, stdscr, ascii):
|
def test_terminal_add_line(terminal, stdscr, ascii):
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user