diff --git a/rtv/docs.py b/rtv/docs.py index 166d1fb..05f88eb 100644 --- a/rtv/docs.py +++ b/rtv/docs.py @@ -54,7 +54,7 @@ Submission Mode """ -COMMENT_FILE = """ +COMMENT_FILE = u""" # Please enter a comment. Lines starting with '#' will be ignored, # and an empty message aborts the comment. # @@ -62,14 +62,14 @@ COMMENT_FILE = """ {content} """ -COMMENT_EDIT_FILE = """{content} +COMMENT_EDIT_FILE = u"""{content} # Please enter a comment. Lines starting with '#' will be ignored, # and an empty message aborts the comment. # # Editing your comment """ -SUBMISSION_FILE = """{content} +SUBMISSION_FILE = u"""{content} # Please enter your submission. Lines starting with '#' will be ignored, # and an empty field aborts the submission. # diff --git a/rtv/helpers.py b/rtv/helpers.py index 7637635..85f94e3 100644 --- a/rtv/helpers.py +++ b/rtv/helpers.py @@ -16,6 +16,39 @@ from .exceptions import ProgramError __all__ = ['open_browser', 'clean', 'wrap_text', 'strip_textpad', 'strip_subreddit_url', 'humanize_timestamp', 'open_editor'] +def clean(string, n_cols=None): + """ + Required reading! + http://nedbatchelder.com/text/unipain.html + + Python 2 input string will be a unicode type (unicode code points). Curses + will accept unicode if all of the points are in the ascii range. However, if + any of the code points are not valid ascii curses will throw a + UnicodeEncodeError: 'ascii' codec can't encode character, ordinal not in + range(128). If we encode the unicode to a utf-8 byte string and pass that to + curses, it will render correctly. + + Python 3 input string will be a string type (unicode code points). Curses + will accept that in all cases. However, the n character count in addnstr + will not be correct. If code points are passed to addnstr, curses will treat + each code point as one character and will not account for wide characters. + If utf-8 is passed in, addnstr will treat each 'byte' as a single character. + """ + + if n_cols is not None and n_cols <= 0: + return '' + + if not config.unicode: + if six.PY3 or isinstance(string, unicode): + string = string.encode('ascii', 'replace') + return string[:n_cols] if n_cols else string + else: + if n_cols: + string = textual_width_chop(string, n_cols) + if six.PY3 or isinstance(string, unicode): + string = string.encode('utf-8') + return string + def open_editor(data=''): """ Open a temporary file using the system's default editor. @@ -26,7 +59,7 @@ def open_editor(data=''): """ with NamedTemporaryFile(prefix='rtv-', suffix='.txt', mode='w') as fp: - fp.write(data) + fp.write(clean(data)) fp.flush() editor = os.getenv('RTV_EDITOR') or os.getenv('EDITOR') or 'nano' @@ -91,41 +124,6 @@ def open_browser(url): webbrowser.open_new_tab(url) curses.doupdate() - -def clean(string, n_cols=None): - """ - Required reading! - http://nedbatchelder.com/text/unipain.html - - Python 2 input string will be a unicode type (unicode code points). Curses - will accept unicode if all of the points are in the ascii range. However, if - any of the code points are not valid ascii curses will throw a - UnicodeEncodeError: 'ascii' codec can't encode character, ordinal not in - range(128). If we encode the unicode to a utf-8 byte string and pass that to - curses, it will render correctly. - - Python 3 input string will be a string type (unicode code points). Curses - will accept that in all cases. However, the n character count in addnstr - will not be correct. If code points are passed to addnstr, curses will treat - each code point as one character and will not account for wide characters. - If utf-8 is passed in, addnstr will treat each 'byte' as a single character. - """ - - if n_cols is not None and n_cols <= 0: - return '' - - if not config.unicode: - if six.PY3 or isinstance(string, unicode): - string = string.encode('ascii', 'replace') - return string[:n_cols] if n_cols else string - else: - if n_cols: - string = textual_width_chop(string, n_cols) - if six.PY3 or isinstance(string, unicode): - string = string.encode('utf-8') - return string - - def wrap_text(text, width): """ Wrap text paragraphs to the given character width while preserving newlines.