r""" ============== smartypants.py ============== ---------------------------- SmartyPants ported to Python ---------------------------- Ported by `Chad Miller`_ Copyright (c) 2004, 2007 Chad Miller original `SmartyPants`_ by `John Gruber`_ Copyright (c) 2003 John Gruber Synopsis ======== A smart-quotes plugin for Pyblosxom_. The priginal "SmartyPants" is a free web publishing plug-in for Movable Type, Blosxom, and BBEdit that easily translates plain ASCII punctuation characters into "smart" typographic punctuation HTML entities. This software, *smartypants.py*, endeavours to be a functional port of SmartyPants to Python, for use with Pyblosxom_. Description =========== SmartyPants can perform the following transformations: - Straight quotes ( " and ' ) into "curly" quote HTML entities - Backticks-style quotes (\`\`like this'') into "curly" quote HTML entities - Dashes (``--`` and ``---``) into en- and em-dash entities - Three consecutive dots (``...`` or ``. . .``) into an ellipsis entity This means you can write, edit, and save your posts using plain old ASCII straight quotes, plain dashes, and plain dots, but your published posts (and final HTML output) will appear with smart quotes, em-dashes, and proper ellipses. SmartyPants does not modify characters within ``
``, ````, ````,
```` or ``

He said, "'Quoted' words in a larger quote."

text = re.sub(r""""'(?=\w)""", """“‘""", text) text = re.sub(r"""'"(?=\w)""", """‘“""", text) text = re.sub(r'''""(?=\w)''', """““""", text) text = re.sub(r"""''(?=\w)""", """‘‘""", text) text = re.sub(r'''\"\'''', """”’""", text) text = re.sub(r'''\'\"''', """’”""", text) text = re.sub(r'''""''', """””""", text) text = re.sub(r"""''""", """’’""", text) # Special case for decade abbreviations (the '80s --> ’80s): # See http://practicaltypography.com/apostrophes.html text = re.sub(r"""(\W|^)'(?=\d{2}s)""", r"""\1’""", text) # Measurements in feet and inches or longitude/latitude: 19' 43.5" --> 19′ 43.5″ text = re.sub(r'''(\W|^)([-0-9.]+\s*)'(\s*[-0-9.]+)"''', r'\1\2′\3″', text) # Special case for Quotes at inside of other entities, e.g.: #

A double quote--"within dashes"--would be nice.

text = re.sub(r"""(?<=\W)"(?=\w)""", r"""“""", text) text = re.sub(r"""(?<=\W)'(?=\w)""", r"""‘""", text) text = re.sub(r"""(?<=\w)"(?=\W)""", r"""”""", text) text = re.sub(r"""(?<=\w)'(?=\W)""", r"""’""", text) # The following are commented out as smartypants tokenizes text by # stripping out html tags. Therefore, there is no guarantee that the # start-of-line and end-ol-line regex operators will match anything # meaningful # Special case for Quotes at end of line with a preceeding space (may change just to end of line) # text = re.sub(r"""(?<=\s)"$""", r"""”""", text) # text = re.sub(r"""(?<=\s)'$""", r"""’""", text) # Special case for Quotes at beginning of line with a space - multiparagraph quoted text: # text = re.sub(r"""^"(?=\s)""", r"""“""", text) # text = re.sub(r"""^'(?=\s)""", r"""‘""", text) close_class = r"""[^\ \t\r\n\[\{\(\-]""" dec_dashes = r"""–|—""" # Get most opening single quotes: opening_single_quotes_regex = re.compile(r""" ( \s | # a whitespace char, or   | # a non-breaking space entity, or -- | # dashes, or &[mn]dash; | # named dash entities %s | # or decimal entities &\#x201[34]; # or hex ) ' # the quote (?=\w) # followed by a word character """ % (dec_dashes,), re.VERBOSE) text = opening_single_quotes_regex.sub(r"""\1‘""", text) closing_single_quotes_regex = re.compile(r""" (%s) ' (?!\s | s\b | \d) """ % (close_class,), re.VERBOSE) text = closing_single_quotes_regex.sub(r"""\1’""", text) closing_single_quotes_regex = re.compile(r""" (%s) ' (\s | s\b) """ % (close_class,), re.VERBOSE) text = closing_single_quotes_regex.sub(r"""\1’\2""", text) # Any remaining single quotes should be opening ones: text = re.sub(r"""'""", r"""‘""", text) # Get most opening double quotes: opening_double_quotes_regex = re.compile(r""" ( \s | # a whitespace char, or   | # a non-breaking space entity, or -- | # dashes, or &[mn]dash; | # named dash entities %s | # or decimal entities &\#x201[34]; # or hex ) " # the quote (?=\w) # followed by a word character """ % 
(dec_dashes,), re.VERBOSE) text = opening_double_quotes_regex.sub(r"""\1“""", text) # Double closing quotes: closing_double_quotes_regex = re.compile(r""" #(%s)? # character that indicates the quote should be closing " (?=\s) """ % (close_class,), re.VERBOSE) text = closing_double_quotes_regex.sub(r"""”""", text) closing_double_quotes_regex = re.compile(r""" (%s) # character that indicates the quote should be closing " """ % (close_class,), re.VERBOSE) text = closing_double_quotes_regex.sub(r"""\1”""", text) if text.endswith('-"'): # A string that ends with -" is sometimes used for dialogue text = text[:-1] + '”' # Any remaining quotes should be opening ones. text = re.sub(r'"', r"""“""", text) return text def educateBackticks(text): """ Parameter: String. Returns: The string, with ``backticks'' -style double quotes translated into HTML curly quote entities. Example input: ``Isn't this fun?'' Example output: “Isn't this fun?” """ text = re.sub(r"""``""", r"""“""", text) text = re.sub(r"""''""", r"""”""", text) return text def educateSingleBackticks(text): """ Parameter: String. Returns: The string, with `backticks' -style single quotes translated into HTML curly quote entities. Note that every "'" in the string, apostrophes included, is translated to the closing single quote. Example input: `Isn't this fun?' Example output: ‘Isn’t this fun?’ """ text = re.sub(r"""`""", r"""‘""", text) text = re.sub(r"""'""", r"""’""", text) return text def educateDashes(text): """ Parameter: String. Returns: The string, with each instance of "--" translated to an em-dash HTML entity, and each "---" translated to an en-dash HTML entity. """ text = re.sub(r"""---""", r"""–""", text) # en (yes, backwards) text = re.sub(r"""--""", r"""—""", text) # em (yes, backwards) return text def educateDashesOldSchool(text): """ Parameter: String. Returns: The string, with each instance of "--" translated to an en-dash HTML entity, and each "---" translated to an em-dash HTML entity. 
""" text = re.sub(r"""---""", r"""—""", text) # em text = re.sub(r"""--""", r"""–""", text) # en return text def educateDashesOldSchoolInverted(text): """ Parameter: String. Returns: The string, with each instance of "--" translated to an em-dash HTML entity, and each "---" translated to an en-dash HTML entity. Two reasons why: First, unlike the en- and em-dash syntax supported by educateDashesOldSchool(), it's compatible with existing entries written before SmartyPants 1.1, back when "--" was only used for em-dashes. Second, em-dashes are more common than en-dashes, and so it sort of makes sense that the shortcut should be shorter to type. (Thanks to Aaron Swartz for the idea.) """ text = re.sub(r"""---""", r"""–""", text) # en text = re.sub(r"""--""", r"""—""", text) # em return text def educateEllipses(text): """ Parameter: String. Returns: The string, with each instance of "..." or ". . ." translated to an ellipsis HTML entity. Example input: Huh...? Example output: Huh…? """ text = re.sub(r"""\.\.\.""", r"""…""", text) text = re.sub(r"""\. \. \.""", r"""…""", text) return text def stupefyEntities(text): """ Parameter: String. Returns: The string, with each SmartyPants HTML entity translated to its ASCII counterpart. Example input: “Hello — world.” Example output: "Hello -- world." """ text = re.sub(r"""–""", r"""-""", text) # en-dash text = re.sub(r"""—""", r"""--""", text) # em-dash text = re.sub(r"""‘""", r"""'""", text) # open single quote text = re.sub(r"""’""", r"""'""", text) # close single quote text = re.sub(r"""“""", r'''"''', text) # open double quote text = re.sub(r"""”""", r'''"''', text) # close double quote text = re.sub(r"""…""", r"""...""", text) # ellipsis return text def processEscapes(text): r""" Parameter: String. Returns: The string, after processing the following backslash escape sequences. This is useful if you want to force a "dumb" quote or other character to appear. 
Escape Value ------ ----- \\ \ \" " \' ' \. . \- - \` ` """ text = re.sub(r"""\\\\""", r"""\""", text) text = re.sub(r'''\\"''', r""""""", text) text = re.sub(r"""\\'""", r"""'""", text) text = re.sub(r"""\\\.""", r""".""", text) text = re.sub(r"""\\-""", r"""-""", text) text = re.sub(r"""\\`""", r"""`""", text) return text def _tokenize(html): """ Parameter: String containing HTML markup. Returns: A list of the tokens comprising the input string. Each token is either a tag (matched from "<" up to the first ">"; the nested-tag pattern is commented out below) or a run of text between tags. Each element of the list is a two-element list; the first item is either 'tag' or 'text'; the second is the actual value. Based on the _tokenize() subroutine from Brad Choate's MTRegex plugin. """ tokens = [] # depth = 6 # nested_tags = "|".join(['(?:<(?:[^<>]',] * depth) + (')*>)' * depth) # match = r"""(?: ) | # comments # (?: <\? .*? \?> ) | # directives # %s # nested tags """ % (nested_tags,) tag_soup = re.compile(r"""([^<]*)(<[^>]*>)""") token_match = tag_soup.search(html) previous_end = 0 while token_match is not None: if token_match.group(1): tokens.append(['text', token_match.group(1)]) tokens.append(['tag', token_match.group(2)]) previous_end = token_match.end() token_match = tag_soup.search(html, token_match.end()) if previous_end < len(html): tokens.append(['text', html[previous_end:]]) return tokens def run_tests(return_tests=False): import unittest sp = smartyPants class TestSmartypantsAllAttributes(unittest.TestCase): # the default attribute is "1", which means "all". def test_dates(self): self.assertEqual(sp("one two '60s"), "one two ’60s") self.assertEqual(sp("1440-80's"), "1440-80’s") self.assertEqual(sp("1440-'80s"), "1440-’80s") self.assertEqual(sp("1440---'80s"), "1440–’80s") self.assertEqual(sp("1960s"), "1960s") # no effect. 
self.assertEqual(sp("1960's"), "1960’s") self.assertEqual(sp("one two '60s"), "one two ’60s") self.assertEqual(sp("'60s"), "’60s") def test_measurements(self): ae = self.assertEqual ae(sp("one two 1.1'2.2\""), "one two 1.1′2.2″") ae(sp("1' 2\""), "1′ 2″") def test_skip_tags(self): self.assertEqual( sp(""""""), # noqa """""") # noqa self.assertEqual( sp("""

He said "Let's write some code." This code here if True:\n\tprint "Okay" is python code.

"""), """

He said “Let’s write some code.” This code here if True:\n\tprint "Okay" is python code.

""") # noqa self.assertEqual( sp('''