import re

from docutils import core
from docutils import nodes
from docutils.parsers.rst import directives, Directive
from docutils.writers.html4css1 import Writer, HTMLTranslator

from pygments import highlight
from pygments.lexers import get_lexer_by_name, TextLexer
from pygments.formatters import HtmlFormatter


# Splits role text of the form "label <explanation>" into its two parts.
# Compiled once here instead of on every acronym/abbreviation visit.
_TITLED_TEXT_RE = re.compile(r'^(.+)\s<(.+)>')

# NOTE(review): the search string of the final ``replace`` call in
# blogPreview()/blogArticleString() was empty in the reviewed text, which
# makes ``replace`` insert the replacement between every single character.
# The markup looks like it was lost together with the other HTML in this
# file; "<!-- more -->" (what docutils emits for a ``.. more`` reST comment)
# is presumed to be the intended marker - confirm against the upstream copy.
_MORE_MARKER = "<!-- more -->"


class Attrs(object):
    """
    Module-wide holder for the article attributes.
    """
    # Filled by NoHeaderHTMLTranslator (docinfo fields and date) and replaced
    # with a fresh dictionary by blogArticleString() before every conversion.
    ATTRS = {}


class Pygments(Directive):
    """
    Source code syntax highlighting.

    Takes the lexer name as its only argument and the code to highlight as
    the directive content. Registered below under the name 'sourcecode'.
    """
    required_arguments = 1
    optional_arguments = 0
    final_argument_whitespace = True
    has_content = True

    def run(self):
        """
        Return a single raw HTML node with the highlighted directive content.
        """
        self.assert_has_content()
        try:
            lexer = get_lexer_by_name(self.arguments[0])
        except ValueError:
            # no lexer found - use the text one instead of an exception
            lexer = TextLexer()
        # noclasses=True makes Pygments inline the styles, so the generated
        # markup does not depend on an external stylesheet.
        formatter = HtmlFormatter(noclasses=True)
        parsed = highlight(u'\n'.join(self.content), lexer, formatter)
        return [nodes.raw('', parsed, format='html')]


directives.register_directive('sourcecode', Pygments)


class CustomHTMLTranslator(HTMLTranslator):
    """
    Base class for reST files translations.
    There are a couple of customizations for docinfo fields behaviour and
    for abbreviations and acronyms.
    """

    def __init__(self, document):
        """
        Set some nice defaults for articles translations
        """
        HTMLTranslator.__init__(self, document)
        self.initial_header_level = 4

    def visit_section(self, node):
        """
        Don't affect document, just keep track of the section levels
        """
        self.section_level += 1

    def depart_section(self, node):
        """
        Leave the section: only the level counter is updated.
        """
        self.section_level -= 1

    def visit_meta(self, node):
        """
        Produce nothing for meta nodes.
        """
        pass

    def depart_meta(self, node):
        """
        Produce nothing for meta nodes.
        """
        pass

    def visit_document(self, node):
        """
        Produce nothing when entering the document node.
        """
        pass

    def depart_document(self, node):
        """
        Produce nothing when leaving the document node.
        """
        pass

    def depart_docinfo(self, node):
        """
        Reset body, remove unnecessary content.
        """
        self.body = []

    def visit_date(self, node):
        """
        Produce nothing for the date field.
        """
        pass

    def depart_date(self, node):
        """
        Produce nothing for the date field.
        """
        pass

    def visit_literal(self, node):
        """
        This is almost the same as the original one from HTMLTranslator class.
        The only difference is in used HTML tag: it uses 'code' instead of
        'tt'

        Raises nodes.SkipNode once the whole literal has been written.
        """
        # NOTE(review): the literals '<span class="pre">', '&nbsp;' and
        # '</code>' were blank in the reviewed text; they are restored from
        # the docutils html4css1 method this one mirrors - confirm against
        # the upstream copy of this file.
        self.body.append(self.starttag(node, 'code', ''))
        text = node.astext()
        for token in self.words_and_spaces.findall(text):
            if token.strip():
                # Protect text like "--an-option" and the regular expression
                # ``[+]?(\d+(\.\d*)?|\.\d+)`` from bad line wrapping
                if self.sollbruchstelle.search(token):
                    self.body.append('<span class="pre">%s</span>'
                                     % self.encode(token))
                else:
                    self.body.append(self.encode(token))
            elif token in ('\n', ' '):
                # Allow breaks at whitespace:
                self.body.append(token)
            else:
                # Protect runs of multiple spaces; the last space can wrap:
                self.body.append('&nbsp;' * (len(token) - 1) + ' ')
        # SkipNode below also skips the depart call, so the element has to be
        # closed here.
        self.body.append('</code>')
        # Content already processed:
        raise nodes.SkipNode

    def _open_titled_tag(self, node, tagname):
        """
        Append the start tag `tagname` for an acronym/abbreviation node.

        When the text of the first child looks like "label <explanation>",
        the child is replaced with a text node holding only the label and the
        explanation is put into the `title` attribute of the tag. Otherwise
        the tag is opened without a title and the node is left untouched.
        """
        node_text = node.children[0].astext().replace('\n', ' ')
        found = _TITLED_TEXT_RE.match(node_text)
        if found is None:
            self.body.append(self.starttag(node, tagname, ''))
            return
        label, explanation = found.groups()
        node.children[0] = nodes.Text(label)
        self.body.append(self.starttag(node, tagname, '', title=explanation))

    def visit_acronym(self, node):
        """
        Define missing acronym HTML tag
        """
        self._open_titled_tag(node, 'acronym')

    def visit_abbreviation(self, node):
        """
        Define missing abbr HTML tag
        """
        self._open_titled_tag(node, 'abbr')


class NoHeaderHTMLTranslator(CustomHTMLTranslator):
    """
    Special subclass for generating only body of an article
    """

    def __init__(self, document):
        """
        Remove all needless parts of HTML document.
        """
        CustomHTMLTranslator.__init__(self, document)
        self.head = []
        self.meta = []
        self.head_prefix = ['', '', '', '', '']
        self.body_prefix = []
        self.body_suffix = []
        self.stylesheet = []
        self.generator = ''

    def visit_field(self, node):
        """
        Harvest docinfo fields and store it in global dictionary.

        The field name is lower-cased and used as the key. Unpacking expects
        the field node to have exactly two children.
        """
        key, val = [child.astext() for child in node]
        Attrs.ATTRS[key.lower()] = val

    def visit_date(self, node):
        """
        Store published date in global dictionary.
        """
        Attrs.ATTRS['date'] = node.astext()


class PreviewHTMLTranslator(CustomHTMLTranslator):
    """
    Class for display article in the browser as a preview.
    """

    def __init__(self, document):
        """
        Alter levels for the heading tags, define custom, blog specific
        stylesheets.

        Note, that style_custom is present only locally to adjust way of
        display the page
        """
        CustomHTMLTranslator.__init__(self, document)
        self.initial_header_level = 1
        self.section_level = 1
        # order of css files is important
        self.default_stylesheets = ["css/widget_css_2_bundle.css",
                                    "css/style_custom.css",
                                    "css/style_blogger.css"]
        self.stylesheet = [self.stylesheet_link % self.encode(css)
                           for css in self.default_stylesheets]
        # Collects the title heading; becomes the body in depart_docinfo().
        self.body_ = []

    def depart_docinfo(self, node):
        """
        Overwrite body with some custom one. body_ will hold the first heading
        with title of the document.
        """
        self.body = self.body_

    def visit_field(self, node):
        """
        Additional 'keyword' for the ODF metadata

        Only the 'title' field is used: it goes to the document head and, as
        a heading, to body_. Unpacking expects the field node to have exactly
        two children.
        """
        key, value = [child.astext() for child in node]
        if key.lower() != 'title':
            return
        title = self.encode(value)
        # NOTE(review): the markup inside both literals below was blank in
        # the reviewed text. '<title>' follows from the target (head); the
        # heading markup is a stand-in modelled on the document title docutils
        # itself emits - confirm the exact tag/classes against the upstream
        # copy of this file.
        self.head.append('<title>%s</title>\n' % title)
        self.body_.append('<h1 class="title">%s</h1>\n' % title)


class BlogBodyWriter(Writer):
    """
    Custom Writer class for generating HTML partial with the article
    """

    def __init__(self):
        Writer.__init__(self)
        self.translator_class = NoHeaderHTMLTranslator

    def translate(self):
        """
        Force UTF-8 as the output encoding, then run the stock translation.
        """
        self.document.settings.output_encoding = "utf-8"
        Writer.translate(self)


class BlogPreviewWriter(Writer):
    """
    Custom Writer class for generating full HTML of the article
    """

    def __init__(self):
        Writer.__init__(self)
        self.translator_class = PreviewHTMLTranslator

    def translate(self):
        """
        Force UTF-8 as the output encoding, then run the stock translation.
        """
        self.document.settings.output_encoding = "utf-8"
        Writer.translate(self)


def _isolate_more_marker(html_output):
    """
    Strip surrounding whitespace and put the "more" marker on its own line.

    Accepts and returns whatever string type docutils produced: the marker
    and the newline are encoded first when the output is a byte string, so
    the ``replace`` call never mixes text and bytes.
    """
    marker = _MORE_MARKER
    newline = "\n"
    if isinstance(html_output, bytes):
        marker = marker.encode("utf-8")
        newline = newline.encode("utf-8")
    return html_output.strip().replace(marker, newline + marker + newline)


def blogPreview(string):
    """
    Returns full HTML of the article, meant for previewing it in a browser.
    string argument is an article in reST
    """
    html_output = core.publish_string(string, writer=BlogPreviewWriter())
    return _isolate_more_marker(html_output)


def blogArticleString(string):
    """
    Returns partial HTML of the article, and attribute dictionary
    string argument is an article in reST
    """
    # reset ATTRS
    Attrs.ATTRS = {}
    html_output = core.publish_string(string, writer=BlogBodyWriter())
    return _isolate_more_marker(html_output), Attrs.ATTRS