latex.py
372 lines
| 13.4 KiB
| text/x-python
|
PythonLexer
Jeandet Alexis
|
r0 | # -*- coding: utf-8 -*- | ||
""" | ||||
pygments.formatters.latex | ||||
~~~~~~~~~~~~~~~~~~~~~~~~~ | ||||
Formatter for LaTeX fancyvrb output. | ||||
:copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS. | ||||
:license: BSD, see LICENSE for details. | ||||
""" | ||||
from pygments.formatter import Formatter | ||||
from pygments.token import Token, STANDARD_TYPES | ||||
from pygments.util import get_bool_opt, get_int_opt, StringIO | ||||
__all__ = ['LatexFormatter'] | ||||
def escape_tex(text, commandprefix): | ||||
return text.replace('\\', '\x00'). \ | ||||
replace('{', '\x01'). \ | ||||
replace('}', '\x02'). \ | ||||
replace('\x00', r'\%sZbs{}' % commandprefix). \ | ||||
replace('\x01', r'\%sZob{}' % commandprefix). \ | ||||
replace('\x02', r'\%sZcb{}' % commandprefix). \ | ||||
replace('^', r'\%sZca{}' % commandprefix). \ | ||||
replace('_', r'\%sZus{}' % commandprefix). \ | ||||
replace('&', r'\%sZam{}' % commandprefix). \ | ||||
replace('<', r'\%sZlt{}' % commandprefix). \ | ||||
replace('>', r'\%sZgt{}' % commandprefix). \ | ||||
replace('#', r'\%sZsh{}' % commandprefix). \ | ||||
replace('%', r'\%sZpc{}' % commandprefix). \ | ||||
replace('$', r'\%sZdl{}' % commandprefix). \ | ||||
replace('~', r'\%sZti{}' % commandprefix) | ||||
DOC_TEMPLATE = r''' | ||||
\documentclass{%(docclass)s} | ||||
\usepackage{fancyvrb} | ||||
\usepackage{color} | ||||
\usepackage[%(encoding)s]{inputenc} | ||||
%(preamble)s | ||||
%(styledefs)s | ||||
\begin{document} | ||||
\section*{%(title)s} | ||||
%(code)s | ||||
\end{document} | ||||
''' | ||||
## Small explanation of the mess below :) | ||||
# | ||||
# The previous version of the LaTeX formatter just assigned a command to | ||||
# each token type defined in the current style. That obviously is | ||||
# problematic if the highlighted code is produced for a different style | ||||
# than the style commands themselves. | ||||
# | ||||
# This version works much like the HTML formatter which assigns multiple | ||||
# CSS classes to each <span> tag, from the most specific to the least | ||||
# specific token type, thus falling back to the parent token type if one | ||||
# is not defined. Here, the classes are there too and use the same short | ||||
# forms given in token.STANDARD_TYPES. | ||||
# | ||||
# Highlighted code now only uses one custom command, which by default is | ||||
# \PY and selectable by the commandprefix option (and in addition the | ||||
# escapes \PYZat, \PYZlb and \PYZrb which haven't been renamed for | ||||
# backwards compatibility purposes). | ||||
# | ||||
# \PY has two arguments: the classes, separated by +, and the text to | ||||
# render in that style. The classes are resolved into the respective | ||||
# style commands by magic, which serves to ignore unknown classes. | ||||
# | ||||
# The magic macros are: | ||||
# * \PY@it, \PY@bf, etc. are unconditionally wrapped around the text | ||||
# to render in \PY@do. Their definition determines the style. | ||||
# * \PY@reset resets \PY@it etc. to do nothing. | ||||
# * \PY@toks parses the list of classes, using magic inspired by the | ||||
# keyval package (but modified to use plusses instead of commas | ||||
# because fancyvrb redefines commas inside its environments). | ||||
# * \PY@tok processes one class, calling the \PY@tok@classname command | ||||
# if it exists. | ||||
# * \PY@tok@classname sets the \PY@it etc. to reflect the chosen style | ||||
# for its class. | ||||
# * \PY resets the style, parses the classnames and then calls \PY@do. | ||||
# | ||||
# Tip: to read this code, print it out in substituted form using e.g. | ||||
# >>> print STYLE_TEMPLATE % {'cp': 'PY'} | ||||
STYLE_TEMPLATE = r''' | ||||
\makeatletter | ||||
\def\%(cp)s@reset{\let\%(cp)s@it=\relax \let\%(cp)s@bf=\relax%% | ||||
\let\%(cp)s@ul=\relax \let\%(cp)s@tc=\relax%% | ||||
\let\%(cp)s@bc=\relax \let\%(cp)s@ff=\relax} | ||||
\def\%(cp)s@tok#1{\csname %(cp)s@tok@#1\endcsname} | ||||
\def\%(cp)s@toks#1+{\ifx\relax#1\empty\else%% | ||||
\%(cp)s@tok{#1}\expandafter\%(cp)s@toks\fi} | ||||
\def\%(cp)s@do#1{\%(cp)s@bc{\%(cp)s@tc{\%(cp)s@ul{%% | ||||
\%(cp)s@it{\%(cp)s@bf{\%(cp)s@ff{#1}}}}}}} | ||||
\def\%(cp)s#1#2{\%(cp)s@reset\%(cp)s@toks#1+\relax+\%(cp)s@do{#2}} | ||||
%(styles)s | ||||
\def\%(cp)sZbs{\char`\\} | ||||
\def\%(cp)sZus{\char`\_} | ||||
\def\%(cp)sZob{\char`\{} | ||||
\def\%(cp)sZcb{\char`\}} | ||||
\def\%(cp)sZca{\char`\^} | ||||
\def\%(cp)sZam{\char`\&} | ||||
\def\%(cp)sZlt{\char`\<} | ||||
\def\%(cp)sZgt{\char`\>} | ||||
\def\%(cp)sZsh{\char`\#} | ||||
\def\%(cp)sZpc{\char`\%%} | ||||
\def\%(cp)sZdl{\char`\$} | ||||
\def\%(cp)sZti{\char`\~} | ||||
%% for compatibility with earlier versions | ||||
\def\%(cp)sZat{@} | ||||
\def\%(cp)sZlb{[} | ||||
\def\%(cp)sZrb{]} | ||||
\makeatother | ||||
''' | ||||
def _get_ttype_name(ttype): | ||||
fname = STANDARD_TYPES.get(ttype) | ||||
if fname: | ||||
return fname | ||||
aname = '' | ||||
while fname is None: | ||||
aname = ttype[-1] + aname | ||||
ttype = ttype.parent | ||||
fname = STANDARD_TYPES.get(ttype) | ||||
return fname + aname | ||||
class LatexFormatter(Formatter): | ||||
r""" | ||||
Format tokens as LaTeX code. This needs the `fancyvrb` and `color` | ||||
standard packages. | ||||
Without the `full` option, code is formatted as one ``Verbatim`` | ||||
environment, like this: | ||||
.. sourcecode:: latex | ||||
\begin{Verbatim}[commandchars=\\{\}] | ||||
\PY{k}{def }\PY{n+nf}{foo}(\PY{n}{bar}): | ||||
\PY{k}{pass} | ||||
\end{Verbatim} | ||||
The special command used here (``\PY``) and all the other macros it needs | ||||
are output by the `get_style_defs` method. | ||||
With the `full` option, a complete LaTeX document is output, including | ||||
the command definitions in the preamble. | ||||
The `get_style_defs()` method of a `LatexFormatter` returns a string | ||||
containing ``\def`` commands defining the macros needed inside the | ||||
``Verbatim`` environments. | ||||
Additional options accepted: | ||||
`style` | ||||
The style to use, can be a string or a Style subclass (default: | ||||
``'default'``). | ||||
`full` | ||||
Tells the formatter to output a "full" document, i.e. a complete | ||||
self-contained document (default: ``False``). | ||||
`title` | ||||
If `full` is true, the title that should be used to caption the | ||||
document (default: ``''``). | ||||
`docclass` | ||||
If the `full` option is enabled, this is the document class to use | ||||
(default: ``'article'``). | ||||
`preamble` | ||||
If the `full` option is enabled, this can be further preamble commands, | ||||
e.g. ``\usepackage`` (default: ``''``). | ||||
`linenos` | ||||
If set to ``True``, output line numbers (default: ``False``). | ||||
`linenostart` | ||||
The line number for the first line (default: ``1``). | ||||
`linenostep` | ||||
If set to a number n > 1, only every nth line number is printed. | ||||
`verboptions` | ||||
Additional options given to the Verbatim environment (see the *fancyvrb* | ||||
docs for possible values) (default: ``''``). | ||||
`commandprefix` | ||||
The LaTeX commands used to produce colored output are constructed | ||||
using this prefix and some letters (default: ``'PY'``). | ||||
*New in Pygments 0.7.* | ||||
*New in Pygments 0.10:* the default is now ``'PY'`` instead of ``'C'``. | ||||
`texcomments` | ||||
If set to ``True``, enables LaTeX comment lines. That is, LaTex markup | ||||
in comment tokens is not escaped so that LaTeX can render it (default: | ||||
``False``). *New in Pygments 1.2.* | ||||
`mathescape` | ||||
If set to ``True``, enables LaTeX math mode escape in comments. That | ||||
is, ``'$...$'`` inside a comment will trigger math mode (default: | ||||
``False``). *New in Pygments 1.2.* | ||||
""" | ||||
name = 'LaTeX' | ||||
aliases = ['latex', 'tex'] | ||||
filenames = ['*.tex'] | ||||
def __init__(self, **options): | ||||
Formatter.__init__(self, **options) | ||||
self.docclass = options.get('docclass', 'article') | ||||
self.preamble = options.get('preamble', '') | ||||
self.linenos = get_bool_opt(options, 'linenos', False) | ||||
self.linenostart = abs(get_int_opt(options, 'linenostart', 1)) | ||||
self.linenostep = abs(get_int_opt(options, 'linenostep', 1)) | ||||
self.verboptions = options.get('verboptions', '') | ||||
self.nobackground = get_bool_opt(options, 'nobackground', False) | ||||
self.commandprefix = options.get('commandprefix', 'PY') | ||||
self.texcomments = get_bool_opt(options, 'texcomments', False) | ||||
self.mathescape = get_bool_opt(options, 'mathescape', False) | ||||
self._create_stylesheet() | ||||
def _create_stylesheet(self): | ||||
t2n = self.ttype2name = {Token: ''} | ||||
c2d = self.cmd2def = {} | ||||
cp = self.commandprefix | ||||
def rgbcolor(col): | ||||
if col: | ||||
return ','.join(['%.2f' %(int(col[i] + col[i + 1], 16) / 255.0) | ||||
for i in (0, 2, 4)]) | ||||
else: | ||||
return '1,1,1' | ||||
for ttype, ndef in self.style: | ||||
name = _get_ttype_name(ttype) | ||||
cmndef = '' | ||||
if ndef['bold']: | ||||
cmndef += r'\let\$$@bf=\textbf' | ||||
if ndef['italic']: | ||||
cmndef += r'\let\$$@it=\textit' | ||||
if ndef['underline']: | ||||
cmndef += r'\let\$$@ul=\underline' | ||||
if ndef['roman']: | ||||
cmndef += r'\let\$$@ff=\textrm' | ||||
if ndef['sans']: | ||||
cmndef += r'\let\$$@ff=\textsf' | ||||
if ndef['mono']: | ||||
cmndef += r'\let\$$@ff=\textsf' | ||||
if ndef['color']: | ||||
cmndef += (r'\def\$$@tc##1{\textcolor[rgb]{%s}{##1}}' % | ||||
rgbcolor(ndef['color'])) | ||||
if ndef['border']: | ||||
cmndef += (r'\def\$$@bc##1{\setlength{\fboxsep}{0pt}' | ||||
r'\fcolorbox[rgb]{%s}{%s}{\strut ##1}}' % | ||||
(rgbcolor(ndef['border']), | ||||
rgbcolor(ndef['bgcolor']))) | ||||
elif ndef['bgcolor']: | ||||
cmndef += (r'\def\$$@bc##1{\setlength{\fboxsep}{0pt}' | ||||
r'\colorbox[rgb]{%s}{\strut ##1}}' % | ||||
rgbcolor(ndef['bgcolor'])) | ||||
if cmndef == '': | ||||
continue | ||||
cmndef = cmndef.replace('$$', cp) | ||||
t2n[ttype] = name | ||||
c2d[name] = cmndef | ||||
def get_style_defs(self, arg=''): | ||||
""" | ||||
Return the command sequences needed to define the commands | ||||
used to format text in the verbatim environment. ``arg`` is ignored. | ||||
""" | ||||
cp = self.commandprefix | ||||
styles = [] | ||||
for name, definition in self.cmd2def.iteritems(): | ||||
styles.append(r'\expandafter\def\csname %s@tok@%s\endcsname{%s}' % | ||||
(cp, name, definition)) | ||||
return STYLE_TEMPLATE % {'cp': self.commandprefix, | ||||
'styles': '\n'.join(styles)} | ||||
def format_unencoded(self, tokensource, outfile): | ||||
# TODO: add support for background colors | ||||
t2n = self.ttype2name | ||||
cp = self.commandprefix | ||||
if self.full: | ||||
realoutfile = outfile | ||||
outfile = StringIO() | ||||
outfile.write(r'\begin{Verbatim}[commandchars=\\\{\}') | ||||
if self.linenos: | ||||
start, step = self.linenostart, self.linenostep | ||||
outfile.write(',numbers=left' + | ||||
(start and ',firstnumber=%d' % start or '') + | ||||
(step and ',stepnumber=%d' % step or '')) | ||||
if self.mathescape or self.texcomments: | ||||
outfile.write(r',codes={\catcode`\$=3\catcode`\^=7\catcode`\_=8}') | ||||
if self.verboptions: | ||||
outfile.write(',' + self.verboptions) | ||||
outfile.write(']\n') | ||||
for ttype, value in tokensource: | ||||
if ttype in Token.Comment: | ||||
if self.texcomments: | ||||
# Try to guess comment starting lexeme and escape it ... | ||||
start = value[0:1] | ||||
for i in xrange(1, len(value)): | ||||
if start[0] != value[i]: | ||||
break | ||||
start += value[i] | ||||
value = value[len(start):] | ||||
start = escape_tex(start, self.commandprefix) | ||||
# ... but do not escape inside comment. | ||||
value = start + value | ||||
elif self.mathescape: | ||||
# Only escape parts not inside a math environment. | ||||
parts = value.split('$') | ||||
in_math = False | ||||
for i, part in enumerate(parts): | ||||
if not in_math: | ||||
parts[i] = escape_tex(part, self.commandprefix) | ||||
in_math = not in_math | ||||
value = '$'.join(parts) | ||||
else: | ||||
value = escape_tex(value, self.commandprefix) | ||||
else: | ||||
value = escape_tex(value, self.commandprefix) | ||||
styles = [] | ||||
while ttype is not Token: | ||||
try: | ||||
styles.append(t2n[ttype]) | ||||
except KeyError: | ||||
# not in current style | ||||
styles.append(_get_ttype_name(ttype)) | ||||
ttype = ttype.parent | ||||
styleval = '+'.join(reversed(styles)) | ||||
if styleval: | ||||
spl = value.split('\n') | ||||
for line in spl[:-1]: | ||||
if line: | ||||
outfile.write("\\%s{%s}{%s}" % (cp, styleval, line)) | ||||
outfile.write('\n') | ||||
if spl[-1]: | ||||
outfile.write("\\%s{%s}{%s}" % (cp, styleval, spl[-1])) | ||||
else: | ||||
outfile.write(value) | ||||
outfile.write('\\end{Verbatim}\n') | ||||
if self.full: | ||||
realoutfile.write(DOC_TEMPLATE % | ||||
dict(docclass = self.docclass, | ||||
preamble = self.preamble, | ||||
title = self.title, | ||||
encoding = self.encoding or 'utf-8', | ||||
styledefs = self.get_style_defs(), | ||||
code = outfile.getvalue())) | ||||