|
|
# -*- coding: utf-8 -*-
|
|
|
"""
|
|
|
pygments.filters
|
|
|
~~~~~~~~~~~~~~~~
|
|
|
|
|
|
Module containing filter lookup functions and default
|
|
|
filters.
|
|
|
|
|
|
:copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
|
|
|
:license: BSD, see LICENSE for details.
|
|
|
"""
|
|
|
|
|
|
import re
|
|
|
|
|
|
from pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \
|
|
|
string_to_tokentype
|
|
|
from pygments.filter import Filter
|
|
|
from pygments.util import get_list_opt, get_int_opt, get_bool_opt, \
|
|
|
get_choice_opt, ClassNotFound, OptionError
|
|
|
from pygments.plugin import find_plugin_filters
|
|
|
|
|
|
|
|
|
def find_filter_class(filtername):
    """
    Look up a filter class by its registered name.

    The builtin ``FILTERS`` mapping is consulted first; after that the
    ``(name, class)`` pairs produced by ``find_plugin_filters()`` are
    scanned.  Return None if no filter with that name is found.
    """
    try:
        return FILTERS[filtername]
    except KeyError:
        # Not a builtin filter; fall through to the plugin filters.
        pass
    for plugin_name, plugin_cls in find_plugin_filters():
        if plugin_name == filtername:
            return plugin_cls
    return None
|
|
|
|
|
|
|
|
|
def get_filter_by_name(filtername, **options):
    """
    Instantiate the filter registered as `filtername`.

    All keyword `options` are forwarded to the filter's initializer.
    Raise a ClassNotFound if `find_filter_class` finds no such filter.
    """
    filter_cls = find_filter_class(filtername)
    if not filter_cls:
        raise ClassNotFound('filter %r not found' % filtername)
    return filter_cls(**options)
|
|
|
|
|
|
|
|
|
def get_all_filters():
    """
    Generate the names of all known filters.

    Names from the builtin ``FILTERS`` mapping come first, followed by the
    names reported by ``find_plugin_filters()``.
    """
    for builtin_name in FILTERS:
        yield builtin_name
    for plugin_entry in find_plugin_filters():
        # Each entry is a (name, class) pair; only the name is wanted here.
        plugin_name, _ = plugin_entry
        yield plugin_name
|
|
|
|
|
|
|
|
|
def _replace_special(ttype, value, regex, specialttype,
|
|
|
replacefunc=lambda x: x):
|
|
|
last = 0
|
|
|
for match in regex.finditer(value):
|
|
|
start, end = match.start(), match.end()
|
|
|
if start != last:
|
|
|
yield ttype, value[last:start]
|
|
|
yield specialttype, replacefunc(value[start:end])
|
|
|
last = end
|
|
|
if last != len(value):
|
|
|
yield ttype, value[last:]
|
|
|
|
|
|
|
|
|
class CodeTagFilter(Filter):
    """
    Highlight special code tags in comments and docstrings.

    Options accepted:

    `codetags` : list of strings
      A list of strings that are flagged as code tags.  The default is to
      highlight ``XXX``, ``TODO``, ``BUG`` and ``NOTE``.
    """

    def __init__(self, **options):
        """Compile the tag-matching regex from the `codetags` option."""
        Filter.__init__(self, **options)
        tags = get_list_opt(options, 'codetags',
                            ['XXX', 'TODO', 'BUG', 'NOTE'])
        alternatives = '|'.join([re.escape(tag) for tag in tags if tag])
        if alternatives:
            self.tag_re = re.compile(r'\b(%s)\b' % alternatives)
        else:
            # With no usable tags the pattern would be r'\b()\b', which
            # matches the empty string at every word boundary and makes the
            # filter emit empty Comment.Special tokens.  Use a pattern that
            # can never match instead, so the stream passes through intact.
            self.tag_re = re.compile(r'(?!)')

    def filter(self, lexer, stream):
        """
        Yield the tokens of `stream`, re-typing code tags found inside
        docstrings and non-preprocessor comments as ``Comment.Special``.
        """
        regex = self.tag_re
        for ttype, value in stream:
            # Preprocessor comments are deliberately left untouched.
            if ttype in String.Doc or \
               (ttype in Comment and ttype not in Comment.Preproc):
                for sttype, svalue in _replace_special(ttype, value, regex,
                                                       Comment.Special):
                    yield sttype, svalue
            else:
                yield ttype, value
|
|
|
|
|
|
|
|
|
class KeywordCaseFilter(Filter):
    """
    Convert keywords to lowercase or uppercase or capitalize them, which
    means first letter uppercase, rest lowercase.

    This can be useful e.g. if you highlight Pascal code and want to adapt the
    code to your styleguide.

    Options accepted:

    `case` : string
       The casing to convert keywords to. Must be one of ``'lower'``,
       ``'upper'`` or ``'capitalize'``.  The default is ``'lower'``.
    """

    def __init__(self, **options):
        """Select the case-conversion method named by the `case` option."""
        Filter.__init__(self, **options)
        case = get_choice_opt(options, 'case',
                              ['lower', 'upper', 'capitalize'], 'lower')
        # ``unicode`` only exists on Python 2; on Python 3 the text type is
        # ``str``.  Resolve it here so the filter works on both.
        try:
            text_type = unicode
        except NameError:
            text_type = str
        self.convert = getattr(text_type, case)

    def filter(self, lexer, stream):
        """Yield the tokens of `stream` with keyword values case-converted."""
        for ttype, value in stream:
            if ttype in Keyword:
                yield ttype, self.convert(value)
            else:
                yield ttype, value
|
|
|
|
|
|
|
|
|
class NameHighlightFilter(Filter):
    """
    Highlight a normal Name token with a different token type.

    Example::

        filter = NameHighlightFilter(
            names=['foo', 'bar', 'baz'],
            tokentype=Name.Function,
        )

    This would highlight the names "foo", "bar" and "baz"
    as functions. `Name.Function` is the default token type.

    Options accepted:

    `names` : list of strings
      A list of names that should be given the different token type.
      There is no default.
    `tokentype` : TokenType or string
      A token type or a string containing a token type name that is
      used for highlighting the strings in `names`.  The default is
      `Name.Function`.
    """

    def __init__(self, **options):
        """Read the `names` and `tokentype` options."""
        Filter.__init__(self, **options)
        self.names = set(get_list_opt(options, 'names', []))
        # Start from the default and override only if a type was requested.
        self.tokentype = Name.Function
        requested = options.get('tokentype')
        if requested:
            self.tokentype = string_to_tokentype(requested)

    def filter(self, lexer, stream):
        """
        Yield the tokens of `stream`; tokens whose type is exactly `Name`
        and whose value is in `names` are re-typed to the configured type.
        """
        for ttype, value in stream:
            if ttype is Name and value in self.names:
                ttype = self.tokentype
            yield ttype, value
|
|
|
|
|
|
|
|
|
class ErrorToken(Exception):
    """Default exception class used by `RaiseOnErrorTokenFilter`."""
|
|
|
|
|
|
class RaiseOnErrorTokenFilter(Filter):
    """
    Raise an exception when the lexer generates an error token.

    Options accepted:

    `excclass` : Exception class
      The exception class to raise.
      The default is `pygments.filters.ErrorToken`.

    *New in Pygments 0.8.*
    """

    def __init__(self, **options):
        """Read and validate the `excclass` option."""
        Filter.__init__(self, **options)
        self.exception = options.get('excclass', ErrorToken)
        try:
            # issubclass() raises TypeError if its first argument is not a
            # class; treat that the same as "not an exception class".
            acceptable = issubclass(self.exception, Exception)
        except TypeError:
            acceptable = False
        if not acceptable:
            raise OptionError('excclass option is not an exception class')

    def filter(self, lexer, stream):
        """
        Pass the tokens of `stream` through unchanged, raising the
        configured exception (with the token value) on an `Error` token.
        """
        for token_type, text in stream:
            if token_type is Error:
                raise self.exception(text)
            yield token_type, text
|
|
|
|
|
|
|
|
|
class VisibleWhitespaceFilter(Filter):
    """
    Convert tabs, newlines and/or spaces to visible characters.

    Options accepted:

    `spaces` : string or bool
      If this is a one-character string, spaces will be replaced by this string.
      If it is another true value, spaces will be replaced by ``·`` (unicode
      MIDDLE DOT).  If it is a false value, spaces will not be replaced.  The
      default is ``False``.
    `tabs` : string or bool
      The same as for `spaces`, but the default replacement character is ``»``
      (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK).  The default value
      is ``False``.  Note: this will not work if the `tabsize` option for the
      lexer is nonzero, as tabs will already have been expanded then.
    `tabsize` : int
      If tabs are to be replaced by this filter (see the `tabs` option), this
      is the total number of characters that a tab should be expanded to.
      The default is ``8``.
    `newlines` : string or bool
      The same as for `spaces`, but the default replacement character is ``¶``
      (unicode PILCROW SIGN).  The default value is ``False``.
    `wstokentype` : bool
      If true, give whitespace the special `Whitespace` token type.  This allows
      styling the visible whitespace differently (e.g. greyed out), but it can
      disrupt background colors.  The default is ``True``.

    *New in Pygments 0.8.*
    """

    def __init__(self, **options):
        """Resolve the replacement strings for spaces, tabs and newlines."""
        Filter.__init__(self, **options)
        # ``basestring`` only exists on Python 2; on Python 3 every text
        # string is a ``str``.  Resolve it here so the filter works on both.
        try:
            string_types = basestring
        except NameError:
            string_types = str
        for name, default in (('spaces', u'·'),
                              ('tabs', u'»'),
                              ('newlines', u'¶')):
            opt = options.get(name, False)
            if isinstance(opt, string_types) and len(opt) == 1:
                # A one-character string is used as the replacement itself.
                replacement = opt
            elif opt:
                # Any other true value selects the default character.
                replacement = default
            else:
                # A false value disables the replacement.
                replacement = ''
            setattr(self, name, replacement)
        tabsize = get_int_opt(options, 'tabsize', 8)
        if self.tabs:
            # Pad the marker so that a tab occupies `tabsize` characters.
            self.tabs += ' ' * (tabsize - 1)
        if self.newlines:
            # Keep the real line break after the visible newline marker.
            self.newlines += '\n'
        self.wstt = get_bool_opt(options, 'wstokentype', True)

    def filter(self, lexer, stream):
        """
        Yield the tokens of `stream` with whitespace made visible.

        With `wstokentype` enabled, every whitespace character becomes its
        own `Whitespace` token; otherwise the characters are replaced in
        place and the token types are left alone.
        """
        if self.wstt:
            # Disabled replacements fall back to the character itself, so
            # whitespace is still split out into `Whitespace` tokens.
            spaces = self.spaces or ' '
            tabs = self.tabs or '\t'
            newlines = self.newlines or '\n'
            regex = re.compile(r'\s')

            def replacefunc(wschar):
                if wschar == ' ':
                    return spaces
                elif wschar == '\t':
                    return tabs
                elif wschar == '\n':
                    return newlines
                # Any other whitespace character is kept as it is.
                return wschar

            for ttype, value in stream:
                for sttype, svalue in _replace_special(ttype, value, regex,
                                                       Whitespace, replacefunc):
                    yield sttype, svalue
        else:
            spaces, tabs, newlines = self.spaces, self.tabs, self.newlines
            # simpler processing
            for ttype, value in stream:
                # Spaces are replaced first so that the padding spaces added
                # by the tab replacement are not themselves replaced.
                if spaces:
                    value = value.replace(' ', spaces)
                if tabs:
                    value = value.replace('\t', tabs)
                if newlines:
                    value = value.replace('\n', newlines)
                yield ttype, value
|
|
|
|
|
|
|
|
|
class GobbleFilter(Filter):
    """
    Gobbles source code lines (eats initial characters).

    This filter drops the first ``n`` characters off every line of code.  This
    may be useful when the source code fed to the lexer is indented by a fixed
    amount of space that isn't desired in the output.

    Options accepted:

    `n` : int
       The number of characters to gobble.

    *New in Pygments 1.2.*
    """
    def __init__(self, **options):
        """Read the `n` option (defaults to 0)."""
        Filter.__init__(self, **options)
        self.n = get_int_opt(options, 'n', 0)

    def gobble(self, value, left):
        """
        Drop up to `left` leading characters from `value`.

        Return a ``(remaining_text, still_to_gobble)`` pair.
        """
        size = len(value)
        if left >= size:
            # The whole piece is eaten; the rest carries over.
            return '', left - size
        return value[left:], 0

    def filter(self, lexer, stream):
        """
        Yield the tokens of `stream` with the first ``n`` characters of
        every line removed; tokens left empty are dropped.
        """
        width = self.n
        pending = width  # characters still to gobble on the current line
        for ttype, value in stream:
            trimmed = []
            for index, line in enumerate(value.split('\n')):
                # The first piece continues the line begun by earlier
                # tokens; every later piece starts a fresh line.
                if index == 0:
                    budget = pending
                else:
                    budget = width
                line, pending = self.gobble(line, budget)
                trimmed.append(line)
            value = '\n'.join(trimmed)

            if value == '':
                continue
            yield ttype, value
|
|
|
|
|
|
|
|
|
class TokenMergeFilter(Filter):
    """
    Merges consecutive tokens with the same token type in the output stream of a
    lexer.

    *New in Pygments 1.2.*
    """
    def __init__(self, **options):
        Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        """
        Yield the tokens of `stream`, concatenating the values of adjacent
        tokens whose token type is the same object.
        """
        current_type = None
        current_value = None
        for ttype, value in stream:
            if ttype is current_type:
                # Same type as the pending token: extend it.
                current_value += value
            else:
                # Type changed: flush the pending token, if there is one.
                if current_type is not None:
                    yield current_type, current_value
                current_type = ttype
                current_value = value
        # Flush the last pending token (nothing to do for an empty stream).
        if current_type is not None:
            yield current_type, current_value
|
|
|
|
|
|
|
|
|
# Registry of the builtin filters, keyed by the name they are looked up by.
FILTERS = dict(
    codetagify=CodeTagFilter,
    keywordcase=KeywordCaseFilter,
    highlight=NameHighlightFilter,
    raiseonerror=RaiseOnErrorTokenFilter,
    whitespace=VisibleWhitespaceFilter,
    gobble=GobbleFilter,
    tokenmerge=TokenMergeFilter,
)
|
|
|
|