__init__.py
357 lines
| 11.2 KiB
| text/x-python
|
PythonLexer
Jeandet Alexis
|
# -*- coding: utf-8 -*-
"""
    pygments.filters
    ~~~~~~~~~~~~~~~~

    Module containing filter lookup functions and default
    filters.

    :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import re | ||||
from pygments.token import String, Comment, Keyword, Name, Error, Whitespace, \ | ||||
string_to_tokentype | ||||
from pygments.filter import Filter | ||||
from pygments.util import get_list_opt, get_int_opt, get_bool_opt, \ | ||||
get_choice_opt, ClassNotFound, OptionError | ||||
from pygments.plugin import find_plugin_filters | ||||
def find_filter_class(filtername):
    """
    Lookup a filter by name. Return None if not found.
    """
    # Builtin filters win over plugin-provided ones.
    try:
        return FILTERS[filtername]
    except KeyError:
        pass
    plugin_matches = (cls for name, cls in find_plugin_filters()
                      if name == filtername)
    return next(plugin_matches, None)
def get_filter_by_name(filtername, **options):
    """
    Return an instantiated filter. Options are passed to the filter
    initializer if wanted. Raise a ClassNotFound if not found.
    """
    cls = find_filter_class(filtername)
    if cls is None:
        raise ClassNotFound('filter %r not found' % filtername)
    return cls(**options)
def get_all_filters():
    """
    Return a generator of all filter names, builtins first,
    then plugin-registered ones.
    """
    for builtin_name in FILTERS:
        yield builtin_name
    for plugin_name, _cls in find_plugin_filters():
        yield plugin_name
def _replace_special(ttype, value, regex, specialttype, | ||||
replacefunc=lambda x: x): | ||||
last = 0 | ||||
for match in regex.finditer(value): | ||||
start, end = match.start(), match.end() | ||||
if start != last: | ||||
yield ttype, value[last:start] | ||||
yield specialttype, replacefunc(value[start:end]) | ||||
last = end | ||||
if last != len(value): | ||||
yield ttype, value[last:] | ||||
class CodeTagFilter(Filter):
    """
    Highlight special code tags in comments and docstrings.

    Options accepted:

    `codetags` : list of strings
        A list of strings that are flagged as code tags.  The default is to
        highlight ``XXX``, ``TODO``, ``BUG`` and ``NOTE``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        codetags = get_list_opt(options, 'codetags',
                                ['XXX', 'TODO', 'BUG', 'NOTE'])
        # Empty entries would match everywhere; drop them before escaping.
        escaped = [re.escape(tag) for tag in codetags if tag]
        self.tag_re = re.compile(r'\b(%s)\b' % '|'.join(escaped))

    def filter(self, lexer, stream):
        tag_re = self.tag_re
        for ttype, value in stream:
            in_docstring = ttype in String.Doc
            in_plain_comment = (ttype in Comment and
                                ttype not in Comment.Preproc)
            if in_docstring or in_plain_comment:
                # Re-emit the text with matched tags as Comment.Special.
                for token in _replace_special(ttype, value, tag_re,
                                              Comment.Special):
                    yield token
            else:
                yield ttype, value
class KeywordCaseFilter(Filter):
    """
    Convert keywords to lowercase or uppercase or capitalize them, which
    means first letter uppercase, rest lowercase.

    This can be useful e.g. if you highlight Pascal code and want to adapt the
    code to your styleguide.

    Options accepted:

    `case` : string
        The casing to convert keywords to.  Must be one of ``'lower'``,
        ``'upper'`` or ``'capitalize'``.  The default is ``'lower'``.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        case = get_choice_opt(options, 'case',
                              ['lower', 'upper', 'capitalize'], 'lower')
        # BUG FIX: the original bound `getattr(unicode, case)`, but the
        # `unicode` builtin does not exist on Python 3 (NameError).  Looking
        # the method up on each token value works for both str and unicode.
        self.convert = lambda value: getattr(value, case)()

    def filter(self, lexer, stream):
        for ttype, value in stream:
            if ttype in Keyword:
                yield ttype, self.convert(value)
            else:
                yield ttype, value
class NameHighlightFilter(Filter):
    """
    Highlight a normal Name token with a different token type.

    Example::

        filter = NameHighlightFilter(
            names=['foo', 'bar', 'baz'],
            tokentype=Name.Function,
        )

    This would highlight the names "foo", "bar" and "baz"
    as functions.  `Name.Function` is the default token type.

    Options accepted:

    `names` : list of strings
        A list of names that should be given the different token type.
        There is no default.
    `tokentype` : TokenType or string
        A token type or a string containing a token type name that is
        used for highlighting the strings in `names`.  The default is
        `Name.Function`.
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.names = set(get_list_opt(options, 'names', []))
        requested = options.get('tokentype')
        self.tokentype = (string_to_tokentype(requested)
                          if requested else Name.Function)

    def filter(self, lexer, stream):
        wanted = self.names
        replacement = self.tokentype
        for ttype, value in stream:
            # Only plain Name tokens are rewritten, not Name subtypes.
            if ttype is Name and value in wanted:
                yield replacement, value
            else:
                yield ttype, value
class ErrorToken(Exception):
    """Default exception raised by `RaiseOnErrorTokenFilter`."""
class RaiseOnErrorTokenFilter(Filter):
    """
    Raise an exception when the lexer generates an error token.

    Options accepted:

    `excclass` : Exception class
        The exception class to raise.
        The default is `pygments.filters.ErrorToken`.

    *New in Pygments 0.8.*
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        excclass = options.get('excclass', ErrorToken)
        try:
            # issubclass() raises TypeError when excclass is not a class
            # at all; fold that into the same option error.
            if not issubclass(excclass, Exception):
                raise TypeError
        except TypeError:
            raise OptionError('excclass option is not an exception class')
        self.exception = excclass

    def filter(self, lexer, stream):
        make_exc = self.exception
        for ttype, value in stream:
            if ttype is Error:
                raise make_exc(value)
            yield ttype, value
class VisibleWhitespaceFilter(Filter):
    """
    Convert tabs, newlines and/or spaces to visible characters.

    Options accepted:

    `spaces` : string or bool
      If this is a one-character string, spaces will be replaces by this string.
      If it is another true value, spaces will be replaced by ``·`` (unicode
      MIDDLE DOT).  If it is a false value, spaces will not be replaced.  The
      default is ``False``.
    `tabs` : string or bool
      The same as for `spaces`, but the default replacement character is ``»``
      (unicode RIGHT-POINTING DOUBLE ANGLE QUOTATION MARK).  The default value
      is ``False``.  Note: this will not work if the `tabsize` option for the
      lexer is nonzero, as tabs will already have been expanded then.
    `tabsize` : int
      If tabs are to be replaced by this filter (see the `tabs` option), this
      is the total number of characters that a tab should be expanded to.
      The default is ``8``.
    `newlines` : string or bool
      The same as for `spaces`, but the default replacement character is ``¶``
      (unicode PILCROW SIGN).  The default value is ``False``.
    `wstokentype` : bool
      If true, give whitespace the special `Whitespace` token type.  This allows
      styling the visible whitespace differently (e.g. greyed out), but it can
      disrupt background colors.  The default is ``True``.

    *New in Pygments 0.8.*
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        for name, default in {'spaces': u'·', 'tabs': u'»',
                              'newlines': u'¶'}.items():
            opt = options.get(name, False)
            # BUG FIX: the original tested isinstance(opt, basestring);
            # `basestring` does not exist on Python 3 (NameError).
            if isinstance(opt, str) and len(opt) == 1:
                setattr(self, name, opt)
            else:
                setattr(self, name, default if opt else '')
        tabsize = get_int_opt(options, 'tabsize', 8)
        if self.tabs:
            # Pad the marker so a replaced tab still spans `tabsize` columns.
            self.tabs += ' ' * (tabsize - 1)
        if self.newlines:
            # Keep the real newline so line structure is preserved.
            self.newlines += '\n'
        self.wstt = get_bool_opt(options, 'wstokentype', True)

    def filter(self, lexer, stream):
        if self.wstt:
            spaces = self.spaces or ' '
            tabs = self.tabs or '\t'
            newlines = self.newlines or '\n'
            regex = re.compile(r'\s')

            def replacefunc(wschar):
                if wschar == ' ':
                    return spaces
                elif wschar == '\t':
                    return tabs
                elif wschar == '\n':
                    return newlines
                return wschar

            # Retokenize so whitespace gets the Whitespace token type.
            for ttype, value in stream:
                for sttype, svalue in _replace_special(ttype, value, regex,
                                                       Whitespace,
                                                       replacefunc):
                    yield sttype, svalue
        else:
            spaces, tabs, newlines = self.spaces, self.tabs, self.newlines
            # Token types stay unchanged: plain string replacement suffices.
            for ttype, value in stream:
                if spaces:
                    value = value.replace(' ', spaces)
                if tabs:
                    value = value.replace('\t', tabs)
                if newlines:
                    value = value.replace('\n', newlines)
                yield ttype, value
class GobbleFilter(Filter):
    """
    Gobbles source code lines (eats initial characters).

    This filter drops the first ``n`` characters off every line of code.  This
    may be useful when the source code fed to the lexer is indented by a fixed
    amount of space that isn't desired in the output.

    Options accepted:

    `n` : int
        The number of characters to gobble.

    *New in Pygments 1.2.*
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)
        self.n = get_int_opt(options, 'n', 0)

    def gobble(self, value, left):
        # Eat up to `left` characters from the front of `value`; return the
        # remainder of the line plus how many characters are still owed.
        if left < len(value):
            return value[left:], 0
        return '', left - len(value)

    def filter(self, lexer, stream):
        per_line = self.n
        # Characters still to gobble on the line currently in progress
        # (a line may be spread across several tokens).
        remaining = per_line
        for ttype, value in stream:
            pieces = value.split('\n')
            pieces[0], remaining = self.gobble(pieces[0], remaining)
            for idx in range(1, len(pieces)):
                pieces[idx], remaining = self.gobble(pieces[idx], per_line)
            joined = '\n'.join(pieces)
            if joined != '':
                yield ttype, joined
class TokenMergeFilter(Filter):
    """
    Merges consecutive tokens with the same token type in the output stream of a
    lexer.

    *New in Pygments 1.2.*
    """

    def __init__(self, **options):
        Filter.__init__(self, **options)

    def filter(self, lexer, stream):
        # FIX: dropped the unused `output = []` local from the original.
        # Accumulate a run of identically-typed tokens, emitting the merged
        # token each time the type changes.
        current_type = None
        current_value = None
        for ttype, value in stream:
            if ttype is current_type:
                current_value += value
            else:
                if current_type is not None:
                    yield current_type, current_value
                current_type = ttype
                current_value = value
        # Flush the final pending run (None means the stream was empty).
        if current_type is not None:
            yield current_type, current_value
# Builtin filter registry: maps public filter names to their classes.
# Consulted by find_filter_class() before any plugin filters.
FILTERS = dict(
    codetagify=CodeTagFilter,
    keywordcase=KeywordCaseFilter,
    highlight=NameHighlightFilter,
    raiseonerror=RaiseOnErrorTokenFilter,
    whitespace=VisibleWhitespaceFilter,
    gobble=GobbleFilter,
    tokenmerge=TokenMergeFilter,
)