scanner.py
104 lines
| 3.0 KiB
| text/x-python
|
PythonLexer
Jeandet Alexis
|
# -*- coding: utf-8 -*-
"""
    pygments.scanner
    ~~~~~~~~~~~~~~~~

    This library implements a regex based scanner. Some languages
    like Pascal are easy to parse but have some keywords that
    depend on the context. Because of this it's impossible to lex
    that just by using a regular expression lexer like the
    `RegexLexer`.

    Have a look at the `DelphiLexer` to get an idea of how to use
    this scanner.

    :copyright: Copyright 2006-2012 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import re | ||||
class EndOfText(RuntimeError):
    """
    Raised when the end of the text has been reached and the user
    tried to call a match function.
    """
class Scanner(object):
    """
    Simple regex based scanner.

    All method patterns are regular expression strings (not
    compiled expressions!). Each pattern is compiled once with the
    scanner's default flags and cached on the instance.

    State kept on the instance:

    - ``data`` / ``data_length``: the scanned text and its length.
    - ``pos``: current position of the scan pointer.
    - ``start_pos``: start position of the last successful `scan`.
    - ``match``: text matched by the last successful `scan`.
    - ``last``: value ``match`` had before the last successful `scan`.
    - ``flags``: default regular expression flags.
    """

    def __init__(self, text, flags=0):
        """
        :param text:    The text which should be scanned
        :param flags:   default regular expression flags
        """
        self.data = text
        self.data_length = len(text)
        self.start_pos = 0
        self.pos = 0
        self.flags = flags
        self.last = None
        self.match = None
        self._re_cache = {}

    def _compile(self, pattern):
        """Return the compiled form of `pattern`, compiling on first use."""
        try:
            return self._re_cache[pattern]
        except KeyError:
            compiled = re.compile(pattern, self.flags)
            self._re_cache[pattern] = compiled
            return compiled

    @property
    def eos(self):
        """`True` if the scanner reached the end of text."""
        # A real read-only property: the decorator form avoids passing the
        # docstring into the ``fset`` slot of ``property()``.
        return self.pos >= self.data_length

    def check(self, pattern):
        """
        Apply `pattern` on the current position and return
        the match object (or `None`). Doesn't touch pos. Use this
        for lookahead.

        :raises EndOfText: if the scanner is already at the end of text.
        """
        if self.eos:
            raise EndOfText()
        return self._compile(pattern).match(self.data, self.pos)

    def test(self, pattern):
        """
        Apply a pattern on the current position and check
        if it matches. Doesn't touch pos.

        :raises EndOfText: if the scanner is already at the end of text.
        """
        return self.check(pattern) is not None

    def scan(self, pattern):
        """
        Scan the text for the given pattern and update pos/match
        and related fields. The return value is a boolean that
        indicates if the pattern matched. The matched value is
        stored on the instance as ``match``, the previous value is
        stored as ``last``. ``start_pos`` is the position of the
        pointer before the pattern was matched, ``pos`` is the
        end position.

        If the pattern does not match, no state is modified.

        :raises EndOfText: if the scanner is already at the end of text.
        """
        if self.eos:
            raise EndOfText()
        m = self._compile(pattern).match(self.data, self.pos)
        if m is None:
            return False
        # Only shift match -> last once we know the scan succeeded, so a
        # failed attempt does not clobber the previous ``last`` value.
        self.last = self.match
        self.start_pos = m.start()
        self.pos = m.end()
        self.match = m.group()
        return True

    def get_char(self):
        """
        Scan exactly one char.

        :raises EndOfText: if the scanner is already at the end of text.
        """
        # ``(?s)`` makes ``.`` match a newline too, so one char is consumed
        # regardless of whether the default flags include ``re.DOTALL``.
        self.scan('(?s).')

    def __repr__(self):
        return '<%s %d/%d>' % (
            self.__class__.__name__,
            self.pos,
            self.data_length
        )