python 由robturtle创建的DFA - https://repl.it/EzaW/47
Posted
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了python 由robturtle创建的DFA - https://repl.it/EzaW/47相关的知识,希望对你有一定的参考价值。
from NFA import NFA
def makeTest(stringGen):
    """Build a tester that shows which prefixes of a sample string an NFA accepts.

    ``stringGen`` is a zero-argument callable producing the sample string.
    The returned ``test(nfa)`` prints the sample shifted right by one column
    and, on the next line, one mark per prefix length 0..len(sample):
    'O' if ``nfa.match`` accepts that prefix, '-' otherwise.
    """
    def test(nfa):
        #nfa.printEdges()
        sample = stringGen()
        print(' ' + sample)
        # The marks are produced lazily, so nfa.match only runs while the
        # second line is being assembled (after the sample was printed).
        marks = ('O' if nfa.match(sample[:end]) else '-'
                 for end in range(len(sample) + 1))
        print(''.join(marks))
    return test
# Demo: each test() call prints the sample 'yangyangyang' and, beneath it,
# an 'O' or '-' per prefix length depending on whether nfa.match accepts it.
test = makeTest(lambda: 'yang' * 3)
# Default-constructed NFA.
test(NFA())
# The literal 'yang', then the same literal under each repetition operator.
test(NFA.of('yang'))
test(NFA.of('yang').zeroOrMore())
test(NFA.of('yang').zeroOrOne())
test(NFA.of('yang').oneOrMore())
import random
# Samples are 9 random picks from 'a', 'b' and '^V'; a fresh sample is drawn
# on every test() call.
test = makeTest(
    lambda: ''.join(random.choice(['a', 'b', '^V']) for _ in range(9)))
test(NFA())

# [ab]+(^V)+
test(
    NFA.of('a').either(NFA.of('b')).oneOrMore().concat(
        NFA.of('^V').oneOrMore()))

# (a|b|^V)+
test(
    NFA.of('a')
    .either(NFA.of('b'))
    .either(NFA.of('^V'))
    .oneOrMore())

# Fixed sample 'abbbbb' against the pattern ab+.
test = makeTest(lambda: 'a' + 'b'*5)
test(NFA.of('a').concat(NFA.of('b').oneOrMore()))
import string
from DFA import parser
@parser
class Power(object):
    '''Parse the exponent part of a number and yield its signed value.

    syntax: <"e"|"E"> ["+"|"-"] <digits+>
    '''

    def __init__(self):
        # Defaults describe an exponent written without an explicit sign.
        self.power = 0
        self.sign = 1

    def addDigit(self, c):
        # Shift the accumulated magnitude one decimal place and append c.
        self.power = 10 * self.power + (ord(c) - ord('0'))

    def setSign(self, c):
        # Anything other than '+' is treated as a minus sign.
        if c == '+':
            self.sign = 1
        else:
            self.sign = -1

    def get(self):
        # Result handed back once parsing has succeeded.
        return self.power * self.sign

    # Transition table indexed by state number.  Each entry maps a string of
    # accepted characters to (next state, action run on that character).
    rules = [
        # 0: the exponent marker
        {'eE': (1, None)},
        # 1: optional sign, or straight to the first digit
        {'+-': (3, setSign), string.digits: (2, addDigit)},
        # 2: further digits (the only terminal state)
        {string.digits: (2, addDigit)},
        # 3: a sign was read, at least one digit must follow
        {string.digits: (2, addDigit)},
    ]
    terminals = (2,)
# Sample inputs for the exponent parser, valid and invalid mixed together.
examples = [
    'e', '3', 'E', 'e+', 'E-',
    'e-', 'E+', 'e+31109', 'e0',
    'E1', 'e-13412', 'e++', 'e--',
    'E+-', 'E-+', 'e1e4', 'e4+3',
    'e13.2',
]


def runExample():
    """Run ``Power.parse`` over every entry of ``examples`` and print the outcome.

    Successful parses are reported as 'Correct: <value>'; a SyntaxError is
    printed as-is.
    """
    rule = '-' * 40
    print(rule)
    print('{:^40s}'.format('Exponent Parser Demo:'))
    print(rule)
    for sample in examples:
        try:
            print('Correct: {}'.format(Power.parse(sample)))
        except SyntaxError as err:
            print(err)
    # NOTE(review): the source had lost its indentation; the closing rule is
    # assumed to be printed once, after the loop.
    print(rule)
#runExample()
from floatTest import test
from FloatNFA import *
# Run the float test tables against the NFA recogniser; syntaxCheck is passed
# as the parser.  Presumably `test` here is floatTest.test with checkValue
# left at its default, so only accept/reject is checked -- verify.
test(floatLiteral.syntaxCheck)
# (literal, expected value) pairs that a float parser must accept.
# Fixed: '1e123', '123e0' and '3.14e0' previously listed 1 as the expected
# value, which a value-checking run would have flagged on a correct parser.
corrects = [('0', 0), ('0.', 0), ('.0', 0), ('1.', 1), ('.1', .1),
            ('123.', 123), ('123', 123), ('.123', .123), ('0.0', 0), ('1.0', 1),
            ('0.1', .1), ('1.1', 1.1), ('12.1', 12.1), ('1.12', 1.12),
            ('0e0', 0), ('0e123', 0), ('1e0', 1), ('1e123', 1e123), ('123e0', 123),
            ('123e13', 123e13), ('3.14e7', 3.14e7), ('3.14e0', 3.14),
            ('3.14e+7', 3.14e7), ('3.14e-7', 3.14e-7), ('+3.14e+7', 3.14e7),
            ('-3.14e-7', -3.14e-7), ('-3.14E+7', -3.14e7),
            ('-3.141592653e-3', -3.141592653e-3)]
# Literals that a float parser must reject with SyntaxError.
wrongs = ['+', '-', '++', '--', '+-', '.', '..', 'e', 'E',
          'eE', 'EE', 'yang', 'f', '3e', 'e7', '3e-', '4e+',
          '+3e', '-4E', '1.e', '.1e', '1.2E', '1.2e3.4', '1.2e.4',
          '1.2e4.', '+1.2e1+', '1.2e1+', '1.2E1+2', '1.2f1', '-1.2f+12',
          '1.fe+1.']
def test(parser, checkValue = False):
    """Run ``parser`` over the ``corrects`` and ``wrongs`` tables and print a report.

    parser:     callable taking a string; it must raise SyntaxError on
                malformed input.
    checkValue: when true, the value returned for each accepted literal is
                also compared with the expected value from ``corrects``.

    Values are compared with ``math.isclose`` (relative tolerance) instead of
    a fixed absolute tolerance of 1e-5: the absolute check could not tell
    3.14e-7 from -3.14e-7 or from 0, and was far too strict for very large
    magnitudes.
    """
    import math

    for s, v in corrects:
        try:
            value = parser(s)
        except SyntaxError as err:
            print('Wrong result for {!r}, expect {:5g}, got SyntaxError:\n{}'.format(s, v, err))
            continue
        if checkValue and not math.isclose(value, v, rel_tol=1e-9, abs_tol=1e-12):
            print('Wrong result for {!r}, expect {:5g}, got {:5g} instead'.format(s, v, value))
        else:
            print('{!r}: pass'.format(s))
    for s in wrongs:
        try:
            value = parser(s)
        except SyntaxError:
            print('{!r}: pass'.format(s))
            continue
        # Pure checkers return None on success; report that as 0.
        if value is None:
            value = 0
        print('Wrong result for {!r}, expect SyntaxError, got {:5g} instead'.format(s, value))
from functools import reduce
class State:
    """A node of the automaton graph.

    ``edges`` maps a character to a single destination state, and
    ``epsilonDests`` is the set of states reachable through one epsilon
    move; it always contains the state itself.  The class attribute
    ``State.id`` is a running counter used to number instances.
    """
    id = 0

    def __init__(self):
        self.id = State.id
        State.id += 1
        self.edges = {}
        self.epsilonDests = {self}

    def __repr__(self):
        return '{}'.format(self.id)

    def link(self, char, dest):
        """Add a transition to ``dest``; ``char`` None means an epsilon move."""
        if char is not None:
            # A second link on the same character replaces the first.
            self.edges[char] = dest
            return
        self.epsilonDests.add(dest)

    def __getitem__(self, char):
        return self.edges[char]

    def epsilonClosure(self):
        """Return the set of states reachable using epsilon moves only."""
        met = set()
        frontier = self.epsilonDests
        while frontier:
            met = met | frontier
            # Next layer: epsilon successors of the frontier not seen so far.
            frontier = reduce(set.union,
                              map(lambda s: s.epsilonDests, frontier), set()) - met
        return met

    def copyWithMapping(self, mapping = None):
        """Deep-copy the graph reachable from this state.

        Returns ``(clone, mapping)`` where ``mapping`` takes every original
        state that was copied to its clone.  A mapping passed in is extended
        in place, so states already present in it are not copied again.
        """
        if mapping is None:
            mapping = {}
        clone = State()
        # Register before recursing so that cycles end at this clone.
        mapping[self] = clone

        def cloneOf(node):
            if node not in mapping:
                node.copyWithMapping(mapping)
            return mapping[node]

        for node in self.epsilonDests:
            clone.link(None, cloneOf(node))
        for char, node in self.edges.items():
            clone.link(char, cloneOf(node))
        return clone, mapping

    def children(self):
        """Return every direct successor (epsilon or labelled) as a set."""
        return self.epsilonDests | set(self.edges.values())

    def depthFirst(self, visitor, met = None):
        """Call ``visitor`` once on each state reachable from here, depth first."""
        if met is None:
            met = set()
        met.add(self)
        visitor(self)
        for child in self.children() - met:
            # A sibling's recursion may already have visited this child.
            if child in met:
                continue
            child.depthFirst(visitor, met)

    def printEdges(self):
        """Print, for each reachable state, its epsilon closure and labelled edges."""
        def show(v):
            print('{!r}{} -> {}'.format(
                v, v.epsilonClosure(), list(v.edges.items())))
        self.depthFirst(show)
class NFA:
    """Automaton fragment with one entry state and one accepting state.

    ``head`` is the start state and ``tail`` the only accepting state.  The
    combinators (``concat``, ``either``, ``zeroOrMore``, ``zeroOrOne``,
    ``oneOrMore``) work on copies and return a new NFA.
    """

    @staticmethod
    def of(chars):
        """Return an NFA accepting exactly the sequence ``chars``."""
        nfa = NFA()
        tail = nfa.head
        for c in chars:
            nxt = State()
            tail.link(c, nxt)
            tail = nxt
        nfa.tail = tail
        return nfa

    @staticmethod
    def eitherOneOf(chars):
        """Return an NFA accepting any single element of the non-empty ``chars``."""
        assert len(chars) > 0
        nfa = NFA.of(chars[0])
        for c in chars[1:]:
            nfa = nfa.either(NFA.of(c))
        return nfa

    def __init__(self, head = None, tail = None):
        """Wrap ``head``/``tail``; with no arguments build a one-state NFA
        whose head and tail coincide (it accepts only the empty input)."""
        if head is None:
            head = State()
        self.head = head
        if tail is None:
            self.tail = self.head
        else:
            self.tail = tail

    def copy(self):
        """Return a deep copy; ``tail`` must be reachable from ``head``."""
        head, mapping = self.head.copyWithMapping()
        clone = NFA(head, mapping[self.tail])
        return clone

    def concat(self, other = None, callback = None):
        """Return a copy of this NFA followed by ``other``.

        ``other`` may be a State (linked as-is and used as the new tail), an
        NFA (copied first), or None (a fresh State is appended).  ``callback``
        is accepted but not used by this implementation.

        Raises TypeError for any other argument type.
        """
        if other is None:
            other = State()
        if isinstance(other, State):
            new = self.copy()
            new.tail.link(None, other)
            new.tail = other
        elif isinstance(other, NFA):
            new = self.copy()
            other = other.copy()
            new.tail.link(None, other.head)
            new.tail = other.tail
        else:
            raise TypeError('argument 2 expect a State or NFA type')
        return new

    def either(self, other):
        """Return an NFA accepting what this NFA or ``other`` accepts."""
        # Both alternatives are wired to the same fresh accepting state.
        tail = State()
        one = self.concat(tail)
        another = other.concat(tail)
        new = NFA()
        new.head.link(None, one.head)
        new.head.link(None, another.head)
        new.tail = tail
        return new

    def zeroOrMore(self):
        """Return an NFA accepting any number of repetitions, including none."""
        copy = self.copy()
        # A new state serves as both entry and exit of the loop.
        head = State()
        head.link(None, copy.head)
        copy.tail.link(None, head)
        copy.head = copy.tail = head
        return copy

    def zeroOrOne(self):
        """Return an NFA that may skip this pattern (epsilon from head to tail)."""
        copy = self.copy()
        copy.head.link(None, copy.tail)
        return copy

    def oneOrMore(self):
        """Return an NFA accepting one or more repetitions (epsilon from tail to head)."""
        copy = self.copy()
        copy.tail.link(None, copy.head)
        return copy

    def syntaxCheck(self, chars):
        """Check that the string ``chars`` is accepted; return None if it is.

        Raises SyntaxError otherwise.  The message shows the input with a
        caret under the first character that could not be consumed, or just
        past the end when the input stops too early.  For empty input the
        caret is at column 0 (the previous for/else bookkeeping put it at
        column 1).
        """
        current = self.head.epsilonClosure()
        # Stays at len(chars) unless a character kills every live state.
        errorAt = len(chars)
        for i, c in enumerate(chars):
            nxt = set()
            for state in current:
                if c in state.edges:
                    nxt |= state[c].epsilonClosure()
            current = nxt
            if not current:
                errorAt = i
                break
        if self.tail not in current:
            raise SyntaxError('\n' + chars + '\n' + ' '*errorAt + '^')

    def match(self, chars):
        """Return True if ``chars`` is accepted, False otherwise."""
        try:
            self.syntaxCheck(chars)
            return True
        except SyntaxError:
            return False

    def printEdges(self):
        """Print the states reachable from ``head``, followed by a rule line."""
        self.head.printEdges()
        print('-'*40)
import string
import NFA
from NFA import NFA
'''
floatLiteral = [sign] pointfloat [exp]
pointfloat = [intpart] fraction | intpart ["."]
intpart = [0-9]+
fraction = "." intpart
exp = /[eE]/ [sign] intpart
'''
# Building blocks, in the order the grammar above needs them.
sign = NFA.eitherOneOf('-+')
intpart = NFA.eitherOneOf(string.digits).oneOrMore()
fraction = NFA.of('.').concat(intpart)

# First alternative, [intpart] fraction, then intpart ["."] joined with either().
pointfloat = (
    intpart.zeroOrOne().concat(fraction)
    .either(intpart.concat(NFA.of('.').zeroOrOne()))
)

exp = (
    NFA.eitherOneOf('eE')
    .concat(sign.zeroOrOne())
    .concat(intpart)
)

floatLiteral = (
    sign.zeroOrOne()
    .concat(pointfloat)
    .concat(exp.zeroOrOne())
)
Empty file
def makeStateAutomata(rules, terminals, resultGetter):
    '''
    Build a table-driven parser function ``startAutomata(obj, chars)``.

    Rules should be indexed and have the element at index 0 as initial state.
    For each rule, there should be zero or more transfer entries, where an
    entry is a key-value pair, the key will be tested against the input
    char by keyword `in`. And the value is a 2-tuple, where the first one
    is the next state, and the second one is the payload function (or None)
    that will be executed as payload(obj, char) when the transfer takes place.
    Terminals should be an iterable containing all the terminating states.
    ResultGetter should be a function used for returning value; it is called
    with obj once the whole input has been accepted.

    The returned function raises SyntaxError when a character has no
    transfer entry in the current state or when the input ends in a
    non-terminal state. The message is the input followed by a caret line
    pointing at the offending position (column 0 for empty input).

    Example:
    class IntPattern:
        rules = {
            0 : {
                '123456789': (1, addDigit),
            },
            1: {
                '0123456789': (1, addDigit),
            }
        }
        terminals = (1,)
        def __init__(self):
            self.value = 0
        def addDigit(self, char):
            self.value = self.value * 10 + ord(char) - ord('0')
        def get(self):
            return self.value
    intParser = makeStateAutomata(IntPattern.rules, IntPattern.terminals, IntPattern.get)
    value = intParser(IntPattern(), '309')
                      ^- use an instance to store the parsing result
    '''
    def startAutomata(obj, chars):
        state = 0
        # Rejection is tracked separately from the state number: resetting
        # the state to 0 on failure would accept bad input whenever state 0
        # is itself a terminal.
        rejected = False
        errorAt = len(chars)
        for i, c in enumerate(chars):
            pair = None
            for rule in rules[state]:
                if c in rule:
                    pair = rules[state][rule]
                    break
            if pair is None:
                rejected = True
                errorAt = i
                break
            state = pair[0]
            if pair[1] is not None:
                pair[1](obj, c)
        if rejected or state not in terminals:
            raise SyntaxError(chars + '\n' + ' '*errorAt + '^')
        return resultGetter(obj)
    return startAutomata
def parser(clz):
    '''A convenient decorator to add a parse functionality to a type

    The class must define `rules`, `terminals` and `get`. Two attributes are
    added to it:
      parser(obj, chars) -- the automaton built by makeStateAutomata
      parse(chars)       -- runs the automaton on a fresh clz() instance

    Both are stored as static methods, so they behave the same whether they
    are reached through the class or through an instance (a plain function
    would receive the instance as an unexpected first argument).

    Raises TypeError if one of the required attributes is missing.

    Example:
    @parser
    class IntPattern: # as described above
    value = IntPattern.parse('309')
    '''
    if not (hasattr(clz, 'rules') and hasattr(clz, 'terminals') and hasattr(clz, 'get')):
        raise TypeError(
            "type {!r} should has attributes 'rules', 'terminals' and 'get'".format(clz))
    clz.parser = staticmethod(makeStateAutomata(clz.rules, clz.terminals, clz.get))
    clz.parse = staticmethod(lambda chars: clz.parser(clz(), chars))
    return clz
# Informal sketch of a rule syntax, kept as a string.  Nothing in the code
# shown here reads it; presumably it documents the input expected by the
# rule tokenizer further down -- verify.
BNF_syntax = '''
rule = id "=" grammar
grammar = "\"" literal "\""
grammar = id
grammar = concat "|" concat
concat = optional " " optional
optional = "[" grammar "]"
optional = grammar "+"
optional = grammar "*"
embraced = "(" grammar ")"
'''
import string
# literal is all chars but line separators
# Operator binding strength, weakest first; operators that share a string
# share a level.
precedence = {
    op: level
    for level, ops in enumerate(('|', '.', '+*', '[', '(', '"'))
    for op in ops
}
# Operators taking two operands ('.' stands for concatenation).
binaryOps = set('|.')
def toPrefix(chars):
    # Tokenize a grammar expression and return the list of tokens, moving each
    # operator towards the front of its operands as it is inserted.
    # NOTE(review): the source had lost all indentation; the nesting below is
    # the most plausible reconstruction and should be checked against the
    # original file.  The function still contains debug prints.
    tokens = []
    # Stack of indices into `tokens`; the top entry is the leftmost position
    # an inserted operator may be moved to.
    parsestack = [0]
    quoted = False
    def insertOp(op):
        # Append `op`, then walk from the end of `tokens` down to just above
        # parsestack[-1], swapping neighbouring tokens.
        prec = precedence[op]
        tokens.append(op)
        print('stack = {}'.format(parsestack))
        print('insert {}, top is {}'.format(op, parsestack[-1]))
        for i in range(len(tokens) - 1, parsestack[-1], -1):
            # The left neighbour is an operand (not in `precedence`) or an
            # operator binding at least as tightly: swap the pair.
            if tokens[i-1] not in precedence or precedence[tokens[i-1]] >= prec:
                tokens[i-1], tokens[i] = tokens[i], tokens[i-1]
            elif i != len(tokens) - 1:
                tokens[i+1], tokens[i] = tokens[i], tokens[i+1]
    token = ''
    # Pair every character with its successor; '!' stands in after the last.
    for c, n in zip(chars, chars[1:] + '!'):
        if c in string.whitespace:
            continue
        if c in string.ascii_letters:
            token += c
            # Identifier ends here: emit it; following whitespace is read as
            # concatenation.
            if n not in string.ascii_letters:
                tokens.append(token)
                token = ''
                if n in string.whitespace:
                    insertOp('.')
        if c == '"':
            if quoted:
                parsestack.pop()
                quoted = False
            else:
                quoted = True
                parsestack.append(len(tokens))
                # NOTE(review): assumed to belong to the opening-quote branch.
                insertOp('"')
        if c in (')', ']'):
            parsestack.pop()
            # `n` is never None here (zip always supplies a character), so
            # only the whitespace test matters.
            if n is not None and n in string.whitespace:
                insertOp('.')
        if c == '(':
            parsestack.append(len(tokens))
        if c == '[':
            insertOp('[')
            parsestack.append(len(tokens))
        if c in ('|', '*', '+'):
            insertOp(c)
    return tokens
def parseRule(line, mapping):
    """Parse one ``name = grammar`` line and record its automaton in ``mapping``.

    line:    text of the rule; it is split at the first '=' only, so the
             grammar part may itself contain '=' (for example a quoted "=").
    mapping: dict from rule name to automaton, updated in place.  The name is
             stored without surrounding whitespace.  When the name is already
             present, the new automaton is added as an alternative through
             ``either``.

    Raises ValueError if the line contains no '='.
    """
    name, grammar = line.split('=', 1)
    name = name.strip()
    tokens = toPrefix(grammar)
    print(tokens)
    # NOTE(review): NFA.fromTokens is not among the NFA methods visible in
    # this file -- confirm it exists before relying on this function.
    nfa = NFA.fromTokens(tokens)
    if name in mapping:
        mapping[name] = mapping[name].either(nfa)
    else:
        mapping[name] = nfa
以上是关于python 由robturtle创建的DFA - https://repl.it/EzaW/47的主要内容,如果未能解决你的问题,请参考以下文章