python 由robturtle创建的DFA - https://repl.it/EzaW/47

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了 robturtle 创建的 Python DFA 示例(https://repl.it/EzaW/47)的相关知识,希望对你有一定的参考价值。

from NFA import NFA

def makeTest(stringGen):
  '''Build a test function that visualises which prefixes an NFA matches.

  stringGen is called with no arguments on every run and must return the
  string to test. The returned function takes an object with a
  match(str) method and prints two lines: the string (shifted right by one
  space) and, under it, one mark per prefix s[:0] .. s[:len(s)] -- 'O' when
  the prefix matches, '-' when it does not.
  '''
  def test(nfa):
    text = stringGen()
    print(' ' + text)
    # Generator expression: the prefixes are matched while joining, i.e.
    # after the header line has already been printed.
    marks = ('O' if nfa.match(text[:end]) else '-'
             for end in range(len(text) + 1))
    print(''.join(marks))
  return test

# Demo 1: run 'yangyangyang' through several NFAs. Each call prints the
# string and, under it, one mark per prefix ('O' = match, '-' = no match).
test = makeTest(lambda: 'yang' * 3)
test(NFA())
test(NFA.of('yang'))
test(NFA.of('yang').zeroOrMore())
test(NFA.of('yang').zeroOrOne())
test(NFA.of('yang').oneOrMore())

import random
# Demo 2: a fresh random string per call, made of 9 pieces drawn from
# 'a', 'b' and the two-character piece '^V'.
test = makeTest(lambda: ''.join(
      map(lambda _: random.choice(['a', 'b', '^V']), range(9))))
      
test(NFA())
# [ab]+(^V)+
test(NFA.of('a')
    .either(NFA.of('b'))
    .oneOrMore()
    .concat(
      NFA.of('^V').oneOrMore()
    ))
    
# (a|b|^V)+
test(NFA.of('a').either(NFA.of('b')).either(NFA.of('^V')).oneOrMore())

# Demo 3: fixed string 'abbbbb' against the pattern ab+
test = makeTest(lambda: 'a' + 'b'*5)
test(NFA.of('a').concat(NFA.of('b').oneOrMore()))
import string
from DFA import parser 

@parser
class Power(object):
  '''Accumulator for the exponent part of a number.

  syntax: <"e"|"E"> ["+"|"-"] <digits+>

  An instance collects the sign and the digits while the automaton built
  from `rules` walks the input; get() returns the signed exponent.
  '''
  def __init__(self):
    self.sign = 1
    self.power = 0

  def addDigit(self, c):
    # Shift the digits read so far one decimal place and append c.
    digit = ord(c) - ord('0')
    self.power = 10 * self.power + digit

  def setSign(self, c):
    if c == '+':
      self.sign = 1
    else:
      self.sign = -1

  def get(self):
    return self.power * self.sign

  # One dict per state; each key is the set of accepted characters and each
  # value is (next state, action or None).
  rules = [
    # state 0: the exponent marker
    {
      'eE': (1, None),
    },
    # state 1: optional sign, or the first digit straight away
    {
      '+-': (3, setSign),
      string.digits: (2, addDigit),
    },
    # state 2: at least one digit has been read
    {
      string.digits: (2, addDigit),
    },
    # state 3: a sign has been read, a digit must follow
    {
      string.digits: (2, addDigit),
    },
  ]

  # Only state 2 (one or more digits seen) is accepting.
  terminals = (2,)

# Sample inputs for the exponent parser, valid and invalid mixed.
examples = [
  'e', '3', 'E', 'e+', 'E-',
  'e-', 'E+', 'e+31109', 'e0',
  'E1', 'e-13412', 'e++', 'e--',
  'E+-', 'E-+', 'e1e4', 'e4+3',
  'e13.2',
]

def runExample():
  '''Parse every entry of `examples` with Power.parse and print the outcome.

  A successful parse prints 'Correct: <value>'; a SyntaxError is printed
  as-is. The listing is framed by 40-character rulers and a centred title.
  '''
  ruler = '-' * 40
  print(ruler)
  print('{:^40s}'.format('Exponent Parser Demo:'))
  print(ruler)
  for text in examples:
    try:
      print('Correct: {}'.format(Power.parse(text)))
    except SyntaxError as err:
      print(err)
  print(ruler)

#runExample()
from floatTest import test
from FloatNFA import *
# Run the imported test driver against the NFA's syntaxCheck method.
# NOTE(review): syntaxCheck only raises or returns None, so this presumably
# exercises accept/reject behaviour only, not parsed values -- confirm
# against floatTest.test.
test(floatLiteral.syntaxCheck)
# Valid float literals paired with the value each one denotes.
# The expected value of "<m>e<n>" is m * 10**n, so an exponent of 0 leaves the
# mantissa unchanged ('123e0' is 123, not 1) and '1e123' is 1e123.
corrects = [('0', 0), ('0.', 0), ('.0', 0), ('1.', 1), ('.1', .1),
            ('123.', 123), ('123', 123), ('.123', .123), ('0.0', 0), ('1.0', 1),
            ('0.1', .1), ('1.1', 1.1), ('12.1', 12.1), ('1.12', 1.12),
            ('0e0', 0), ('0e123', 0), ('1e0', 1), ('1e123', 1e123), ('123e0', 123),
            ('123e13', 123e13), ('3.14e7', 3.14e7), ('3.14e0', 3.14),
            ('3.14e+7', 3.14e7), ('3.14e-7', 3.14e-7), ('+3.14e+7', 3.14e7),
            ('-3.14e-7', -3.14e-7), ('-3.14E+7', -3.14e7),
            ('-3.141592653e-3', -3.141592653e-3)]
            
# Inputs that are not valid float literals; test() below expects the parser
# to raise SyntaxError for every one of them.
wrongs = ['+', '-', '++', '--', '+-', '.', '..', 'e', 'E',
          'eE', 'EE', 'yang', 'f', '3e', 'e7', '3e-', '4e+',
          '+3e', '-4E', '1.e', '.1e', '1.2E', '1.2e3.4', '1.2e.4',
          '1.2e4.', '+1.2e1+', '1.2e1+', '1.2E1+2', '1.2f1', '-1.2f+12',
          '1.fe+1.']
          
def test(parser, checkValue=False):
  '''Run a float-literal parser over `corrects` and `wrongs`, printing one
  line per case.

  parser     -- callable taking the literal string; it must raise
                SyntaxError for rejected input.
  checkValue -- when true, the value returned for each valid literal is
                also compared with the expected one.

  Values are compared with math.isclose (relative tolerance) instead of a
  fixed absolute epsilon: an absolute 1e-5 accepts any result for literals
  such as '3.14e-7' and rejects harmless rounding noise for '123e13'.
  '''
  import math

  for s, v in corrects:
    try:
      value = parser(s)
    except SyntaxError as err:
      print('Wrong result for {!r}, expect {:5g}, got SyntaxError:\n{}'.format(s, v, err))
      continue
    # abs_tol covers the expected-zero cases, where a relative tolerance
    # alone would demand an exact 0.
    if checkValue and not math.isclose(value, v, rel_tol=1e-9, abs_tol=1e-12):
      print('Wrong result for {!r}, expect {:5g}, got {:5g} instead'.format(s, v, value))
    else:
      print('{!r}: pass'.format(s))

  for s in wrongs:
    try:
      value = parser(s)
      # Syntax-only checkers return None; show it as 0 so the numeric
      # format below still works.
      if value is None:
        value = 0
      print('Wrong result for {!r}, expect SyntaxError, got {:5g} instead'.format(s, value))
    except SyntaxError:
      print('{!r}: pass'.format(s))
from functools import reduce

class State:
  '''A node of the NFA graph.

  Each state keeps
    epsilonDests -- the set of states reachable by one epsilon move; it
                    always contains the state itself,
    edges        -- a dict mapping a character to the single destination
                    state for that character,
    id           -- a serial number used only by __repr__.

  Graph walks (copyWithMapping, depthFirst) are iterative, so long chains of
  states do not hit Python's recursion limit.
  '''

  # Next serial number to hand out; shared by all states.
  id = 0

  def __init__(self):
    self.epsilonDests = {self}
    self.edges = {}
    self.id = State.id
    State.id += 1

  def __repr__(self):
    return str(self.id)

  def link(self, char, dest):
    '''Add a transition to dest: an epsilon move when char is None,
    otherwise an edge labelled char (replacing any existing edge with the
    same label).'''
    if char is None:
      self.epsilonDests.add(dest)
    else:
      self.edges[char] = dest

  def __getitem__(self, char):
    '''Return the destination of the edge labelled char (KeyError if none).'''
    return self.edges[char]

  def epsilonClosure(self):
    '''Return the set of states reachable through epsilon moves only,
    including this state.'''
    met = set()
    current = self.epsilonDests
    while current:
      met |= current
      # Next frontier: everything one epsilon move away that is still new.
      current = set().union(*(s.epsilonDests for s in current)) - met
    return met

  def copyWithMapping(self, mapping = None):
    '''Copy the graph reachable from this state.

    mapping maps original states to their clones; states already present in
    it are reused instead of being copied again. Returns (clone, mapping)
    where clone is the copy of this state and mapping has been extended
    with every state copied along the way.
    '''
    if mapping is None:
      mapping = {}
    clone = State()
    mapping[self] = clone

    # Explicit work list instead of recursion: one entry per original state
    # whose outgoing links still have to be replicated on its clone.
    pending = [self]
    while pending:
      original = pending.pop()
      duplicate = mapping[original]

      for epsilon in original.epsilonDests:
        if epsilon not in mapping:
          mapping[epsilon] = State()
          pending.append(epsilon)
        duplicate.link(None, mapping[epsilon])

      for char, dest in original.edges.items():
        if dest not in mapping:
          mapping[dest] = State()
          pending.append(dest)
        duplicate.link(char, mapping[dest])
    return clone, mapping

  def children(self):
    '''Return every state one move away (epsilon or labelled), including
    this state itself.'''
    return self.epsilonDests | set(self.edges.values())

  def depthFirst(self, visitor, met = None):
    '''Call visitor(state) once for every state reachable from this one, in
    depth-first pre-order. States already in met are skipped; met is
    updated in place.'''
    if met is None:
      met = set()

    met.add(self)
    visitor(self)
    # Stack of iterators over the not-yet-visited children of the states on
    # the current path; mirrors the call stack of a recursive walk.
    stack = [iter(self.children() - met)]
    while stack:
      for child in stack[-1]:
        if child not in met:
          met.add(child)
          visitor(child)
          stack.append(iter(child.children() - met))
          break
      else:
        stack.pop()

  def printEdges(self):
    '''Print, for every reachable state, its id, its epsilon closure and its
    labelled edges.'''
    self.depthFirst(lambda v: print('{!r}{} -> {}'.format(
        v, v.epsilonClosure(), list(v.edges.items()))))

class NFA:
  '''Non-deterministic finite automaton with one entry state (head) and one
  accepting state (tail), built from State nodes.

  The combinators (concat, either, zeroOrMore, zeroOrOne, oneOrMore) operate
  on a copy of the receiver and return a new NFA.
  '''

  @staticmethod
  def of(chars):
    '''Return an NFA matching exactly the sequence chars (a chain with one
    labelled edge per element).'''
    nfa = NFA()
    tail = nfa.head
    for c in chars:
      nxt = State()
      tail.link(c, nxt)
      tail = nxt
    nfa.tail = tail
    return nfa

  @staticmethod
  def eitherOneOf(chars):
    '''Return an NFA matching any single element of the non-empty
    sequence chars.'''
    assert len(chars) > 0
    nfa = NFA.of(chars[0])
    for c in chars[1:]:
      nfa = nfa.either(NFA.of(c))
    return nfa

  def __init__(self, head = None, tail = None):
    '''Wrap an existing graph. With no head a fresh single state is
    created; with no tail the head doubles as the accepting state.'''
    if head is None:
      head = State()
    self.head = head
    if tail is None:
      self.tail = self.head
    else:
      self.tail = tail

  def copy(self):
    '''Return an NFA over a copy of the graph reachable from head.'''
    head, mapping = self.head.copyWithMapping()
    clone = NFA(head, mapping[self.tail])
    return clone

  def concat(self, other = None, callback = None):
    '''Return a copy of this NFA followed by other.

    other may be a State (linked as the new tail as-is, not copied), an NFA
    (copied first) or None (a fresh State is used). callback is accepted
    but not used by this implementation.

    Raises TypeError for any other type.
    '''
    if other is None:
      other = State()

    if isinstance(other, State):
      new = self.copy()
      new.tail.link(None, other)
      new.tail = other
    elif isinstance(other, NFA):
      new = self.copy()
      other = other.copy()
      new.tail.link(None, other.head)
      new.tail = other.tail
    else:
      raise TypeError('argument 2 expect a State or NFA type')
    return new

  def either(self, other):
    '''Return an NFA matching what this NFA or other matches.'''
    # Both alternatives are copied and end in the same fresh tail state.
    tail = State()
    one = self.concat(tail)
    another = other.concat(tail)
    new = NFA()
    new.head.link(None, one.head)
    new.head.link(None, another.head)
    new.tail = tail
    return new

  def zeroOrMore(self):
    '''Return an NFA matching any number of repetitions, including none.'''
    copy = self.copy()
    # A new state serves as both entry and accepting state; the body loops
    # back into it.
    head = State()
    head.link(None, copy.head)
    copy.tail.link(None, head)
    copy.head = copy.tail = head
    return copy

  def zeroOrOne(self):
    '''Return an NFA matching this pattern or the empty string.'''
    copy = self.copy()
    copy.head.link(None, copy.tail)
    return copy

  def oneOrMore(self):
    '''Return an NFA matching one or more repetitions of this pattern.'''
    copy = self.copy()
    copy.tail.link(None, copy.head)
    return copy

  def syntaxCheck(self, chars):
    '''Check that the whole string chars is matched by this NFA.

    Returns None on success. Raises SyntaxError otherwise; the message
    shows chars with a caret under the offset of the first character that
    could not be consumed, or just past the end when the input ran out
    before the accepting state was reached.
    '''
    current = self.head.epsilonClosure()
    # Number of characters consumed so far. Counted explicitly so that empty
    # input reports offset 0 (a for/else on enumerate would report 1).
    consumed = 0
    for c in chars:
      nxt = set()
      for state in current:
        if c in state.edges:
          nxt |= state[c].epsilonClosure()
      current = nxt
      if not current:
        break
      consumed += 1
    if self.tail not in current:
      raise SyntaxError('\n' + chars + '\n' + ' '*consumed + '^')

  def match(self, chars):
    '''Return True when syntaxCheck accepts chars, False otherwise.'''
    try:
      self.syntaxCheck(chars)
      return True
    except SyntaxError:
      return False

  def printEdges(self):
    '''Print the graph reachable from head, followed by a ruler line.'''
    self.head.printEdges()
    print('-'*40)
import string
import NFA
from NFA import NFA

'''
floatLiteral = [sign] pointfloat [exp]

pointfloat = [intpart] fraction | intpart ["."]
intpart = [0-9]+
fraction = "." intpart

exp = /[eE]/ [sign] intpart
'''
# Build the float-literal NFA bottom-up, following the grammar quoted above.
# A single '-' or '+'.
sign = NFA.eitherOneOf('-+')
# One or more decimal digits.
intpart = NFA.eitherOneOf(string.digits).oneOrMore()
# '.' followed by digits.
fraction = NFA.of('.').concat(intpart)
# First alternative: [intpart] fraction
pointfloat = intpart.zeroOrOne().concat(fraction)
# Second alternative: intpart ["."]
pointfloat = pointfloat.either(intpart.concat(NFA.of('.').zeroOrOne()))
# 'e' or 'E', optional sign, digits.
exp = NFA.eitherOneOf('eE').concat(sign.zeroOrOne()).concat(intpart)

# Complete literal: [sign] pointfloat [exp]
floatLiteral = sign.zeroOrOne().concat(pointfloat).concat(exp.zeroOrOne())
# Empty file
def makeStateAutomata(rules, terminals, resultGetter):
  '''Build a table-driven parser function from a transition table.

  Rules should be indexable by state and have the initial state at index 0.
  Each rule holds zero or more transfer entries, where an entry is a
  key-value pair: the key is tested against the input char with the keyword
  `in`, and the value is a tuple whose first element is the next state and
  whose second element is the payload function executed as
  payload(obj, char) when the transfer takes place (None for no payload).

  Terminals should be a container holding all the terminating states.

  ResultGetter should be a function that takes the result object and
  returns the parsed value.

  The returned function startAutomata(obj, chars) raises SyntaxError when a
  character has no matching entry in the current state or when the input
  ends in a non-terminating state. The message is chars followed by a caret
  under the offending position.

  Example:

    class IntPattern:
      rules = {
        0 : {
          '123456789': (1, addDigit),
        },
        1: {
          '0123456789': (1, addDigit),
        }
      }

      terminals = (1,)

      def __init__(self):
        self.value = 0

      def addDigit(self, char):
        self.value = self.value * 10 + ord(char) - ord('0')

      def get(self):
        return self.value

    intParser = makeStateAutomata(IntPattern.rules, IntPattern.terminals, IntPattern.get)
    value = intParser(IntPattern(), '309')
                      ^- use an instance to store the parsing result
  '''
  def startAutomata(obj, chars):
    state = 0
    # Number of characters consumed; doubles as the caret offset.
    consumed = 0
    # Set when a character matches no entry. Kept apart from `state` because
    # state 0 may itself be a terminating state, so it cannot serve as the
    # failure marker.
    stuck = False
    for c in chars:
      transfers = rules[state]
      for accepted in transfers:
        if c in accepted:
          pair = transfers[accepted]
          state = pair[0]
          if pair[1] is not None:
            pair[1](obj, c)
          break
      else:
        stuck = True
        break
      consumed += 1
    if stuck or state not in terminals:
        raise SyntaxError(chars + '\n' + ' '*consumed + '^')
    return resultGetter(obj)
  return startAutomata

def parser(clz):
  '''A convenient decorator to add a parse functionality to a type.

  The decorated class must define `rules`, `terminals` and `get` (see
  makeStateAutomata). Two attributes are added to it:

    parser -- the automaton built from rules/terminals/get, called as
              parser(obj, chars)
    parse  -- parse(chars): run the automaton on a fresh instance clz()
              and return the result

  Raises TypeError when one of the required attributes is missing.

  Example:
    @parser
    class IntPattern: # as described above

    value = IntPattern.parse('309')
  '''
  if not (hasattr(clz, 'rules') and hasattr(clz, 'terminals') and hasattr(clz, 'get')):
    raise TypeError(
        "type {!r} should has attributes 'rules', 'terminals' and 'get'".format(clz))
  clz.parser = makeStateAutomata(clz.rules, clz.terminals, clz.get)
  # staticmethod keeps Class.parse(chars) working and also makes
  # instance.parse(chars) work; a plain function stored on the class would
  # receive the instance as `chars` when called through an instance.
  clz.parse = staticmethod(lambda chars: clz.parser(clz(), chars))
  return clz
# Informal description of the rule syntax, kept as a plain string.
BNF_syntax =  '''
rule = id "=" grammar
grammar = "\"" literal "\""
grammar = id
grammar = concat "|" concat
concat = optional " " optional
optional = "[" grammar "]"
optional = grammar "+"
optional = grammar "*"
embraced = "(" grammar ")"
'''
import string
# literal is all chars but line separators
# Operator tokens known to toPrefix, mapped to the integer its insertOp helper
# compares when deciding whether a newly inserted operator moves further left.
precedence = {
  "|": 0,
  ".": 1,
  "+": 2,
  "*": 2,
  "[": 3,
  "(": 4,
  '"': 5,
}
# Not referenced by the code visible here -- presumably the operators that
# take two operands; confirm before relying on it.
binaryOps = {'|', '.'}

def toPrefix(chars):
  '''Tokenise a grammar string and return the token list, with operator
  tokens moved in front of the tokens that precede them.

  chars must be a str. Runs of ASCII letters become identifier tokens;
  '"', '[', '|', '*', '+' and an implicit '.' (emitted when an identifier,
  ')' or ']' is followed by whitespace) become operator tokens. All other
  characters are ignored.

  NOTE(review): this prints debugging output on every inserted operator and
  looks unfinished -- the exact prefix order it produces should be verified
  before use.
  '''
  tokens = []
  # Stack of lower bounds into `tokens`: an inserted operator is never moved
  # to or below index parsestack[-1].
  parsestack = [0]
  # True while inside a pair of double quotes.
  quoted = False
  
  def insertOp(op):
    # Append op, then walk from the end of the list down to the current
    # lower bound, swapping neighbours as described below.
    prec = precedence[op]
    tokens.append(op)
    print('stack = {}'.format(parsestack))
    print('insert {}, top is {}'.format(op, parsestack[-1]))
    for i in range(len(tokens) - 1, parsestack[-1], -1):
      # Left neighbour is an operand or an operator of equal/higher
      # precedence: swap positions i-1 and i.
      if tokens[i-1] not in precedence or precedence[tokens[i-1]] >= prec:
        tokens[i-1], tokens[i] = tokens[i], tokens[i-1]
      # Left neighbour is a lower-precedence operator: swap positions i and
      # i+1 instead (skipped on the first step, where i+1 does not exist).
      # NOTE(review): the loop keeps going afterwards, so later swaps may
      # involve tokens other than op -- confirm this is intended.
      elif i != len(tokens) - 1:
        tokens[i+1], tokens[i] = tokens[i], tokens[i+1]
        
  token = ''
  # Pair every character with its successor; '!' stands in for the
  # successor of the last character.
  for c, n in zip(chars, chars[1:] + '!'):
    if c in string.whitespace:
      continue
    if c in string.ascii_letters:
      token += c
      # End of an identifier: emit it, and treat following whitespace as the
      # concatenation operator '.'.
      if n not in string.ascii_letters:
        tokens.append(token)
        token = ''
        if n in string.whitespace:
          insertOp('.')
    if c == '"':
      if quoted:
        # Closing quote: drop the bound pushed by the opening quote.
        parsestack.pop()
        quoted = False
      else:
        # Opening quote: new lower bound, then the '"' operator itself.
        quoted = True
        parsestack.append(len(tokens))
        insertOp('"')
    if c in (')', ']'):
      parsestack.pop()
      # n is always a character here (never None), so only the whitespace
      # test matters.
      if n is not None and n in string.whitespace:
        insertOp('.')
    if c == '(':
      parsestack.append(len(tokens))
    if c == '[':
      insertOp('[')
      parsestack.append(len(tokens))
    if c in ('|', '*', '+'):
      insertOp(c)
      
  return tokens
      
def parseRule(line, mapping):
  '''Parse one "id = grammar" line and register its NFA in mapping.

  line    -- rule text; everything before the first '=' is the rule id,
             everything after it is the grammar.
  mapping -- dict from rule id to NFA, updated in place. A rule id that is
             already present gets the new NFA added as an alternative.

  Raises ValueError when line contains no '='.
  '''
  # Split on the first '=' only: the grammar part may itself contain '='
  # (e.g. the quoted "=" in `rule = id "=" grammar`). The id is used as the
  # key exactly as written, surrounding whitespace included.
  name, grammar = line.split('=', 1)
  tokens = toPrefix(grammar)
  print(tokens)
  nfa = NFA.fromTokens(tokens)
  if name in mapping:
    mapping[name] = mapping[name].either(nfa)
  else:
    mapping[name] = nfa
  

以上是关于 robturtle 创建的 Python DFA 示例(https://repl.it/EzaW/47)的主要内容。如果未能解决你的问题,请参考以下文章:

C语言实现NFA转DFA

DFA 算法实现关键词匹配

由 MPDF 创建的双面打印 PDF

DFA和NFA

编译原理-NFA构造DFA

自己动手写编译器:从NFA到DFA