• 主页
  • 相册
  • 随笔
  • 目录
  • 存档
Total 244
Search AboutMe

  • 主页
  • 相册
  • 随笔
  • 目录
  • 存档

正则式到dfa最小化实现

2019-10-30

python实现:

  • re到NFA
  • NFA到DFA
  • DFA最小化

注:1.非完全实现,不支持[a-zA-Z] (当然如果需要的话也可以很轻松地改为完全实现)2.存储变量的数据结构设计很糟糕..

1. re到DFA最小化

_class.py

  • Regex类:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    class Regex():
    def __init__(self, rstr):
    self.rstr = rstr
    self.out = []
    self.ops = []
    self.dic = {'*': 3, '.': 2, '|': 1}

    def getWeight(self, ch):
    return self.dic[ch]

    def reshape(self):
    i = 0
    while(i < len(self.rstr[:])-1):
    if(
    (re.match("[0-9]", self.rstr[i+1]) and
    re.match("[0-9\*\)]", self.rstr[i])) or
    re.match("[\(]", self.rstr[i+1]) and
    re.match("[0-9]", self.rstr[i])
    ):
    _ = list(self.rstr) # string to list
    _.insert(i+1, '.')
    self.rstr = "".join(_)
    i += 1

    def postfix(self):
    for ch in self.rstr:
    if(ch == '('):
    self.ops.append(ch)
    elif(ch == ')'):
    while(self.ops[-1] != '('):
    self.out.append(self.ops.pop())
    self.ops.pop()
    elif(re.match('[\*\.\|]', ch)):
    while(len(self.ops) != 0 and self.ops[-1] != '(' and self.getWeight(self.ops[-1]) >= self.getWeight(ch)):
    self.out.append(self.ops.pop())
    self.ops.append(ch)
    else:
    self.out.append(ch)
    while(len(self.ops) != 0):
    self.out.append(self.ops.pop())
    • reshape:将省略的连接符号*用.代替,注意到对括号的额外处理
    • postfix:转换为逆波兰式,注意对单目运算符和括号的处理^1
      • ops:符号栈
      • out:记录最终结果
    • 添加.:先把string转为list,然后调用insert(),再用join()转回来
      • string是immutable,所以一般要转为mutable的list
  • Base基类:

    1
    2
    3
    4
    5
    6
    7
    class Base():
    def __init__(self):
    self.set1 = set()
    self.func = defaultdict(set)
    self.cnt = 0
    self.nfq = ()
    self.setS = {'0', '1'}
    • set1:状态的有限集合
    • nfq:初态与终态
    • setS:输入符号有限集合
    • func:映射关系
  • NFA类:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    class NFA(Regex, Base):
    def __init__(self, rstr):
    Regex.__init__(self, rstr)
    Base.__init__(self)
    self.stack = []

    def nfa(self):
    for o in self.out[:]:
    if(re.match("[0-9]", o)):
    self.calc(self.out.pop(0))
    else:
    if(re.match("\*", o)):
    op = self.out.pop(0)
    a = self.stack.pop()
    self.calc(op, a, a)
    else:
    op = self.out.pop(0)
    b = self.stack.pop()
    a = self.stack.pop()
    self.calc(op, a, b)
    self.nfq = self.stack.pop()

    def calc(self, op, a=-1, b=-1):
    if(op == '.'):
    self.func[a[1]].add(('e', b[0]))
    self.stack.append((a[0], b[1]))
    else:
    temp = list()
    for i in range(2):
    self.set1.add(str(self.cnt)) # auto remove same element
    temp.append(str(self.cnt))
    self.cnt += 1
    if(op == '*'):
    self.func[temp[0]].add(('e', a[0]))
    self.func[temp[0]].add(('e', temp[1]))
    self.func[a[1]].add(('e', a[0]))
    self.func[a[1]].add(('e', temp[1]))
    elif(op == '|'):
    self.func[temp[0]].add(('e', a[0]))
    self.func[temp[0]].add(('e', b[0]))
    self.func[a[1]].add(('e', temp[1]))
    self.func[b[1]].add(('e', temp[1]))
    else:
    self.func[temp[0]].add((op, temp[1]))
    self.stack.append((temp[0], temp[1]))
    • nfa.stack:根据逆波兰式计算的临时结果存储^1
    • Thompson构造法^2
  • DFA类:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    class DFA(NFA):
    def __init__(self, rstr):
    super().__init__(rstr)
    self.cdic = dict()
    self.ddic = defaultdict(set)
    self.edic = defaultdict(set)
    self.set2 = set()
    self.cmax = 0
    self.final=None

    def dfa(self):
    tps = set()
    for i in self.set1:
    for j in self.func[i]:
    if (j[0] != 'e'):
    self.edic[(j[0], i)].add(j[1])
    self.set2.add(str(self.cmax))
    self.cdic[str(self.cmax)] = self.clousre(self.nfq[0], set(self.nfq[0]))
    dot = 0
    while(dot <= self.cmax):
    for i in self.setS:
    self.move(dot, i)
    dot += 1

    def clousre(self, i, tset):
    for j in self.func[i]:
    if(j[0] == 'e'):
    tset |= self.clousre(j[1], tset)
    tset.add(j[1])
    return tset

    def move(self, dot, i):
    tp = set()
    tp2 = set()
    for j in self.cdic[str(dot)]: # move('A',j)
    tp |= self.edic[(i, j)]
    for n in tp:
    tp2 |= self.clousre(n, set())
    tp2 |= tp
    if(tp2):
    if(tp2 not in self.cdic.values()): # new value
    self.cmax += 1
    self.cdic[str(self.cmax)] = tp2
    if(self.nfq[1] in tp):
    self.final = str(self.cmax)
    self.set2.add(str(self.cmax))
    self.ddic[str(dot)].add((i, str(self.cmax)))
    else: # new value
    for k, v in self.cdic.items():
    if(v == tp2):
    self.ddic[str(dot)].add((i, str(k)))
    • dfa.edic:由于之前没按NFA和DFA的定义设计合适的数据结构,因此要多转换一下
    • dfa.cdic:记录重命名前后的映射关系
    • dfa.ddic:映射关系
    • set2:状态的有限集合
  • sDFA:

    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
    42
    43
    44
    45
    46
    47
    48
    49
    50
    51
    52
    53
    54
    55
    56
    57
    58
    59
    60
    61
    62
    63
    64
    65
    66
    67
    68
    69
    70
    71
    72
    class sDFA(DFA):
    def __init__(self, rstr):
    super().__init__(rstr)
    self.fdic = dict()

    def hopcroft(self):
    self.dfq = ('0', self.final)
    self.set3 = self.set2.copy()
    self.set3.remove(self.dfq[1])


    self.li = list()
    self.li.append(list(self.set3.copy()))
    self.li.append([self.dfq[1]])
    # li = [ [1,2,3], [4] ]
    gdic = defaultdict(int) # set default 0

    for temp, i in enumerate(self.li):
    for j in i:
    gdic[j] = temp
    self.c = temp

    for i in self.set2:
    for j in self.ddic[i]:
    self.fdic[(j[0], i)] = j[1]

    while True:
    tp = len(self.li)
    flg = -1 # 第一次并记录下标
    flg2 = False # 产生新分组
    for v, i in enumerate(self.li[:]): # v-> 0,1 i-> [0,1,2] [3]
    for n in self.setS: # n in {'0','1'}
    for j in i[:]: # j in [0,1,2]
    if((n, j) in self.fdic.keys()):
    if(flg == -1 or flg == gdic[self.fdic[(n, j)]]):
    flg = gdic[self.fdic[(n, j)]]
    else:
    self.li[v].remove(j)
    self.li.append([j])
    self.c += 1
    gdic[j] = self.c
    flg2 = True
    flg = -1
    if(flg2):
    flg2 = False
    break
    if(tp == len(self.li)):
    break

    # li分好组,合并同类组
    t = None
    flg = False
    for v,i in enumerate(self.li[:]):
    for j in i[:]:
    if flg == False: # 有可合并项
    flg = True
    t = j
    else:
    self.conversion(j, t,v)
    flg = False
    t = None
    self.set3 = set(x for y in self.li for x in y)

    def conversion(self, t1, t2,v):
    for i in self.set2:
    tp = self.ddic[i].copy()
    for j in tp:
    if(j[1] == t1):
    self.ddic[i].add((j[0], t2))
    self.ddic[i].remove(j)
    self.ddic[t2] |= self.ddic[t1]
    self.li[v].remove(t1)
    • self.final

      • dfa终态的确定:包含nfa终态的cdic代表的结点

      • 本算法中,dfa最大序号结点不能代表终态结点,如:

    • hopcroft

      • 子集划分法^3
    • set3:状态的有限集合

    • dfq:初态和终态

    • fdic:同edic

    • conversion:合并等价状态

一文通:参考链接

2. tkinter包装

_main.py

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
import tkinter as tk
import graphviz as gv
from PIL import Image, ImageTk
from _class import *


class automata():
def __init__(self):
self.rstr = ""
self.window = None
self.addr = list()

def resize(self, w_box, h_box, pil_image): # 参数是:要适应的窗口宽、高、Image.open后的图片
w, h = pil_image.size # 获取图像的原始大小
f1 = 1.0*w_box/w
f2 = 1.0*h_box/h
factor = min([f1, f2])
width = int(w*factor)
height = int(h*factor)
return pil_image.resize((width, height), Image.ANTIALIAS)

def drawDot(self, g, ch1, ch2, w):
g.node(name=str(ch1))
g.node(name=str(ch2))
g.edge(str(ch1), str(ch2), label=w)
#g.edge_attr['label']=w

def drawGraph(self, g, sett, func, filename):
for i in sett:
for j in func[i]:
self.drawDot(g, i, j[1], j[0])
g.render(filename, format='png')
self.addr.append(filename+".png")

def drawWindow(self):
self.window = tk.Tk()
self.window.title('DFA CONVERT')
self.window.geometry('800x600+100+100')
lb1 = tk.Label(self.window,
text='input ur fucking re asshole', # 标签的文字
bg='pink', # 背景颜色
font=('Arial', 12), # 字体和字体大小
height=2) # 标签长宽
lb1.pack()
txt = tk.Entry(self.window)
txt.insert(40, 'type re expression in 40')
txt.pack()

def imgpack(i, sett, dicc, filename):
g = gv.Digraph()
self.drawGraph(g, sett, dicc, filename)
img = Image.open(self.addr[i])
img = self.resize(600, 400, img)
img = ImageTk.PhotoImage(img)
lb2 = tk.Label(self.window, image=img)
lb2.image = img
lb2.pack(side=tk.LEFT, padx=25)

def sGet():
self.rstr = txt.get()
N = sDFA(self.rstr)
N.reshape()
N.postfix()

N.nfa()
imgpack(0, N.set1, N.func, "NFA")

N.dfa()
imgpack(1, N.set2, N.ddic, "DFA")

N.hopcroft()
imgpack(2, N.set3, N.ddic, "sDFA")
btn = tk.Button(self.window, text='键入', width=15,
height=2, command=sGet)
btn.pack()
self.window.mainloop()

def run(self):
self.drawWindow()


if __name__ == "__main__":
t = automata()
t.run()

wsl-ubuntu不支持绘图真是…

  • resize:轮子。^4显示本地上的图片并且随着窗口而改变大小的代码(不然容易超出窗口而显示不全)。
  • 不能动态跟新图片,偷懒(不过目前我也不知道怎么实现)

3. 结果展示

  • Lab
  • Compiler Construction
  • Lab
mysql备忘录
实验:图像处理:pnsr~q曲线
© 2024 何决云 载入天数...