• 主页
  • 相册
  • 随笔
  • 目录
  • 存档
Total 244
Search AboutMe

  • 主页
  • 相册
  • 随笔
  • 目录
  • 存档

实验:四元式编译器实现

2020-01-10

1. 实验目的

  • 实现一个支持对四则运算与条件转移四元式翻译的简易编译器

2. 实验内容

3. 词法分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
from collections import namedtuple, deque
import re
from beeprint import pp

_BLANK = re.compile(r'[\s]')
_DIGIT = re.compile(r'[0-9]')
_ALPHA = re.compile(r'[_a-zA-Z]')
_EQUAL = re.compile(r'=')
_GREATER_LESS = re.compile(r'[<>]')
lexer = namedtuple('lexer', ['type', 'value'])
tokens_rw = ('IF', 'ELSE', 'END')
tokens_op = ('PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'LPAREN', 'RPAREN','GREATER','GREATERQ','LESS','LESSQ')
tokens_num = ('NUMBER',)
file = open('1.txt', 'rb')
tokens = deque()


def getChar():
return file.read(1).decode()


def unitDigit(wstr):
while True:
ch = getChar()
if not _DIGIT.match(ch):
break
wstr += ch
file.seek(-1, 1)
unit = lexer('NUMBER', wstr)
tokens.append(unit)


def unitAlpha(wstr):
while True:
ch = getChar()
if not _DIGIT.match(ch) and not _ALPHA.match(ch):
break
wstr += ch
file.seek(-1, 1)
unit = lexer('RESERVED', wstr) if wstr in tokens_rw else lexer(
'IDENTIFIER', wstr)
tokens.append(unit)


def unitEqual(wstr):
unit = lexer('EQUAL', wstr)
tokens.append(unit)


def unitGL(wstr):
ch = getChar()
if not _EQUAL.match(ch):
file.seek(-1, 1)
unit = lexer('GREATER', wstr) if wstr == '>' else lexer('LESS', wstr)
else:
wstr += ch
unit = lexer('GREATERQ', wstr) if wstr == '>=' else lexer(
'LESSQ', wstr)
tokens.append(unit)


def unitSymbol(wstr):
if wstr == '+':
unit = lexer('PLUS', wstr)
elif wstr == '-':
unit = lexer('MINUS', wstr)
elif wstr == '*':
unit = lexer('TIMES', wstr)
elif wstr == '/':
unit = lexer('DIVIDE', wstr)
elif wstr == '(':
unit = lexer('LPAREN', wstr)
elif wstr == ')':
unit = lexer('RPAREN', wstr)
else:
raise
tokens.append(unit)


def excLexer():
while(file.read(1)):
file.seek(-1, 1)
for ch in iter(lambda: getChar(), '\n'):
while _BLANK.match(ch):
ch = getChar()
if _DIGIT.match(ch):
unitDigit(ch)
elif _ALPHA.match(ch):
unitAlpha(ch)
elif _EQUAL.match(ch):
unitEqual(ch)
elif _GREATER_LESS.match(ch):
unitGL(ch)
else:
unitSymbol(ch)
tokens.append(lexer('EOL','#'))

简要说明

  • token划分标准:空白符、数字、字母、等号、大\小于号、加减乘除等其余符号
  • 退回操作:例如在检测到大\小于号时,需要向前继续查看下一个字符是数字还是等号(其余情况转错误处理),如果为数字,则将其退回缓冲区
    • python实现退回:file.seek(-1, 1)
  • token格式:(type,value),例如(NUMBER, 10)

4. LL1制导的语义分析

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
cur_tok = None
cnt = 0
lcnt = 0
tac = list()
lbl = list()
# name=locals()


def getNewLbl():
global lcnt
lcnt += 1
return "LBL{idx}".format(idx=str(lcnt))


def getNewVar(_type):
global cnt
cnt += 1
return lexer(_type, 'var'+str(cnt))


def get_token():
global cur_tok
cur_tok = tokens.popleft()
return cur_tok


def match(_, val=''):
global cur_tok
t, v = cur_tok.type, cur_tok.value
if t in _ or t in tokens_op and v == val:
get_token()
else:
raise


def stat():
global cur_tok
t, v = cur_tok.type, cur_tok.value
tmp = comp()
lbl.append(getNewLbl())
unit = ('jnz', tmp, '_', lbl[-1])
tac.append(unit)


def comp():
get_token()
global cur_tok
t, v = cur_tok.type, cur_tok.value
ltmp = expr() if t == 'NUMBER' else v

get_token()
tt, vv = cur_tok.type, cur_tok.value

get_token()
ttt, vvv = cur_tok.type, cur_tok.value
rtmp = expr() if ttt == 'NUMBER' else vvv

var = getNewVar('BOOL')
unit = (vv, ltmp, rtmp, var.value)
tac.append(unit)
return var.value


def attr():
global cur_tok
t, v = cur_tok.type, cur_tok.value
get_token()
get_token()
tt, vv = cur_tok.type, cur_tok.value
var = expr() if tt == 'NUMBER' else vv
unit = ('=', var, '_', v)
tac.append(unit)


def expr():
global cur_tok
tmp = term()
t, v = cur_tok.type, cur_tok.value
while v == '+' or v == '-':
match(tokens_op)
rhs = term()
var = getNewVar(t)
unit = (v, tmp, rhs, var.value)
tac.append(unit)
tmp = var.value
t, v = cur_tok.type, cur_tok.value
return tmp


def term():
global cur_tok
tmp = factor()
t, v = cur_tok.type, cur_tok.value
while v == '*' or v == '/':
match(tokens_op)
rhs = factor()
var = getNewVar(t)
unit = (v, tmp, rhs, var.value)
tac.append(unit)
tmp = var.value
t, v = cur_tok.type, cur_tok.value
return tmp


def factor():
global cur_tok
t, v = cur_tok.type, cur_tok.value
if t in tokens_num:
match(tokens_num)
return v
elif v == '(':
match(tokens_op, '(')
tmp = expr()
match(tokens_op, ')')
return tmp
else:
raise


def switch():
get_token()
global cur_tok
if cur_tok.type == 'NUMBER':
expr()
elif cur_tok.type == 'RESERVED':
if cur_tok.value == 'IF':
stat()
elif cur_tok.value == 'ELSE':
unit = ('lbl', lbl[-1], '_', '_')
tac.append(unit)
lbl.pop()
elif cur_tok.value == 'END':
while lbl != list():
unit = ('lbl', lbl[-1], '_', '_')
tac.append(unit)
lbl.pop()
unit = ('end', '_', '_', '_')
tac.append(unit)
else:
if tokens[0].type == 'EOL':
get_token()
else:
raise
elif cur_tok.type == 'IDENTIFIER':
attr()
else:
if tokens != deque() and tokens[0].type == 'EOL':
get_token()


if __name__ == "__main__":
excLexer()
while tokens != deque():
switch()
pp(tac)

简要说明

  • expr+term+factor:四则运算

  • attr:赋值

    • 读取token.type=='EQUAL',再调用expr,相当于x = expression
    • tac:记录变量的“符号表”,由列表实现
  • comp:比较

  • stat:IF...ELSE...THEN

    • 记录goto LABEL的”符号表”,由栈实现
  • getNewVar:创建新的变量

  • getNewLbl:创建新的LABEL标识

5. 实验结果

测试输入

1
2
3
4
x=1+2*3
y=x
z=2
IF x>y z=x ELSE IF z>y z=y END
  • 注意以LF作为换行

输出

## 说明

BUG⚠:在stat里涉及常量与常量,常量与变量的会出现错误的结果,不过我是懒得修了…

  • Lab
  • Compiler Construction
  • Lab
IATHook注入与劫持实验
操作系统备忘录-硬盘存储器与操作系统接口
© 2024 何决云 载入天数...