实验:四元式编译器实现

2020-01-10

1. 实验目的

实现一个支持对四则运算与条件转移四元式翻译的简易编译器

2. 实验内容

3. 词法分析

from collections import namedtuple, deque
import re
from beeprint import pp

_BLANK = re.compile(r'[\s]')
_DIGIT = re.compile(r'[0-9]')
_ALPHA = re.compile(r'[_a-zA-Z]')
_EQUAL = re.compile(r'=')
_GREATER_LESS = re.compile(r'[<>]')
lexer = namedtuple('lexer', ['type', 'value'])
tokens_rw = ('IF', 'ELSE', 'END')
tokens_op = ('PLUS', 'MINUS', 'TIMES', 'DIVIDE', 'LPAREN', 'RPAREN','GREATER','GREATERQ','LESS','LESSQ')
tokens_num = ('NUMBER',)
file = open('1.txt', 'rb')
tokens = deque()


def getChar():
    return file.read(1).decode()


def unitDigit(wstr):
    while True:
        ch = getChar()
        if not _DIGIT.match(ch):
            break
        wstr += ch
    file.seek(-1, 1)
    unit = lexer('NUMBER', wstr)
    tokens.append(unit)


def unitAlpha(wstr):
    while True:
        ch = getChar()
        if not _DIGIT.match(ch) and not _ALPHA.match(ch):
            break
        wstr += ch
    file.seek(-1, 1)
    unit = lexer('RESERVED', wstr) if wstr in tokens_rw else lexer(
        'IDENTIFIER', wstr)
    tokens.append(unit)


def unitEqual(wstr):
    unit = lexer('EQUAL', wstr)
    tokens.append(unit)


def unitGL(wstr):
    ch = getChar()
    if not _EQUAL.match(ch):
        file.seek(-1, 1)
        unit = lexer('GREATER', wstr) if wstr == '>' else lexer('LESS', wstr)
    else:
        wstr += ch
        unit = lexer('GREATERQ', wstr) if wstr == '>=' else lexer(
            'LESSQ', wstr)
    tokens.append(unit)


def unitSymbol(wstr):
    if wstr == '+':
        unit = lexer('PLUS', wstr)
    elif wstr == '-':
        unit = lexer('MINUS', wstr)
    elif wstr == '*':
        unit = lexer('TIMES', wstr)
    elif wstr == '/':
        unit = lexer('DIVIDE', wstr)
    elif wstr == '(':
        unit = lexer('LPAREN', wstr)
    elif wstr == ')':
        unit = lexer('RPAREN', wstr)
    else:
        raise
    tokens.append(unit)


def excLexer():
    while(file.read(1)):
        file.seek(-1, 1)
        for ch in iter(lambda: getChar(), '\n'):
            while _BLANK.match(ch):
                ch = getChar()
            if _DIGIT.match(ch):
                unitDigit(ch)
            elif _ALPHA.match(ch):
                unitAlpha(ch)
            elif _EQUAL.match(ch):
                unitEqual(ch)
            elif _GREATER_LESS.match(ch):
                unitGL(ch)
            else:
                unitSymbol(ch)
        tokens.append(lexer('EOL','#'))

简要说明

token划分标准：空白符、数字、字母、等号、大\小于号、加减乘除等其余符号
退回操作：例如在检测到大\小于号时，需要向前继续查看下一个字符是数字还是等号(其余情况转错误处理)，如果为数字，则将其退回缓冲区
- python实现退回：file.seek(-1, 1)
token格式：(type,value)，例如(NUMBER, 10)

4. LL1制导的语义分析

cur_tok = None
cnt = 0
lcnt = 0
tac = list()
lbl = list()
# name=locals()


def getNewLbl():
    global lcnt
    lcnt += 1
    return "LBL{idx}".format(idx=str(lcnt))


def getNewVar(_type):
    global cnt
    cnt += 1
    return lexer(_type, 'var'+str(cnt))


def get_token():
    global cur_tok
    cur_tok = tokens.popleft()
    return cur_tok


def match(_, val=''):
    global cur_tok
    t, v = cur_tok.type, cur_tok.value
    if t in _ or t in tokens_op and v == val:
        get_token()
    else:
        raise


def stat():
    global cur_tok
    t, v = cur_tok.type, cur_tok.value
    tmp = comp()
    lbl.append(getNewLbl())
    unit = ('jnz', tmp, '_', lbl[-1])
    tac.append(unit)


def comp():
    get_token()
    global cur_tok
    t, v = cur_tok.type, cur_tok.value
    ltmp = expr() if t == 'NUMBER' else v

    get_token()
    tt, vv = cur_tok.type, cur_tok.value

    get_token()
    ttt, vvv = cur_tok.type, cur_tok.value
    rtmp = expr() if ttt == 'NUMBER' else vvv

    var = getNewVar('BOOL')
    unit = (vv, ltmp, rtmp, var.value)
    tac.append(unit)
    return var.value


def attr():
    global cur_tok
    t, v = cur_tok.type, cur_tok.value
    get_token()
    get_token()
    tt, vv = cur_tok.type, cur_tok.value
    var = expr() if tt == 'NUMBER' else vv
    unit = ('=', var, '_', v)
    tac.append(unit)


def expr():
    global cur_tok
    tmp = term()
    t, v = cur_tok.type, cur_tok.value
    while v == '+' or v == '-':
        match(tokens_op)
        rhs = term()
        var = getNewVar(t)
        unit = (v, tmp, rhs, var.value)
        tac.append(unit)
        tmp = var.value
        t, v = cur_tok.type, cur_tok.value
    return tmp


def term():
    global cur_tok
    tmp = factor()
    t, v = cur_tok.type, cur_tok.value
    while v == '*' or v == '/':
        match(tokens_op)
        rhs = factor()
        var = getNewVar(t)
        unit = (v, tmp, rhs, var.value)
        tac.append(unit)
        tmp = var.value
        t, v = cur_tok.type, cur_tok.value
    return tmp


def factor():
    global cur_tok
    t, v = cur_tok.type, cur_tok.value
    if t in tokens_num:
        match(tokens_num)
        return v
    elif v == '(':
        match(tokens_op, '(')
        tmp = expr()
        match(tokens_op, ')')
        return tmp
    else:
        raise


def switch():
    get_token()
    global cur_tok
    if cur_tok.type == 'NUMBER':
        expr()
    elif cur_tok.type == 'RESERVED':
        if cur_tok.value == 'IF':
            stat()
        elif cur_tok.value == 'ELSE':
            unit = ('lbl', lbl[-1], '_', '_')
            tac.append(unit)
            lbl.pop()
        elif cur_tok.value == 'END':
            while lbl != list():
                unit = ('lbl', lbl[-1], '_', '_')
                tac.append(unit)
                lbl.pop()
            unit = ('end', '_', '_', '_')
            tac.append(unit)
        else:
            if tokens[0].type == 'EOL':
                get_token()
            else:
                raise
    elif cur_tok.type == 'IDENTIFIER':
        attr()
    else:
        if tokens != deque() and tokens[0].type == 'EOL':
            get_token()


if __name__ == "__main__":
    excLexer()
    while tokens != deque():
        switch()
    pp(tac)

简要说明

expr+term+factor：四则运算
attr：赋值
- 读取token.type=='EQUAL'，再调用expr，相当于x = expression
- tac：记录变量的“符号表”，由列表实现
comp：比较
stat：IF...ELSE...THEN
- 记录goto LABEL的”符号表”，由栈实现
getNewVar：创建新的变量
getNewLbl：创建新的LABEL标识

5. 实验结果

测试输入

x=1+2*3
y=x
z=2
IF x>y z=x ELSE IF z>y z=y END

注意以LF作为换行

输出

## 说明

BUG⚠：在stat里涉及常量与常量，常量与变量的会出现错误的结果，不过我是懒得修了…