LR(1) 语法分析 (Python)
2022/1/10 20:08:06
本文主要是介绍LR(1) 语法分析 (Python),对大家解决编程问题具有一定的参考价值,需要的程序猿们随着小编来一起学习吧!
内容
- 生成 LR(1) 规范项集族
- 构造 LR(1) 分析表
- 自动机和分析表可视化
- 基于栈的自底向上分析器
- 二义性文法处理(本程序中遇冲突直接覆写,可自由调整)
# Item: (prod_id, dot_pos, ahead)
#   prod_id  - index of the production in ``prods``
#   dot_pos  - number of right-hand-side symbols already to the left of the dot
#   ahead    - lookahead terminal
# A production is a list ``[head, sym1, sym2, ...]``; an empty right-hand side
# is written as ``[head]``.  The string '$' is used both as the end-of-input
# terminal and, inside FIRST sets, as the "can derive the empty string" marker.
import copy


def closure(prods, ps, first):
    """Return the LR(1) closure of the item list ``ps``.

    Args:
        prods: list of productions, each ``[head, sym1, sym2, ...]``.
        ps: list of items ``(prod_id, dot_pos, ahead)``; it is not modified.
        first: dict mapping every grammar symbol to its FIRST list.  It is
            not modified; '$' is always treated as having FIRST ``['$']``.

    Returns:
        A new list: the items of ``ps`` followed by the added closure items,
        in discovery order and without duplicates among the added items.
    """
    items = ps[:]
    # Set mirror of ``items`` so the duplicate check is O(1) instead of O(n).
    seen = {tuple(item) for item in items}

    def first_of(symbol):
        # '$' always stands for itself, whatever the caller put in ``first``.
        return ['$'] if symbol == '$' else first[symbol]

    idx = 0
    # ``items`` grows while it is scanned: this is a worklist.
    while idx < len(items):
        prod_id, dot_pos, ahead = items[idx]
        idx += 1
        prod = prods[prod_id]
        if dot_pos + 1 >= len(prod):
            continue  # dot at the end: nothing to expand
        cur = prod[dot_pos + 1]

        # Lookaheads for the new items: FIRST(beta ahead), where beta is what
        # follows ``cur`` in the production.
        lookaheads = []
        for symbol in prod[dot_pos + 2:] + [ahead]:
            symbol_first = first_of(symbol)
            for x in symbol_first:
                # '$' inside a nonterminal's FIRST only means "nullable";
                # it is a real lookahead only when the symbol itself is '$'.
                if (x != '$' or symbol == '$') and x not in lookaheads:
                    lookaheads.append(x)
            if '$' not in symbol_first:
                break

        expansions = [pid for pid, p in enumerate(prods) if p[0] == cur]
        for next_symbol in lookaheads:
            for pid in expansions:
                new_item = (pid, 0, next_symbol)
                if new_item not in seen:
                    seen.add(new_item)
                    items.append(new_item)
    return items


def goto(prods, ps, a, first):
    """Return the closed item set reached from ``ps`` on symbol ``a``.

    Every item of ``ps`` whose symbol right after the dot is ``a`` has its
    dot advanced; the closure of those items is returned.  The result is an
    empty list when no item of ``ps`` has ``a`` after the dot.
    """
    kernel = []
    for prod_id, dot_pos, ahead in ps:
        prod = prods[prod_id]
        if dot_pos + 1 < len(prod) and prod[dot_pos + 1] == a:
            moved = (prod_id, dot_pos + 1, ahead)
            if moved not in kernel:
                kernel.append(moved)
    return closure(prods, kernel, first)


def prod2str(prod, dotpos):
    """Format ``prod`` as ``head→symbols`` with '·' inserted at ``dotpos``.

    ``dotpos`` counts the right-hand-side symbols before the dot; symbols are
    concatenated without separators.  ``prod`` is not modified.
    """
    cells = prod[:]
    cells.append("")  # slot that carries the dot when it sits at the very end
    cells[dotpos + 1] = '·' + cells[dotpos + 1]
    return cells[0] + '→' + ''.join(cells[1:])


def _compute_first(prods, terminals, nonterminals):
    """Compute FIRST lists for all symbols by fixed-point iteration."""
    first = {}
    for sym in terminals:
        first[sym] = [sym]
    for sym in nonterminals:
        first[sym] = []
    while True:
        previous = copy.deepcopy(first)
        for prod in prods:
            left = prod[0]
            for symbol in prod[1:]:
                for x in first[symbol]:
                    if x != "$" and x not in first[left]:
                        first[left].append(x)
                if "$" not in first[symbol]:
                    break
            else:
                # Every right-hand-side symbol is nullable (or there is none).
                if "$" not in first[left]:
                    first[left].append("$")
        if previous == first:
            return first


def _print_item_set(prods, state_id, items):
    """Print one item set in colour; lookaheads of the same core are merged."""
    grouped = {}
    for prod_id, dot_pos, ahead in items:
        grouped.setdefault((prod_id, dot_pos), []).append(ahead)
    rows = (
        prod2str(prods[prod_id], dot_pos)
        + ' \t\033[33m' + '/'.join(aheads) + '\033[30m'
        for (prod_id, dot_pos), aheads in grouped.items()
    )
    print("\033[35m[I%d]:\n \033[31m" % state_id,
          '\n \033[31m'.join(rows), end="\n \033[32m")


def _print_table(table, terminals, nonterminals):
    """Print the analysis table: one row per state, one column per symbol."""
    columns = list(terminals) + [nt for nt in nonterminals if nt != "S'"]
    head = ['I'] + columns
    print('---'.join('-' * 5 for _ in head))
    print(' | '.join(cell.ljust(5) for cell in head))
    print('-+-'.join('-' * 5 for _ in head))
    for state_id, row in enumerate(table):
        cells = [str(state_id)] + [row.get(col, "") for col in columns]
        print(' | '.join(cell.ljust(5) for cell in cells))


def make_lr1(prods):
    """Build the canonical LR(1) item sets and analysis table for ``prods``.

    ``prods[0]`` is used as the start production, and the augmented start
    symbol must be named ``S'`` (reductions by a production whose head is
    ``S'`` become the accept action).  The item sets, their transitions and
    the finished table are printed to stdout.

    Conflicts are resolved by overwriting: reduce/accept entries of a state
    are written first and shift entries afterwards, so a shift wins over a
    reduce on the same terminal, and the later of two reductions wins.

    Args:
        prods: list of productions, each ``[head, sym1, sym2, ...]``.

    Returns:
        A list with one dict per state.  Keys are grammar symbols; values are
        ``'sN'`` (shift to state N), ``'rN'`` (reduce by production N),
        ``'acc'`` (accept) or ``'N'`` (goto state N on a nonterminal).
    """
    symbols = {sym for prod in prods for sym in prod}
    nonterminals = sorted({prod[0] for prod in prods})
    terminals = [sym for sym in symbols if sym not in nonterminals]
    terminals.append('$')
    terminals.sort()
    terminal_set = set(terminals)

    # Precompute the FIRST sets.
    first = _compute_first(prods, terminals, nonterminals)

    start = closure(prods, [(0, 0, '$')], first)
    cc = [start]
    # Item set (as a tuple) -> state number, replacing repeated list scans.
    state_ids = {tuple(start): 0}
    table = [{}]

    i = 0
    while i < len(cc):
        state = cc[i]
        row = table[i]
        _print_item_set(prods, i, state)

        candiwords = []  # symbols found right after a dot, in first-seen order
        for prod_id, dot, ahead in state:
            prod = prods[prod_id]
            dot_pos = dot + 1
            if dot_pos < len(prod):
                if prod[dot_pos] not in candiwords:
                    candiwords.append(prod[dot_pos])
            elif dot_pos == len(prod):
                if prod[0] != "S'":
                    row[ahead] = 'r%d' % prod_id
                else:
                    # Overwrite like every other conflict; concatenating
                    # would produce an unparsable action such as 'r3acc'.
                    row['$'] = 'acc'

        for sym in candiwords:
            target = goto(prods, state, sym, first)
            if not target:
                continue
            key = tuple(target)
            if key not in state_ids:
                state_ids[key] = len(cc)
                cc.append(target)
                table.append({})
            target_id = state_ids[key]
            print(sym, target_id, sep=",", end=" ")
            if sym in terminal_set:
                row[sym] = "s%d" % target_id
            else:
                row[sym] = "%d" % target_id
        i += 1
        print("\033[0m")

    _print_table(table, terminals, nonterminals)
    return table


def read_productions(path):
    """Read productions from ``path``, one ``HEAD -> sym sym ...`` per line.

    Symbols are separated by whitespace and the second field (the arrow) is
    dropped.  Blank lines are skipped.  Returns a list of
    ``[head, sym1, ...]`` lists.
    """
    prods = []
    with open(path, "r") as prod_file:
        for line in prod_file:
            parts = line.split()
            if parts:
                prods.append(parts[:1] + parts[2:])
    return prods


def read_tokens(path):
    """Read the token file at ``path``.

    Each non-blank line is split on whitespace; field 1 is taken as the token
    type and field 2 (if present) as the token text.  Field 0 is ignored.

    Returns:
        ``(tokens, tokens_str)``: the list of token types and the list of
        token texts, in file order.

    Raises:
        ValueError: if a non-blank line has fewer than two fields.
    """
    tokens = []
    tokens_str = []
    with open(path, "r") as token_file:
        for line_no, line in enumerate(token_file, 1):
            fields = line.split()
            if not fields:
                continue
            if len(fields) < 2:
                raise ValueError(
                    "malformed token line %d: %r" % (line_no, line))
            tokens.append(fields[1])
            tokens_str.append(fields[2] if len(fields) > 2 else "")
    return tokens, tokens_str


def parse_tokens(prods, table, tokens):
    """Run the stack-based LR driver over ``tokens`` and print its trace.

    Args:
        prods: the productions the table was built from.
        table: analysis table as returned by ``make_lr1``.
        tokens: list of token types; the last element must be '$'.

    Returns:
        True if the input is accepted ("succeed" is printed), False if an
        error is reported ("errorA": no action for the current token,
        "errorB": no goto entry after a reduction).
    """
    stack_token = ['#']
    stack_state = [0]
    input_ptr = 0
    while stack_token:
        cur_token = tokens[input_ptr]
        cur_state = stack_state[-1]
        if cur_token not in table[cur_state]:
            print("errorA")
            return False
        action = table[cur_state][cur_token]
        if action == "acc":
            print("succeed")
            return True
        if action[0] == 's':
            stack_token.append(cur_token)
            stack_state.append(int(action[1:]))
            input_ptr += 1
            print("shift", cur_token)
            continue

        prod_id = int(action[1:])
        prod_len = len(prods[prod_id]) - 1
        print("reduce", prods[prod_id])
        # An empty production pops nothing.  ``stack[:-0]`` would be
        # ``stack[:0]`` and wipe the whole stack, hence the guard.
        if prod_len:
            del stack_token[-prod_len:]
            del stack_state[-prod_len:]
        cur_state = stack_state[-1]
        goto_nt = prods[prod_id][0]
        if goto_nt not in table[cur_state]:
            print("errorB")
            return False
        stack_token.append(goto_nt)
        stack_state.append(int(table[cur_state][goto_nt]))
    return False


def main():
    """Build the table from prod.txt and parse the tokens in tokens.txt."""
    prods = read_productions("prod.txt")
    print(prods)
    table = make_lr1(prods)
    # The token texts are read for completeness; the driver only needs types.
    tokens, _tokens_str = read_tokens("tokens.txt")
    tokens.append("$")
    parse_tokens(prods, table, tokens)


if __name__ == "__main__":
    main()
文法示例
S' -> L
L -> L S
L -> S
S -> S0 ;
S -> { L }
S0 -> Sa
Sa -> Id = E
S -> IfC S
S -> IfCEl S
IfCEl -> IfC S else
IfC -> if ( C )
C -> C1
C -> Ca C1
Ca -> C &&
C1 -> C2
C1 -> Co C2
Co -> C1 ||
C2 -> C3
C2 -> ! C3
C3 -> ( C )
C -> E Rop E
Rop -> ==
Rop -> !=
Rop -> <
Rop -> <=
Rop -> >
Rop -> >=
E -> E + Et
E -> E - Et
E -> Et
Et -> Et * Ef
Et -> Et / Ef
Et -> Et % Ef
Et -> Ef
Ef -> ( E )
Ef -> Id
E -> Const
项集族输出示例
分析表输出示例
分析输入示例
if (1 == 2) if (3 == 4) a = 1; else a = 2; else a = 3;
分析输出示例
这篇关于LR(1) 语法分析 (Python)的文章就介绍到这儿,希望我们推荐的文章对大家有所帮助,也希望大家多多支持为之网!
- 2025-01-03用FastAPI掌握Python异步IO:轻松实现高并发网络请求处理
- 2025-01-02封装学习:Python面向对象编程基础教程
- 2024-12-28Python编程基础教程
- 2024-12-27Python编程入门指南
- 2024-12-27Python编程基础
- 2024-12-27Python编程基础教程
- 2024-12-27Python编程基础指南
- 2024-12-24Python编程入门指南
- 2024-12-24Python编程基础入门
- 2024-12-24Python编程基础:变量与数据类型