求first集,follow集,打印预测分析表,最后实现总控程序。
注释写的很清楚啦就不写实验思路了qwq(主要是懒
源代码:
import sys
import operator
import copy
#终结符
terSymbol = ['a','b','c','d','e','f','g','h','i','j','k','l','m','n','o','p','q','r','s','t','u','v','w','x','y','z',
'+','-','*','/','(',')','^']
#非终结符
non_ter = ['A','B','C','D','E','F','G','H','I','J','K','L','M','N','O','P','Q','R','S','T','U','V','W','X','Y','Z']
firstSET = [[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]
followSET = [[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[],[]]
#求first集
def first(gra_line):
indexSET = []
x = gra_line[0] #x是开头的非终结符
ind = non_ter.index(x) #firstSET[ind]是当前非终结符的first集
#找到所有产生式右部的第一个字符,下标值返回列表indexSET
i = 0
while i < len(gra_line):
if gra_line[i] == '>' or gra_line[i] == '|':
indexSET.append(i + 1)
i += 1
#判断这些字符是否为终结符、空、非终结符,i是下标啊!gra_line[i]才是遍历的那个字符
for i in indexSET:
if gra_line[i] in terSymbol or gra_line[i] == '@':
if gra_line[i] not in firstSET[ind]:
firstSET[ind].append(gra_line[i])
elif gra_line[i] in non_ter:
#先把gra_line[i]的first集的非空元素加进去
if len(firstSET[non_ter.index(gra_line[i])]) == 0:
for gr in grammer:
if gr.startswith(gra_line[i]):
first(gr)
break
for g in firstSET[non_ter.index(gra_line[i])]:
if g != '@' and g not in firstSET[ind]:
firstSET[ind].append(g)
#再判断一堆空的那个情况
j = i #i是indexSET遍历用的,不能改,j用来遍历gra_line好了
while '@' in firstSET[non_ter.index(gra_line[j])]:
j += 1
#全包含空就把空加进去
if gra_line[j] == '|' or gra_line[j] == '\n':
firstSET[ind].append('@')
break
#把gra_line[j]的first集的非空元素加进去
if len(firstSET[non_ter.index(gra_line[j])]) == 0:
for gr in grammer:
if gr.startswith(gra_line[j]):
first(gr)
break
for g in firstSET[non_ter.index(gra_line[j])]:
if g != '@' and g not in firstSET[ind]:
firstSET[ind].append(g)
return firstSET
#求字符串的first集
def firststr(str):
firsts = []
if str == '@':
firsts.append('@')
return firsts
else:
for s in str:
if s in terSymbol:
firsts.append(s)
return firsts
elif '@' not in firstSET[non_ter.index(s)]:
for f in firstSET[non_ter.index(s)]:
if f not in firsts:
firsts.append(f)
return firsts
else:
for f in firstSET[non_ter.index(s)]:
if f != '@' and f not in firsts:
firsts.append(f)
if str.endswith(s):
firsts.append('@')
return firsts
#求follow集
def follow(gra_line):
#如果是开始符号则把#加入它的follow集
if gra_line[0] == grammer[0][0] and '#' not in followSET[non_ter.index(gra_line[0])]:
followSET[non_ter.index(gra_line[0])].append('#')
#查找产生式右部的非终结符并把下标加到nonIndex中,注意不能用index是因为可能会有重复的字符
nonIndex = []
g = 0
while g < len(gra_line):
if gra_line[g] in non_ter:
nonIndex.append(g)
g += 1
#从第二个非终结符开始判断它的位置,注意n是该非终结符在gra_line中的下标,gra_line[n]才是遍历的那个非终结符
for n in nonIndex[1:]:
#如果该终结符在产生式最右,则把follow(nonIndex[0])加入follow(n)中
if gra_line[n + 1] == '|' or gra_line[n + 1] == '\n':
for i in followSET[non_ter.index(gra_line[0])]:
if i not in followSET[non_ter.index(gra_line[n])]:
followSET[non_ter.index(gra_line[n])].append(i)
#如果该终结符不在最后
else:
#先把该终结符后的字符串截取出来
str = ''
for s in gra_line[n + 1:]:
if s == '|' or s == '\n':
break
else:
str = str + s
#如果字符串的first集中含有空
if '@' in firststr(str):
for i in followSET[non_ter.index(gra_line[0])]:
if i not in followSET[non_ter.index(gra_line[n])]:
followSET[non_ter.index(gra_line[n])].append(i)
for i in firststr(str):
if i != '@' and i not in followSET[non_ter.index(gra_line[n])]:
followSET[non_ter.index(gra_line[n])].append(i)
#如果字符串的first集中不含有空
else:
for i in firststr(str):
if i != '@' and i not in followSET[non_ter.index(gra_line[n])]:
followSET[non_ter.index(gra_line[n])].append(i)
return followSET
def cmp(SET1, SET2):
i = 0
while i < 26:
if (operator.eq(SET1[i], SET2[i])) == False:
return False
else:
i += 1
return True
#构造预测分析表
def table(grammer, column, row):
col = len(column) + 1
ro = len(row) + 1
#创建一个c列r行的二维列表
gra = [['\t' for col in range(col)] for row in range(ro)]
gra[0][0] = ' '
i = 1
for c in column:
gra[0][i] = c
i += 1
j = 1
for r in row:
gra[j][0] = r
j += 1
for gra_line in grammer:
# 先看每一行有几个产生式,把每一个产生式的开始索引加入pro_index
pro_index = []
i = 0
while i < len(gra_line):
if gra_line[i] == '>' or gra_line[i] == '|':
pro_index.append(i + 1)
i += 1
for p in pro_index:
str = ''
for s in gra_line[p:]:
if s == '|' or s == '\n':
break
else:
str = str + s
#找到了单独的产生式后,看这个产生式的first集,注意产生式是单独一个空的情况
if str == '@':
for f in followSET[non_ter.index(gra_line[0])]:
r = row.index(gra_line[0]) + 1
c = column.index(f) + 1
pro = gra_line[0] + '->' + str
gra[r][c] = pro
else:
for n in firststr(str):
if n == '@':
continue
r = row.index(gra_line[0]) + 1
c = column.index(n) + 1
pro = gra_line[0] + '->' + str
gra[r][c] = pro
if '@' in firststr(str):
for f in followSET[non_ter.index(gra_line[0])]:
r = row.index(gra_line[0]) + 1
c = column.index(f) + 1
pro = gra_line[0] + '->' + str
gra[r][c] = pro
return gra
#总控程序
def master(ana_str, column, row):
flag = 0
ana_str += '#'
stack = ['#']
stack.append(grammer[0][0])
while flag == 0:
print("当前栈中元素为:")
print(stack)
print("当前字符串为:")
print(ana_str)
if stack[-1] == ana_str[0] and stack[-1] == '#':
print("匹配成功,分析结束")
flag = 1
return True
elif stack[-1] == ana_str[0] and stack[-1] != '#':
stack.pop()
ana_str = ana_str[1:]
print("非终结符匹配成功")
elif stack[-1] in non_ter:
if ana_str[0] not in column:
print("匹配失败")
flag = 1
return False
elif gra[row.index(stack[-1]) + 1][column.index(ana_str[0]) + 1] != '\t':
str = gra[row.index(stack[-1]) + 1][column.index(ana_str[0]) + 1]
print("当前所用产生式为:")
print(str)
stack.pop()
right_str = str[3:]
right_str = right_str[::-1]
if right_str == '@':
continue
else:
for s in right_str:
stack.append(s)
else:
print("匹配失败")
flag = 1
return False
else:
print("匹配失败")
flag = 1
return False
if __name__ == '__main__':
ana_str = input("请输入一个待分析的串:")
print("请输入一个文法:")
grammer = sys.stdin.readlines()
for gr in grammer:
first(gr)
print("该文法的非终结符的first集为:")
i = 0
for f in firstSET:
if len(f) != 0:
print('first(' + non_ter[i] + '):',f)
i += 1
print()
#求follow集用的循环……因为我没想到该怎么递归
#关于递归:如何在需要把follow(A)加入follow(B)的时候准确求出follow(A)???????
#一个易错点:直接把followSET赋给follow1,会被引用。用切片可以避免普通列表的引用,但是对于嵌套列表好像没什么用
follow1 = copy.deepcopy(followSET)
for gr in grammer:
follow(gr)
follow2 = copy.deepcopy(followSET)
while not cmp(follow1, follow2):
follow1 = copy.deepcopy(followSET)
for gr in grammer:
follow(gr)
follow2 = copy.deepcopy(followSET)
print("该文法的非终结符的follow集为:")
j = 0
for f in followSET:
if len(f) != 0:
print('follow(' + non_ter[j] + '):', f)
j += 1
print()
column = []
row = []
for gra_line in grammer:
for gr in gra_line:
if gr == '-' and gra_line.index(gr) == 1:
continue
if gr in terSymbol and gr not in column:
column.append(gr)
if gr in non_ter and gr not in row:
row.append(gr)
column.append('#')
print("该文法的预测分析表为:")
gra = table(grammer, column, row)
for i in range(len(gra)): # 控制行
if i == 0:
for j in range(len(gra[i])): # 控制列
if j == 0:
print(gra[i][j], end='\t')
else:
print(gra[i][j], end='\t\t')
else:
for j in range(len(gra[i])): # 控制列
print(gra[i][j], end='\t')
print()
print()
#总控程序
print("分析过程如下:")
master(ana_str, column, row)