-
Notifications
You must be signed in to change notification settings - Fork 0
/
parser.py
87 lines (75 loc) · 3.67 KB
/
parser.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import nltk
import re
from SymbolTable import SymbolTable
class Token:
    """A piece of source text paired with the line number it was read from.

    The text is mutable: the scanner overwrites it while merging string
    literals and sets it to ``None`` to mark a token as already consumed.
    """

    def __init__(self, token, line_nr):
        """Store the token text and the line number it was found on."""
        self._token = token
        self._line_nr = line_nr

    def __repr__(self) -> str:
        """Return a debug-friendly representation, e.g. ``Token('if', 3)``."""
        return f"{type(self).__name__}({self._token!r}, {self._line_nr!r})"

    def get_token(self):
        """Return the current token text (``None`` once it has been cleared)."""
        return self._token

    def get_line_nr(self):
        """Return the line number recorded when the token was created."""
        return self._line_nr

    def set_token(self, new_token):
        """Replace the token text with *new_token*."""
        self._token = new_token
def write_to_file(file_name, list):
    """Write every element of *list* to *file_name*, one per line.

    Each element is converted with ``str()`` and followed by a newline.
    An existing file is overwritten; an empty iterable produces an empty file.

    Args:
        file_name: Path of the file to create or truncate.
        list: Iterable of elements to write. The name shadows the builtin but
            is kept so existing keyword callers keep working.
    """
    # The context manager guarantees the handle is closed even when
    # str(element) or the write itself raises.
    with open(file_name, "w") as textfile:
        textfile.writelines(str(element) + "\n" for element in list)
def _read_reserved_tokens(token_file):
    """Return the set of non-empty lines found in *token_file*."""
    with open(token_file) as handle:
        # Splitting on '\n' (instead of searching for '\n<token>\n') lets the
        # first and the last line of the file be recognised as well.
        return {entry for entry in handle.read().split('\n') if entry}


def _tokenize_with_line_numbers(file_name):
    """Tokenize *file_name* line by line into ``Token`` objects (1-based lines)."""
    token_list = []
    with open(file_name) as source:
        for line_nr, line in enumerate(source, start=1):
            token_list.extend(
                Token(text, line_nr)
                for text in nltk.tokenize.word_tokenize(line)
            )
    return token_list


def check_tokens_from_file(file_name, token_file='token.in'):
    """Lexically scan *file_name* and build its program internal form (PIF).

    Every token produced by ``nltk.tokenize.word_tokenize`` is classified, in
    this order, as: a reserved token (a line of *token_file*), an identifier,
    an integer or rational constant, or a single-quoted string constant.
    String constants are reassembled from consecutive tokens up to a lone
    closing ``'`` token, joined with single spaces.

    On success the function prints ``lexically correct`` and writes
    ``PIF.out``, ``IST.out`` and ``CST.out`` to the current directory. On the
    first invalid token or unterminated string it prints a diagnostic with the
    line number and stops without writing any file.

    Args:
        file_name: Path of the source program to scan.
        token_file: Path of the file listing reserved tokens, one per line.
            Defaults to ``'token.in'``.

    Returns:
        ``(PIF, identifier_table, constant_table)`` on success, where PIF is a
        list of ``[kind, value]`` pairs: ``[token, -1]`` for reserved tokens,
        and ``['identifier' | 'constant', <result of SymbolTable.add>]``
        otherwise. ``(None, None, None)`` on a lexical error.
    """
    identifier_table = SymbolTable()
    constant_table = SymbolTable()
    reserved_tokens = _read_reserved_tokens(token_file)

    # Patterns are applied with fullmatch(), so they carry no ^/$ anchors.
    identifier_re = re.compile(r'_?[a-zA-Z]+[a-zA-Z0-9_]*')
    # The previous pattern '0|^[1-9]+[0-9]*$' left the '0' alternative
    # unanchored, so any token merely containing a 0 passed as an integer.
    integer_re = re.compile(r'0|[1-9][0-9]*')
    rational_re = re.compile(r'([1-9]+[0-9]*|0)(\.[0-9]*)+')
    string_start_re = re.compile(r"'[^']*")
    quote_free_re = re.compile(r"[^']*")

    token_list = _tokenize_with_line_numbers(file_name)
    pif = []
    for index, current in enumerate(token_list):
        text = current.get_token()
        if not text:
            # Cleared earlier because it was merged into a string constant.
            continue

        if text in reserved_tokens:
            # Reserved tokens are neither identifiers nor constants.
            pif.append([text, -1])
        elif identifier_re.fullmatch(text):
            pif.append(['identifier', identifier_table.add(text)])
        elif integer_re.fullmatch(text) or rational_re.fullmatch(text):
            pif.append(['constant', constant_table.add(text)])
        elif string_start_re.fullmatch(text):
            # Opening of a string: absorb the following quote-free tokens.
            parts = [text]
            look_ahead_index = index + 1
            while look_ahead_index < len(token_list):
                follower = token_list[look_ahead_index]
                if not quote_free_re.fullmatch(follower.get_token()):
                    break
                parts.append(follower.get_token())
                follower.set_token(None)
                look_ahead_index += 1

            closed = (look_ahead_index < len(token_list)
                      and token_list[look_ahead_index].get_token() == "'")
            if not closed:
                print('unexpected EOF, expected "\'" ; error line: ', current.get_line_nr())
                return None, None, None

            # Consume the closing quote; it is appended without a space.
            token_list[look_ahead_index].set_token(None)
            literal = ' '.join(parts) + "'"
            current.set_token(literal)
            pif.append(['constant', constant_table.add(literal)])
        else:
            print('lexical error - invalid token, line:', current.get_line_nr(), ' ; token:', text)
            return None, None, None

    print('lexically correct')
    write_to_file('PIF.out', pif)
    write_to_file('IST.out', ['symbol table represented as tree\ninorder traversal:\n'] + identifier_table.inorder_traversal())
    write_to_file('CST.out', ['symbol table represented as tree\ninorder traversal:\n'] + constant_table.inorder_traversal())
    return pif, identifier_table, constant_table
# Scan the sample program and show the resulting PIF and both symbol tables,
# one per line. All three are None when the scan stops on a lexical error.
PIF, identifier_symbol_tale, constant_symbol_tale = check_tokens_from_file('p1err.txt')
print(PIF, identifier_symbol_tale, constant_symbol_tale, sep='\n')