Python版C语言词法分析器

Posted

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Python版C语言词法分析器相关的知识,希望对你有一定的参考价值。


#!/usr/bin/python
# -*- coding: utf-8 -*-
"""A small lexical analyser for C source code with a Tkinter front end.

The scanner works on a list of source lines (text split on "\\n").  The last
list element is treated as the remainder after the final newline and is never
scanned, which matches what ``Text.get(1.0, END).split("\\n")`` produces.
"""
import sys

try:  # Python 2 module names (what this script was written against)
    from Tkinter import *
    from tkFont import *
    from FileDialog import *
except ImportError:
    try:  # Python 3 renamed the same modules
        from tkinter import *
        from tkinter.font import *
        from tkinter.filedialog import *
    except ImportError:
        # No Tk bindings at all: the scanner stays importable and usable;
        # pre_interface() reports the missing toolkit when the GUI is asked for.
        pass

# Sentinel returned by getchar()/scanner() once the input is exhausted.
SCANEOF = 'SCANEOF'

KEYWORD_LIST = ['if', 'else', 'while', 'break', 'continue', 'for', 'double',
                'int', 'float', 'long', 'short', 'bool', 'switch', 'case',
                'return', 'void']

SEPARATOR_LIST = ['{', '}', '[', ']', '(', ')', '~', ',', ';', '.', '?', ':',
                  ' ']

OPERATOR_LIST = ['+', '++', '-', '--', '+=', '-=', '*', '*=', '%', '%=', '->',
                 '|', '||', '|=', '/', '/=', '>', '<', '>=', '<=', '=', '==',
                 '!=', '!', '&']

# Token text -> numeric category code.
# The original literal repeated the keys "register", "sizeof" and "char";
# only the last occurrence of a repeated key survives in a dict literal, so
# the shadowed entries are dropped here and the effective values are kept.
CATEGORY_DICT = {
    # KEYWORD
    "far": 257, "near": 258, "pascal": 259, "asm": 261, "cdecl": 262,
    "huge": 263, "auto": 264, "double": 265, "int": 266, "struct": 267,
    "break": 268, "else": 269, "long": 270, "switch": 271, "case": 272,
    "enum": 273, "register": 274, "typedef": 275, "extern": 277,
    "return": 278, "union": 279, "const": 280, "float": 281, "short": 282,
    "unsigned": 283, "continue": 284, "for": 285, "signed": 286, "void": 287,
    "default": 288, "goto": 289, "volatile": 291, "do": 292, "if": 293,
    "while": 294, "static": 295, "interrupt": 296, "NULL": 298,
    # SEPARATOR
    "{": 299, "}": 300, "[": 301, "]": 302, "(": 303, ")": 304, "~": 305,
    ",": 306, ";": 307, ".": 308, "#": 309, "?": 310, ":": 311,
    # OPERATOR
    "<<": 312, ">>": 313, "<": 314, "<=": 315, ">": 316, ">=": 317, "=": 318,
    "==": 319, "|": 320, "||": 321, "|=": 322, "^": 323, "^=": 324, "&": 325,
    "&&": 326, "&=": 327, "%": 328, "%=": 329, "+": 330, "++": 331, "+=": 332,
    "-": 333, "--": 334, "-=": 335, "->": 336, "/": 337, "/=": 338, "*": 339,
    "*=": 340, "!": 341, "!=": 342, "sizeof": 343, "<<=": 344, ">>=": 345,
    # LITERALS / IDENTIFIERS
    "inum": 346, "int16": 347, "int8": 348, "char": 350, "string": 351,
    "bool": 352, "fnum": 353, "IDN": 354,
}

# Cursor of the most recently read character (0-based line / column).
current_row = -1
current_line = 0
# Next free line (1-based) in the analysis output widget.
out_line = 1

# GUI widgets; pre_interface() replaces these with real Tk objects.
root = None
code = None
analysis = None


def _current_char(input_str):
    """Return the character under the cursor without moving it.

    Yields SCANEOF on or past the last list element, "\\n" in the slot just
    after the final column of a line, and "" before the start of the input.
    """
    if current_line >= len(input_str) - 1:
        return SCANEOF
    if current_row < 0:
        return ''
    text = input_str[current_line]
    if current_row >= len(text):
        return '\n'
    return text[current_row]


def getchar(input_str):
    """Advance the cursor by one character and return that character.

    Every scanned line ends with a synthetic "\\n" so that tokens on adjacent
    lines cannot fuse and blank lines are harmless.  Returns SCANEOF once the
    last list element is reached.
    """
    global current_row
    global current_line
    current_row += 1
    if (current_line < len(input_str) - 1
            and current_row > len(input_str[current_line])):
        # Stepped past the newline slot: continue on the next line.
        current_line += 1
        current_row = 0
    return _current_char(input_str)


def ungetchar(input_str):
    """Move the cursor back one character (exact inverse of getchar)."""
    global current_row
    global current_line
    current_row -= 1
    if current_row < 0 and current_line > 0:
        current_line -= 1
        # Land on the newline slot of the previous line.
        current_row = len(input_str[current_line])
    return _current_char(input_str)


def error(msg, line=None, row=None):
    """Report a lexical error at line:row (1-based, default: the cursor).

    The message goes to the analysis widget when the GUI is running and to
    stderr otherwise.
    """
    global out_line
    if line is None:
        line = current_line + 1
    if row is None:
        row = current_row + 1
    text = str(line) + ':' + str(row) + ' Error: ' + msg
    if analysis is None:
        sys.stderr.write(text + "\n")
    else:
        analysis.insert(str(out_line) + '.0', text)
        analysis.insert(str(out_line) + '.end', "\n")
    out_line = out_line + 1


def _ends_number(char):
    """Tell whether char may legally follow a numeric literal."""
    return (char == SCANEOF or char.strip() == ''
            or char in OPERATOR_LIST or char in SEPARATOR_LIST)


def _scan_number(input_str, current_char):
    """Scan an integer, exponent or float literal starting at current_char."""
    int_value = 0
    while current_char.isdigit():
        int_value = int_value * 10 + int(current_char)
        current_char = getchar(input_str)

    if not _ends_number(current_char) and current_char != 'e':
        error('illigal identifier', current_line + 1, current_row + 1)
        return ('', '', '')

    if current_char == 'e':
        power_value = str(int_value) + 'e'
        current_char = getchar(input_str)
        if current_char == '+' or current_char == '-':
            power_value += current_char
            current_char = getchar(input_str)
        while current_char.isdigit():
            power_value += current_char
            current_char = getchar(input_str)
        if not _ends_number(current_char):
            error('illigal const int value in power',
                  current_line + 1, current_row + 1)
            return ('', '', '')
        ungetchar(input_str)
        return ('INUM', power_value, CATEGORY_DICT['inum'])

    if current_char == '.':
        float_value = str(int_value) + '.'
        current_char = getchar(input_str)
        while current_char.isdigit():
            float_value += current_char
            current_char = getchar(input_str)
        # A second '.' is a separator but still makes the literal invalid.
        if not _ends_number(current_char) or current_char == '.':
            error('illigal const float value',
                  current_line + 1, current_row + 1)
            return ('', '', '')
        ungetchar(input_str)
        return ('FNUM', float_value, CATEGORY_DICT['fnum'])

    ungetchar(input_str)
    return ('INUM', int_value, CATEGORY_DICT['inum'])


def _scan_word(input_str, current_char):
    """Scan an identifier or keyword starting at current_char."""
    chars = []
    # SCANEOF is itself alphabetic, so it has to be excluded explicitly.
    while current_char != SCANEOF and (current_char.isalpha()
                                       or current_char.isdigit()
                                       or current_char == '_'):
        chars.append(current_char)
        current_char = getchar(input_str)
    ungetchar(input_str)
    word = ''.join(chars)
    if word in KEYWORD_LIST:
        return (word, '', CATEGORY_DICT[word])
    return ('IDN', word, CATEGORY_DICT['IDN'])


def _scan_string(input_str):
    """Scan a double-quoted literal; the opening quote is already consumed."""
    line = current_line + 1
    row = current_row + 1
    chars = []
    current_char = getchar(input_str)
    while current_char != '"':
        if current_char == SCANEOF:
            error('missing terminating "', line, row)
            return (SCANEOF, '', '')
        chars.append(current_char)
        current_char = getchar(input_str)
    return ('STRING_LITERAL', ''.join(chars), CATEGORY_DICT['string'])


def _skip_block_comment(input_str, line, row):
    """Consume a /* ... */ comment whose opening "/*" is already consumed.

    Returns None when the comment is closed, or the SCANEOF token (after
    reporting an error at line:row) when the input ends first.
    """
    current_char = getchar(input_str)
    while True:
        if current_char == SCANEOF:
            error('unteminated /* comment', line, row)
            return (SCANEOF, '', '')
        if current_char == '*':
            current_char = getchar(input_str)
            if current_char == '/':
                return None
            # Re-examine this character: it may be another '*' (as in "**/").
            continue
        current_char = getchar(input_str)


def _scan_operator(input_str, first):
    """Scan a one- or two-character operator that starts with first."""
    op = first
    following = getchar(input_str)
    # Merge only into a known operator so input like "a=-1" does not look up
    # the non-existent operator "=-".
    if following != SCANEOF and op + following in CATEGORY_DICT:
        op += following
    else:
        ungetchar(input_str)
    return ('OP', op, CATEGORY_DICT[op])


def scanner(input_str):
    """Read the next token from input_str at the module-level cursor.

    Returns a (kind, value, category_code) tuple, (SCANEOF, '', '') at end of
    input, ('', '', '') after a malformed numeric literal, or None when the
    consumed text produced no token (whitespace, comment, unknown character).
    """
    current_char = getchar(input_str)
    if current_char == SCANEOF:
        return (SCANEOF, '', '')
    if current_char.strip() == '':
        return None
    if current_char.isdigit():
        return _scan_number(input_str, current_char)
    if current_char.isalpha() or current_char == '_':
        return _scan_word(input_str, current_char)
    if current_char == '"':
        return _scan_string(input_str)
    if current_char == '/':
        line = current_line + 1
        row = current_row + 1
        if getchar(input_str) == '*':
            return _skip_block_comment(input_str, line, row)
        # Not a comment: put the look-ahead back and treat '/' as an operator.
        ungetchar(input_str)
    if current_char in SEPARATOR_LIST:
        return ('SEP', current_char, CATEGORY_DICT[current_char])
    if current_char in OPERATOR_LIST:
        return _scan_operator(input_str, current_char)
    error('unknown character: ' + current_char)
    return None


def fileloader():
    """Ask for a source file and load its contents into the code editor."""
    filename = LoadFileDialog(root).go()
    if not filename:
        # Dialog cancelled: keep whatever is currently in the editor.
        return
    with open(filename, "r") as fin:
        input_file = fin.read()
    code.delete(1.0, END)
    code.insert(1.0, input_file)


def lexer_analysis(input_str):
    """Tokenise input_str (a list of lines) from the beginning.

    Returns one "kind<TAB><TAB>value<TAB><TAB>code" string per token.
    """
    global current_row
    global current_line
    current_row = -1
    current_line = 0
    analysis_result = []
    while True:
        r = scanner(input_str)
        if r is None:
            continue
        if r[0] == SCANEOF:
            break
        analysis_result.append(
            str(r[0]) + "\t\t" + str(r[1]) + "\t\t" + str(r[2]))
    return analysis_result


def lexer():
    """Analyse the editor contents and show errors, then tokens, in the GUI."""
    global out_line
    analysis.delete(1.0, END)
    # Restart output numbering; error() advances it while the scan runs.
    out_line = 1
    input_str = code.get(1.0, END).split("\n")
    for each in lexer_analysis(input_str):
        analysis.insert(str(out_line) + '.end', each)
        analysis.insert(str(out_line) + '.end', "\n")
        out_line = out_line + 1


def pre_interface():
    """Build the main window and run the Tk event loop."""
    global root
    global code
    global analysis
    if 'Tk' not in globals():
        raise ImportError('Tkinter is required to run the graphical lexer')
    root = Tk()
    code = Text(root, width=60, height=20, font=15)
    analysis = Text(root, width=60, height=20, font=15)
    t = StringVar()
    t.set('Patrick的词法分析器')
    label = Label(root, textvariable=t, font=15)
    Analysis = Button(root, text='词法分析', command=lexer, font=15)
    load = Button(root, text=' 载入代码 ', command=fileloader, font=15)
    root.title("LEXER")
    label.pack(side=TOP)
    Analysis.pack(side=BOTTOM)
    load.pack(side=BOTTOM)
    code.pack(side=LEFT)
    analysis.pack(side=RIGHT)
    root.mainloop()


def main():
    """Entry point: start the graphical lexer."""
    pre_interface()


if __name__ == '__main__':
    main()

 


以上是关于Python版C语言词法分析器的主要内容,如果未能解决你的问题,请参考以下文章:

编译原理 实验一 java语言实现对C语言词法分析

编译原理让我们来构建一个简单的解释器(Let’s Build A Simple Interpreter. Part 5.)(python/c/c++版)(笔记)Lexer词法分析程序

C词法分析器的Python简单实现

编译原理--词法分析器(python语言实现)

词法分析(编译原理不用慌)

语言词法分析:一次或单独对字符串进行词法分析的性能更好?