简易词法分析器

Posted INnoVation-V2

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了简易词法分析器相关的知识,希望对你有一定的参考价值。

最近开始学习编译器,希望能坚持下去,也希望通过做编译器把 C++ 练好一点。今天花了一天时间做了一个简单的词法分析器;过几天编译原理的书到了之后,希望能通过看书找到更好的实现方法。

//learn.cpp

#include <iostream> #include "learn.h" #include <fstream> using namespace std; static char *st_line; static int st_line_pos; int length; typedef enum { INITIAL_STATUS, IN_INT_PART_STATUS, IN_FRAC_PART_STATUS, DOT_STATUS, } ParseStatus; void get_token(Token *token) { int out_pos = 0; ParseStatus status = INITIAL_STATUS; char current_char; string temp; token->kind = BAD_TOKEN; while (st_line[st_line_pos] != \0) { current_char = st_line[st_line_pos]; if ((status == IN_INT_PART_STATUS || status == IN_FRAC_PART_STATUS) && !isdigit(current_char) && current_char != .) { token->kind = NUMBER_TOKEN; sscanf(token->str, "%lf", &token->value); return; } if (isalpha(current_char)) { while (current_char != = && current_char != ; && current_char != < && current_char != > && current_char != + && current_char != - && current_char != * && current_char != / && current_char != ( && current_char != )) { temp += current_char; token->str[out_pos] = st_line[st_line_pos]; ++st_line_pos; ++out_pos; current_char = st_line[st_line_pos]; token->str[out_pos + 1] = \0; if (temp == "if") { token->kind = IF_OPERATOR_TOKEN; return; } else if (temp == "while") { token->kind = WHILE_OPERATOR_TOKEN; return; } else if (temp == "for") { token->kind = FOR_PAREN_TOKEN; return; } else if (temp == "int") { token->kind = INT_TOKEN; return; } else if (temp == "double") { token->kind = DOUBLE_TOKEN; return; } else if (temp == "string") { token->kind = STRING_TOKEN; return; } else if (temp == "cout") { token->kind = COUT_TOKEN; return; } else if (temp == "return") { token->kind = RETURN_TOKEN; return; } else if (temp == "main") { token->kind = MAIN_TOKEN; return; } } token->kind = VARIABLE_NAME_TOKEN; token->string_value = temp; token->str[out_pos] = \0; return; } if ((current_char == " || current_char == \‘) && status == INITIAL_STATUS) { current_char = st_line[++st_line_pos]; while (current_char != " && current_char != \‘) { token->str[out_pos] = st_line[st_line_pos]; out_pos++; temp += current_char; 
current_char = st_line[++st_line_pos]; } token->string_value = temp; token->kind = STRING_VARIABLE_TOKEN; st_line_pos++; token->str[++out_pos] = \0; return; } if (isspace(current_char)) { if (status == INITIAL_STATUS) { st_line_pos++; continue; } } token->str[out_pos] = st_line[st_line_pos]; st_line_pos++; out_pos++; token->str[out_pos] = \0; if (current_char == +) { if (st_line[st_line_pos] == +) { token->str[out_pos] = st_line[st_line_pos]; token->str[out_pos + 1] = \0; ++st_line_pos; token->kind = SELF_PLUS_ONE_OPERATOR_TOKEN; return; } else { token->kind = ADD_OPERATOR_TOKEN; return; } } else if (current_char == -) { if (st_line[st_line_pos] == -) { token->str[out_pos] = st_line[st_line_pos]; token->str[out_pos + 1] = \0; ++st_line_pos; token->kind = SELF_SUB_ONE_OPERATOR_TOKEN; return; } else { token->kind = SUB_OPERATOR_TOKEN; return; } } else if (current_char == <) { if (st_line[st_line_pos] == <) { token->str[out_pos] = st_line[st_line_pos]; token->str[out_pos + 1] = \0; ++st_line_pos; token->kind = OUT_OPERATER_TOKEN; return; } else { token->kind = LESS_OPERATER_TOKEN; return; } } else if (current_char == >) { if (st_line[st_line_pos] == >) { token->str[out_pos] = st_line[st_line_pos]; token->str[out_pos + 1] = \0; ++st_line_pos; token->kind = GET_OPERATER_TOKEN; return; } else { token->kind = GREATE_OPERATER_TOKEN; return; } } else if (current_char == *) { token->kind = MUL_OPERATOR_TOKEN; return; } else if (current_char == /) { token->kind = DIV_OPERATOR_TOKEN; return; } else if (current_char == ;) { token->kind = END_OF_LINE_TOKEN; return; } else if (current_char == {) { token->kind = LEFT_BRACE_TOKEN; return; } else if (current_char == }) { token->kind = RIGHT_BRACE_TOKEN; return; } else if (current_char == () { token->kind = LEFT_PARENTHESS_TOKEN; return; } else if (current_char == )) { token->kind = RIGHT_PARENTHESE_TOKEN; return; } else if (current_char == =) { if (st_line[st_line_pos] == =) { token->kind = EQUAL_PAREN_TOKEN; return; } else { 
token->kind = ASSIGMENT_PAREN_TOKEN; return; } } else if (isdigit(current_char)) { if (status == INITIAL_STATUS) { status = IN_INT_PART_STATUS; } else if (status == DOT_STATUS) { status = IN_FRAC_PART_STATUS; } } else if (current_char == .) { if (status == IN_INT_PART_STATUS) { status = DOT_STATUS; } else { fprintf(stderr, "syntax error.\n"); exit(1); } } } } //void //set_line(char *line) { // st_line = line; // st_line_pos = 0; //} void parse_line(string filename) { Token token; st_line_pos = 0; char temp, test[1024]; ifstream get; get.open(filename); int i = 0; while (!get.eof()) { get.read(&temp, 1); test[i++] = temp; } length = i; st_line = test; for (;;) { get_token(&token); if (st_line_pos == length) { return; } printf("kind..%d, str..%s\n", token.kind, token.str); } } int main() { string filename = "/home/liuyu/文档/test"; parse_line(filename); return 0; }
//learn.h


#ifndef LEARN_LEARN_H
#define LEARN_LEARN_H

#include <iostream>
using namespace std;

typedef enum {
    BAD_TOKEN,
    NUMBER_TOKEN,
    STRING_TOKEN,
    INT_TOKEN,
    DOUBLE_TOKEN,
    COUT_TOKEN,
    RETURN_TOKEN,
    MAIN_TOKEN,
    IF_OPERATOR_TOKEN,
    OUT_OPERATER_TOKEN,
    GET_OPERATER_TOKEN,
    LESS_OPERATER_TOKEN,
    GREATE_OPERATER_TOKEN,
    LEFT_BRACE_TOKEN,
    RIGHT_BRACE_TOKEN,
    LEFT_PARENTHESS_TOKEN,
    RIGHT_PARENTHESE_TOKEN,
    WHILE_OPERATOR_TOKEN,
    EQUAL_PAREN_TOKEN,
    FOR_PAREN_TOKEN,
    ASSIGMENT_PAREN_TOKEN,
    END_OF_LINE_TOKEN,
    VARIABLE_NAME_TOKEN,
    STRING_VARIABLE_TOKEN,
    ADD_OPERATOR_TOKEN,
    SUB_OPERATOR_TOKEN,
    MUL_OPERATOR_TOKEN,
    DIV_OPERATOR_TOKEN,
    SELF_PLUS_ONE_OPERATOR_TOKEN,
    SELF_SUB_ONE_OPERATOR_TOKEN,
    END_TOKEN
} TokenKind;

#define MAX_TOKEN_SIZE (100)

/* One token produced by get_token. */
typedef struct {
    TokenKind kind;                     /* what sort of token this is */
    double      value;                  /* numeric value; filled in for NUMBER_TOKEN */
    string      string_value;           /* text of an identifier or string literal */
    char        str[MAX_TOKEN_SIZE];    /* NUL-terminated token text; at most MAX_TOKEN_SIZE - 1 characters fit */
} Token;


/*
 * Point the lexer at `line` and restart scanning from its first character.
 * NOTE(review): verify that the implementation file provides this definition.
 */
void set_line(char *line);
/*
 * Read the next token from the current input into *token. token->kind is
 * BAD_TOKEN when no token was recognised.
 */
void get_token(Token *token);

#endif

 测试:技术分享

技术分享

以上是关于简易词法分析器的主要内容,如果未能解决你的问题,请参考以下文章

简易的词法分析程序

C语言编译器开发之旅:词法分析扫描器

编译原理 实验一 java语言实现对C语言词法分析

编译原理 实验一 java语言实现对C语言词法分析

Java 实现《编译原理》简单词法分析功能

用java实现一个简易编译器1-词法解析入门