词法分析器的手工实现

Posted duke77--null

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了词法分析器的手工实现相关的知识,希望对你有一定的参考价值。

#include<stdio.h>
#include<stdlib.h>
#include<string.h>
#include<ctype.h>
#include<iostream>
#include<fstream>
using namespace std;
// One entry of a lexeme table: the spelling of a keyword or operator paired
// with the numeric category code assigned to it.
struct symbol
{
    char *str;   // spelling of the lexeme (points into a string table)
    int coding;  // category code reported for this lexeme
};
// Spellings of the reserved words; initialization() copies them, in this
// order, into `keywords`.
char *keyword_list[34] = { "void", "char", "int", "float", "double", "short", "long", "signed", "unsigned", "struct", "union", "enum", "typedef", "sizeof", "auto", "static", "register", "extern", "const", "volatile", "return", "continue", "break", "goto", "if", "else", "switch", "case","default","for","do","while","scanf","printf"};
// Spellings of the operators and delimiters; initialization() copies them,
// in this order, into `identifiers`.
char *operator_list[44] = { "{","}","[","]","(",")",".","->","~","++","--",
"!","&","*","/","%","+","-","<<",">>",">", ">=","<","<=","==","!=","^","|","&&",
"||","?","=","/=","*=","%=","+=","-=","&=","^=","|=",",","#",";",":"};
char ch; // most recently read input character
char strToken[20] = ""; // text of the token currently being assembled
int eof_flag = 0; // set to 1 once the end of the input has been reached
int num = 1;// next category code to hand out (incremented after each assignment)
int row = 1; // line counter used in error messages; bumped on every newline read
struct symbol keywords[34]; // keyword table filled by initialization()
struct symbol identifiers[44]; // operator/delimiter table filled by initialization()
FILE *fp = NULL; // input stream; opened in main()
FILE *fw = NULL; // not referenced by the code visible in this file
ofstream out; // result stream; main() opens it on "result.txt"

// Assign a category code to every known lexeme.
// Codes are drawn from the global counter `num`, so keywords receive the
// first 34 values and operators/delimiters the following 44, each in table
// order. The analyzer itself reports identifiers as 79 and numeric literals
// as 80.
void initialization() {
    // Keywords.
    for (int k = 0; k < 34; ++k)
    {
        keywords[k].str = keyword_list[k];
        keywords[k].coding = num++;
    }
    // Operators and delimiters (stored in the table named `identifiers`).
    for (int k = 0; k < 44; ++k)
    {
        identifiers[k].str = operator_list[k];
        identifiers[k].coding = num++;
    }
}

//把下一个字符读入ch中
void getNextChar(FILE *ffp)
{
    if ((ch = getc(ffp)) == EOF)
    {
        eof_flag = 1;
    }
    if (ch == 
)
        row++;
}
// Skip blanks: while `ch` holds a space, newline, carriage return or tab,
// keep calling getNextChar() until `ch` holds some other character.
// '\r' is included so that files with CRLF line endings are not rejected as
// containing an invalid symbol.
void getbc(FILE * ffp)
{
    while (ch == ' ' || ch == '\n' || ch == '\r' || ch == '\t')
    {
        getNextChar(ffp);
    }
}

// Return true when ch is an alphabetic character.
bool isLetter(char ch)
{
    // The <ctype.h> classifiers require a value representable as unsigned
    // char (or EOF); a plain char may be negative, so convert first.
    return isalpha(static_cast<unsigned char>(ch)) != 0;
}

// Return true when ch is a decimal digit.
bool isDigit(char ch)
{
    // The <ctype.h> classifiers require a value representable as unsigned
    // char (or EOF); a plain char may be negative, so convert first.
    return isdigit(static_cast<unsigned char>(ch)) != 0;
}

// Return true when ch is the underscore character.
bool isUnderline(char ch)
{
    return ch == '_';
}

// Append the current character `ch` to the token buffer `strToken`.
// A single char is not a NUL-terminated string, so it is stored directly
// instead of being handed to strcat(). Once the buffer is full, further
// characters are dropped rather than written past its end.
void concat()
{
    size_t len = strlen(strToken);
    if (len + 1 < sizeof(strToken))
    {
        strToken[len] = ch;
        strToken[len + 1] = '\0';
    }
}

//把搜索指针回调一个字符位置
void retract(FILE * ffp)
{
    fseek(ffp, -1, SEEK_CUR);
    ch =  ;
}

//对于strToken中的字符串判断它是否为保留字,若它是保留字则给出它的编码,否则返回0
int reserve_string(char * str) {
    for (int i = 0;i < 34;i++) {
        if ((strcmp(str, keywords[i].str)) == 0)
        {
            return keywords[i].coding;
        }
    }
    return 0;
}

//返回strToken中所识别出的算符和界符编码
int reserve_operator(char* ch)
{

    for (int i = 0;i < 44;i++) {
        if ((strcmp(ch, identifiers[i].str)) == 0)
        {
            return identifiers[i].coding;
        }
    }
    return 0;
}

// Error handler: print a banner naming the current input row and terminate
// the program.
void error()
{
    printf("\n ********Error*********************\n");
    printf(" row %d  Invaild symbol ! ! ! ",  row);
    printf("\n ********Error*********************\n");
    // Report failure to the caller; a zero status would signal success.
    exit(EXIT_FAILURE);
}
// Append one token record to the result stream `out`.
//   x   - category code of the token
//   str - spelling of the token
// The record has the form "(<x>,<str> )" followed by a newline.
// Stream insertion formats the number, so zero and negative codes print
// correctly and no fixed-size scratch buffer can be overrun.
void write_result( int x,char *str )
{
    out << '(' << x << ',' << str << " )\n";
}

//词法分析
void LexiscalAnalyzer()
{
    int num = 0, val = 0, code = 0;
    strcpy(strToken, "");
    getNextChar(fp);
    getbc(fp);
    switch (ch)
    {
    case a:
    case b:
    case c:
    case d:
    case e:
    case f:
    case g:
    case h:
    case i:
    case j:
    case k:
    case l:
    case m:
    case n:
    case o:
    case p:
    case q:
    case r:
    case s:
    case t:
    case u:
    case v:
    case w:
    case x:
    case y:
    case z:
    case A:
    case B:
    case C:
    case D:
    case E:
    case F:
    case G:
    case H:
    case I:
    case J:
    case K:
    case L:
    case M:
    case N:
    case O:
    case P:
    case Q:
    case R:
    case S:
    case T:
    case U:
    case V:
    case W:
    case X:
    case Y:
    case Z:
    case _:
        while (isLetter(ch) || isDigit(ch) || isUnderline(ch))
        {
            concat();
            getNextChar(fp);
        }
        retract(fp);
        code = reserve_string(strToken);
        if (code == 0)
        {
            printf("(%d , %s)
", 79, strToken);
            write_result(79,strToken);
        }
        else
        {
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        break;
    case0:
    case1:
    case2:
    case3:
    case4:
    case5:
    case6:
    case7:
    case8:
    case9:
        while (isdigit(ch))
        {
            concat();
            getNextChar(fp);
        }
        retract(fp);
        printf("(%d , %s)
",80, strToken);
        write_result(80,strToken);
        break;
    case {:
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        break;
    case }:
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        break;
    case [:
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        break;
    case ]:
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        break;
    case (:
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        break;    
    case ):
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        break;
    case .:
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        break;
    case -:
        concat();
        getNextChar(fp);
        if (ch == >)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else if (ch == -)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else if (ch == =)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else
        {
            retract(fp);
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        break;
    case ~:
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        break;
    case +:
        concat();
        getNextChar(fp);
        if (ch == +)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else if (ch == =)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else
        {
            retract(fp);
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);

        }
        break;
    case *:
        concat();
        getNextChar(fp);
        if (ch == =)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else
        {
            retract(fp);
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);

        }
        break;
    case &:
        concat();
        getNextChar(fp);
        if (ch == =)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else if (ch == &)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else
        {
            retract(fp);
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        break;
    case !:
        concat();
        getNextChar(fp);
        if (ch == =)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else
        {
            retract(fp);
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        break;
    case %:
        concat();
        getNextChar(fp);
        if (ch == =)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else
        {
            retract(fp);
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        break;
    case <:
        concat();
        getNextChar(fp);
        if (ch == =)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else if (ch == <)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else
        {
            retract(fp);
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        break;
    case >:
        concat();
        getNextChar(fp);
        if (ch == =)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else if (ch == >)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else
        {
            retract(fp);
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        break;
    case =:
        concat();
        getNextChar(fp);
        if (ch == =)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else
        {
            retract(fp);
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        break;
    case ^:
        concat();
        getNextChar(fp);
        if (ch == =)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else
        {
            retract(fp);
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        break;
    case |:
        concat();
        getNextChar(fp);
        if (ch == =)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else if (ch == |)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else
        {
            retract(fp);
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        break;

    case ?:
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        break;
    case /:
        concat();
        getNextChar(fp);
        if (ch == =)
        {
            concat();
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        else if (ch == /) //跳过注释
        {
            getNextChar(fp);
            while (ch != 
) {
                getNextChar(fp);
            }
                
            break;    
        }
        else if (ch == *)//跳过注释
        {
            getNextChar(fp);
            while (ch != *) {
                getNextChar(fp);
            }
            getNextChar(fp);
            if (ch == /);
            break;
        }
        else
        {
            retract(fp);
            code = reserve_operator(strToken);
            printf("(%d , %s)
", code, strToken);
            write_result(code,strToken);
        }
        break;
    case ,:
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        break;
    case #:
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        break;    
    case ;:
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        break;
    case ::
        concat();
        code = reserve_operator(strToken);
        printf("(%d , %s)
", code, strToken);
        write_result(code,strToken);
        //out<<strToken;
        break;
    default:
        if (ch == EOF)
        {
            eof_flag = 1;
            break;
        }
        error();
    }
}

// Entry point: ask for the path of the source file to analyse, then run the
// analyzer token by token until the input is exhausted. Tokens are echoed to
// standard output and written to "result.txt".
// Returns 0 on success and 1 when the path cannot be read or a file cannot
// be opened.
int main()
{
    initialization();

    char name[1024];
    std::cout<<"please input your file path:";
    // Bound the extraction so an over-long path cannot overrun `name`.
    std::cin.width(sizeof(name));
    std::cin>>name;
    if (!std::cin)
    {
        std::cerr<<"no input file path given\n";
        return 1;
    }

    fp=fopen(name,"r");
    if (fp == NULL)
    {
        std::cerr<<"cannot open input file: "<<name<<'\n';
        return 1;
    }
    out.open("result.txt");
    if (!out.is_open())
    {
        std::cerr<<"cannot open result.txt for writing\n";
        fclose(fp);
        return 1;
    }

    // Stop as soon as the analyzer has seen the end of the input, then fall
    // through to the normal clean-up instead of exiting mid-loop.
    while (eof_flag == 0 && !feof(fp))
    {
        LexiscalAnalyzer();
    }

    fclose(fp);
    out.close();
    return 0;
}

 

以上是关于词法分析器的手工实现的主要内容,如果未能解决你的问题,请参考以下文章:

编译原理 实验一 java语言实现对C语言词法分析

编译原理 实验一 java语言实现对C语言词法分析

lex实现扩展的pl0语言的词法分析器(附源码)

词法分析

词法分析程序的实现

编译原理:词法分析PHP代码实现