ucc编译器(中间代码生成)
Posted 费晓行
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了ucc编译器(中间代码生成)相关的知识,希望对你有一定的参考价值。
【 声明:版权所有,欢迎转载,请勿用于商业用途。 联系信箱:feixiaoxing @163.com】
所谓中间代码生成,就是生成一种和具体汇编语言无关的中间代码。生成中间代码有很多的好处,一方面它可以转化成不同的汇编语言,使得后端移植变得很容易;另外一方面,中间代码可以做很多优化工作,剔除大量的冗余代码、无效代码、垃圾代码。
之前在语法分析、语义分析的时候,我们一直提到declaration、expression、statement这三种语句。等真正走到中间代码生成这个环节的时候,其实只需要处理expression和statement的翻译就可以了,declaration本身只是声明,这一部分其实可以省略了。
1、函数入口
void Translate(AstTranslationUnit transUnit)
2、语句翻译
tranexpr.c
https://github.com/sheisc/ucc162.3/blob/a92719fff0ab7eb5b0c45768acedabb3cd70ca05/ucc/ucl/tranexpr.c
transtmt.c
https://github.com/sheisc/ucc162.3/blob/a92719fff0ab7eb5b0c45768acedabb3cd70ca05/ucc/ucl/transtmt.c
3、打印生成的中间代码
uildasm.c
https://github.com/sheisc/ucc162.3/blob/a92719fff0ab7eb5b0c45768acedabb3cd70ca05/ucc/ucl/uildasm.c
4、中间代码格式
opcode.h
#ifndef OPCODE
#error "You must define OPCODE macro before include this file"
#endif
/**
 * Opcode table used by UIL.
 *
 * The including file must define the OPCODE macro first (enforced by the
 * #error above); every line below is one invocation of that macro.
 * Each entry passes three arguments:
 *   1. the opcode identifier,
 *   2. a string: the operator spelling or mnemonic for the opcode,
 *   3. an identifier naming the entry's category (Assign, Branch, ...).
 * NOTE(review): how the three arguments are used depends entirely on the
 * includer's definition of OPCODE, which is not visible here -- confirm
 * against the including files.
 */
/* Category Assign: binary bitwise, shift and arithmetic operators */
OPCODE(BOR, "|", Assign)
OPCODE(BXOR, "^", Assign)
OPCODE(BAND, "&", Assign)
OPCODE(LSH, "<<", Assign)
OPCODE(RSH, ">>", Assign)
OPCODE(ADD, "+", Assign)
OPCODE(SUB, "-", Assign)
OPCODE(MUL, "*", Assign)
OPCODE(DIV, "/", Assign)
OPCODE(MOD, "%", Assign)
/* Category Assign: unary negation and bitwise complement */
OPCODE(NEG, "-", Assign)
OPCODE(BCOM, "~", Assign)
/* Category Branch: JZ/JNZ, then the six relational comparisons */
OPCODE(JZ, "", Branch)
OPCODE(JNZ, "!", Branch)
OPCODE(JE, "==", Branch)
OPCODE(JNE, "!=", Branch)
OPCODE(JG, ">", Branch)
OPCODE(JL, "<", Branch)
OPCODE(JGE, ">=", Branch)
OPCODE(JLE, "<=", Branch)
/* Categories Jump and IndirectJump */
OPCODE(JMP, "jmp", Jump)
OPCODE(IJMP, "ijmp", IndirectJump)
/* Categories Inc and Dec */
OPCODE(INC, "++", Inc)
OPCODE(DEC, "--", Dec)
/* Categories Address ("&") and Deref ("*") */
OPCODE(ADDR, "&", Address)
OPCODE(DEREF, "*", Deref)
/* Category Cast: the string spells the conversion as a pair of C casts */
OPCODE(EXTI1, "(int)(char)", Cast)
OPCODE(EXTU1, "(int)(unsigned char)", Cast)
OPCODE(EXTI2, "(int)(short)", Cast)
OPCODE(EXTU2, "(int)(unsigned short)",Cast)
OPCODE(TRUI1, "(char)(int)", Cast)
OPCODE(TRUI2, "(short)(int)", Cast)
OPCODE(CVTI4F4, "(float)(int)", Cast)
OPCODE(CVTI4F8, "(double)(int)", Cast)
OPCODE(CVTU4F4, "(float)(unsigned)", Cast)
OPCODE(CVTU4F8, "(double)(unsigned)", Cast)
OPCODE(CVTF4, "(double)(float)", Cast)
OPCODE(CVTF4I4, "(int)(float)", Cast)
OPCODE(CVTF4U4, "(unsigned)(float)", Cast)
OPCODE(CVTF8, "(float)(double)", Cast)
OPCODE(CVTF8I4, "(int)(double)", Cast)
OPCODE(CVTF8U4, "(unsigned)(double)", Cast)
/* Categories Move and IndirectMove */
OPCODE(MOV, "=", Move)
OPCODE(IMOV, "*=", IndirectMove)
/* Categories Call and Return */
OPCODE(CALL, "call", Call)
OPCODE(RET, "ret", Return)
/* Categories Clear and NOP */
OPCODE(CLR, "", Clear)
OPCODE(NOP, "NOP", NOP)
对应结构体,
/**
 * Intermediate Representation (IR) instruction.
 *
 * Instructions carry links to both neighbours (prev/next) and hold up to
 * three operands.  See uildasm.c, which names the operand slots with:
 *
 *   #define DST  inst->opds[0]
 *   #define SRC1 inst->opds[1]
 *   #define SRC2 inst->opds[2]
 *
 * prev:   pointer to previous instruction
 * next:   pointer to next instruction
 * ty:     instruction operating type
 * opcode: operation code
 * opds:   operands, at most three
 *
 * Note that IRInst is a typedef for a POINTER to struct irinst.
 */
typedef struct irinst
{
struct irinst *prev;	/* previous instruction */
struct irinst *next;	/* next instruction */
Type ty;		/* instruction operating type */
int opcode;		/* operation code */
Symbol opds[3];		/* operands: [0] DST, [1] SRC1, [2] SRC2 */
} *IRInst;
5、gen.c
这个文件是中间代码生成最基础的一个文件,中间代码需要处理的很多基础工作都是在这里完成的。比如创建临时变量、创建跳转、创建返回值、创建函数调用等等。
6、不失一般性,我们以statement的翻译作为举例说明,
6.1 入口函数
/**
 * Dispatch a statement to its translator.
 *
 * The handler is looked up in StmtTrans, indexed by the statement's kind
 * relative to NK_ExpressionStatement, and called with the statement.
 */
static void TranslateStatement(AstStatement stmt)
{
	StmtTrans[stmt->kind - NK_ExpressionStatement](stmt);
}
6.2 if-statement翻译
/**
 * Translate an if statement, with or without an else part.
 *
 * The condition is negated before branching, so the branch is taken when
 * the then-part has to be skipped.
 *
 * if (expr) stmt becomes:
 *     if ! expr goto nextBB
 *   trueBB:
 *     stmt
 *   nextBB:
 *     ...
 *
 * if (expr) stmt1 else stmt2 becomes:
 *     if ! expr goto falseBB
 *   trueBB:
 *     stmt1
 *     goto nextBB
 *   falseBB:
 *     stmt2
 *   nextBB:
 *     ...
 */
static void TranslateIfStatement(AstStatement stmt)
{
	AstIfStatement node = AsIf(stmt);
	BBlock joinBB = CreateBBlock();
	BBlock thenBB = CreateBBlock();
	int hasElse = (node->elseStmt != NULL);
	/* Where to go when the condition is false: a dedicated else block
	 * if there is an else part, otherwise straight to the join block. */
	BBlock skipBB = hasElse ? CreateBBlock() : joinBB;

	TranslateBranch(Not(node->expr), skipBB, thenBB);
	StartBBlock(thenBB);
	TranslateStatement(node->thenStmt);

	if (hasElse)
	{
		/* The then-part must jump over the else-part. */
		GenerateJump(joinBB);
		StartBBlock(skipBB);
		TranslateStatement(node->elseStmt);
	}

	StartBBlock(joinBB);
}
6.3 while-statement翻译
/**
 * Translate a while statement.
 *
 * The condition test is placed after the body and entered through an
 * initial jump:
 *
 *     goto contBB
 *   loopBB:
 *     stmt
 *   contBB:
 *     if (expr) goto loopBB
 *   nextBB:
 *     ...
 *
 * The three blocks are stored on the loop node itself before the body is
 * translated.
 */
static void TranslateWhileStatement(AstStatement stmt)
{
	AstLoopStatement loop = AsLoop(stmt);

	loop->loopBB = CreateBBlock();
	loop->contBB = CreateBBlock();
	loop->nextBB = CreateBBlock();

	/* Enter the loop at the condition test, not at the body. */
	GenerateJump(loop->contBB);

	StartBBlock(loop->loopBB);
	TranslateStatement(loop->stmt);

	StartBBlock(loop->contBB);
	TranslateBranch(loop->expr, loop->loopBB, loop->nextBB);

	StartBBlock(loop->nextBB);
}
6.4 break-statement翻译
/**
 * Translate a break statement.
 *
 * A break terminates the execution of its associated switch or loop, so
 * it becomes:
 *
 *     goto <nextBB of the target switch or loop>
 *   <new block>:
 *     ...
 */
static void TranslateBreakStatement(AstStatement stmt)
{
	AstBreakStatement node = AsBreak(stmt);
	BBlock exitBB;

	/* The target is either a switch or a loop; pick its nextBB. */
	exitBB = (node->target->kind == NK_SwitchStatement)
	             ? AsSwitch(node->target)->nextBB
	             : AsLoop(node->target)->nextBB;

	GenerateJump(exitBB);
	/* Start a newly created block after the jump. */
	StartBBlock(CreateBBlock());
}
6.6 return-statement翻译
/**
 * Translate a return statement.
 *
 * A return terminates execution of the current function.  If it carries
 * an expression, the expression is translated and handed to
 * GenerateReturn together with its type.  In every case a jump to the
 * function's exitBB follows, and a newly created block is started.
 */
static void TranslateReturnStatement(AstStatement stmt)
{
	AstReturnStatement node = AsRet(stmt);

	if (node->expr != NULL)
	{
		GenerateReturn(node->expr->ty, TranslateExpression(node->expr));
	}

	GenerateJump(FSYM->exitBB);
	StartBBlock(CreateBBlock());
}
6.7 switch-statement翻译
switch-statement翻译几乎是statement翻译里面最复杂的一部分。整个处理过程由TranslateSwitchStatement、TranslateSwitchBuckets、MergeSwitchBucket这三个部分组成。
TranslateSwitchStatement是总的入口,TranslateSwitchBuckets负责排序,MergeSwitchBucket负责每个case前后的拼接,这部分可以专门写一篇文章介绍下。
6.8 和expression的对接
/**
 * Translate an expression statement.
 *
 * A statement without an expression produces nothing; otherwise the
 * expression is translated and its result is not used here.
 */
static void TranslateExpressionStatement(AstStatement stmt)
{
	AstExpressionStatement node = AsExpr(stmt);

	if (node->expr == NULL)
	{
		return;
	}

	TranslateExpression(node->expr);
}
6.9 可开可关的优化
{
/*
 * Do some optimizations for every basic block, only at the IR level,
 * not at the ASM level.
 * The call is wrapped in "#if 1" so it can be compiled out by changing
 * the condition to 0.
 */
#if 1
Optimize(FSYM);
#endif
}
6.10 总结
中间代码的生成其实有很多的套路。比如说if语句、for语句、while语句、do{}while()语句的翻译,这些都是套路。如果同学们有兴趣实现一套自己的语言,这部分完全可以直接拿过来使用。
另外一个需要重点掌握的,就是opcode.h。这部分和汇编语言基本是一一对应的。中间语言生成对应的汇编指令,本质上就是寻找合适的寄存器、查表获取对应的汇编指令,这样其实就可以完成汇编代码生成了。
以上是关于ucc编译器(中间代码生成)的主要内容,如果未能解决你的问题,请参考以下文章