ucc编译器(汇编生成)

Posted 费晓行

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了ucc编译器(汇编生成)相关的知识,希望对你有一定的参考价值。

【 声明:版权所有,欢迎转载,请勿用于商业用途。 联系信箱:feixiaoxing@163.com】

 

    有了中间代码,后面生成汇编就好办了。需要支持什么cpu,就将中间代码生成对应的汇编代码就好了。这部分的工作不复杂,但是比较琐碎。

 

1、函数入口

void EmitTranslationUnit(AstTranslationUnit transUnit)

 

2、涉及到的文件

emit.c,主要是告诉我们怎么生成一个gcc认识的汇编文件

https://github.com/sheisc/ucc162.3/blob/a92719fff0ab7eb5b0c45768acedabb3cd70ca05/ucc/ucl/emit.c

 

x86.c,真正用来生成汇编指令

https://github.com/sheisc/ucc162.3/blob/a92719fff0ab7eb5b0c45768acedabb3cd70ca05/ucc/ucl/x86.c

 

3、emit.c分析

3.1 EmitTranslationUnit会调用各个接口,创建一个合适的s文件

/**
 * Emit assembly code for the translation unit.
 *
 * The output goes to ASMFileName when that is set (the name is consumed
 * and reset to NULL), otherwise to the file returned by
 * CreateOutput(Input.filename, ExtName).  The data segment is written
 * first (strings, float constants, globals), then the code segment
 * (ImportFunctions() followed by EmitFunctions()), and finally the file
 * is closed.
 *
 * If the explicitly named output file cannot be opened, the error is
 * reported on stderr and nothing is emitted.
 *
 * @param transUnit  translation unit whose functions are emitted.
 */
void EmitTranslationUnit(AstTranslationUnit transUnit)
{
	if(ASMFileName){
		const char *path = ASMFileName;

		ASMFileName = NULL;
		ASMFile = fopen(path, "w");
		if(ASMFile == NULL){
			/* Every later write and the final fclose() use ASMFile, so
			   continuing with a NULL stream is not an option. */
			perror(path);
			return;
		}
	}else{
		ASMFile = CreateOutput(Input.filename, ExtName);
	}
	SwitchTableNum = 1;
	// "# Code auto-generated by UCC\n\n"
	BeginProgram();
	// ".data\n\n"
	Segment(DATA);
	/**
		.str0:	.string	"%d \012"
		.str1:	.string	"a + b + c + d = %d.\012"
	 */
	EmitStrings();

	EmitFloatConstants();

	EmitGlobals();
	// ".text\n\n"
	Segment(CODE);

	ImportFunctions();
	/**
		The key function is 
			void EmitFunction(FunctionSymbol fsym)
		in x86.c
	 */
	EmitFunctions(transUnit);

	EndProgram();

	fclose(ASMFile);
}

    这个函数里面,Segment、EmitStrings、EmitFloatConstants、EmitGlobals、ImportFunctions这些都是为了创建汇编文件的格式基础,为后续的EmitFunctions做铺垫。在这些函数中调用的子函数,部分内容需要具体的cpu来实现,比如Segment这种函数。

 

3.2 emit.c和x86.c最重要的连接EmitFunction

/**
 * Walk the external declarations of the translation unit and call
 * EmitFunction() for every function node, except static functions whose
 * ref field is not positive.
 */
static void EmitFunctions(AstTranslationUnit transUnit)
{
	AstNode decl;

	for (decl = transUnit->extDecls; decl != NULL; decl = decl->next)
	{
		FunctionSymbol fsym;

		/* only function nodes carry a function symbol */
		if (p_is_not_function:
		    0)
		{
		}
		if (decl->kind != NK_Function)
		{
			continue;
		}

		fsym = ((AstFunction)decl)->fsym;

		/* skip a static function with no positive ref value */
		if (fsym->sclass == TK_STATIC && !(fsym->ref > 0))
		{
			continue;
		}

		EmitFunction(fsym);
	}
}

 

4、x86.c分析

4.1 EmitFunction->EmitBBlock->EmitIRInst,这是最重要的翻译流程

/**
 * Emit one IR instruction: look up the handler for inst->opcode in the
 * Emitter table and call it on a local (shallow) copy of the instruction,
 * so whatever the handler writes into the fields of its argument does not
 * change *inst itself.
 */
static void EmitIRInst(IRInst inst)
{
	struct irinst scratch = *inst;
	void (*handler)(IRInst) = Emitter[inst->opcode];

	handler(&scratch);
}

 

4.2 opcode.h一对一翻译

/**
	Table of emitter functions, built from opcode.h with an X-macro.

	Each line of opcode.h has the form OPCODE(code, name, func), e.g.

	OPCODE(IJMP,    "ijmp",                 IndirectJump)
	OPCODE(INC,     "++",                   Inc)
	OPCODE(DEC,     "--",                   Dec)
	OPCODE(ADDR,    "&",                    Address)
	OPCODE(DEREF,   "*",                    Deref)
	OPCODE(EXTI1,   "(int)(char)",          Cast)
	OPCODE(EXTU1,   "(int)(unsigned char)", Cast)

	Only the third field is used here: it is pasted onto "Emit", so the
	lines above contribute EmitIndirectJump, EmitInc, EmitDec, EmitAddress,
	EmitDeref, EmitCast, EmitCast.  Several opcodes may share one emitter.
	Entries appear in the same order as the OPCODE lines in opcode.h.
	NOTE(review): indexing by opcode presumably relies on the opcode
	enumeration being generated from the same opcode.h -- confirm.
 */
static void (* Emitter[])(IRInst inst) = 
{
#define OPCODE(code, name, func) Emit##func, 
#include "opcode.h"
#undef OPCODE
};

 

4.3 最简单的翻译,Jump跳转

/**
 * Translate an unconditional jump with the X86_JMP template.
 *
 * (1) On entry DST holds the target basic block, not a variable, so there
 *     is no DST->ref to decrement here.  DST is replaced by the label
 *     symbol of that block before the template is printed.
 * (2) The x87 stack top is saved in EmitBlock(), because a jump must be
 *     the last IL instruction of a basic block.
 */
static void EmitJump(IRInst inst)
{
	BBlock target = (BBlock)DST;

	/* from here on DST is the label symbol of the target block */
	DST = target->sym;
	assert(DST->kind == SK_Label);

	ClearRegs();
	PutASMCode(X86_JMP, inst->opds);
}

 

4.4 另外一个最简单的翻译,获取地址

/**
 * Translate an address-of instruction with the X86_ADDR template.
 * The assert requires the destination to be a temporary and the first
 * source operand not to be one.
 */
static void EmitAddress(IRInst inst)
{
	assert(DST->kind == SK_Temp && SRC1->kind != SK_Temp);
	/* NOTE(review): presumably assigns a register to operand 0 (the
	   destination temporary) -- confirm in reg.c */
	AllocateReg(inst, 0);
	PutASMCode(X86_ADDR, inst->opds);
	/* NOTE(review): presumably records that DST has been written --
	   confirm against ModifyVar's definition */
	ModifyVar(DST);
}

 

4.5 中间语言和汇编语言之间的映射

/**
 * Assembly template identifiers.
 * The X-macro keeps only the first field of every TEMPLATE(code, str)
 * line in x86win32.tpl, so there is one enumerator per template, in the
 * order the templates appear in that file.
 */
enum ASMCode
{
#define TEMPLATE(code, str) code,
#include "x86win32.tpl"
#undef TEMPLATE
};

/**
 * Printable names of the assembly template identifiers.
 * #code stringizes the first field of every TEMPLATE(code, str) line, so
 * each entry is the identifier's own spelling (e.g. "X86_JMP"), not the
 * template text.  Entries follow the order of x86win32.tpl, the same file
 * enum ASMCode is expanded from.
 */
static const char * asmCodeName[] = {
#define TEMPLATE(code, str) #code,
#include "x86win32.tpl"
#undef TEMPLATE
};

 

4.6 tpl内容,比如x86linux.tpl

/* This file is an X-macro list: it is meant to be #included after the
   includer has defined TEMPLATE(code, str).  Stop the build otherwise. */
#ifndef TEMPLATE
#error "You must define TEMPLATE macro before include this file"
#endif



/*
 * Arithmetic and bitwise templates.
 *
 * The identifier suffix gives the operand type: I4/U4 for signed/unsigned
 * 4-byte integers, F4/F8 for 4-/8-byte floating point.  NULL marks a
 * combination that has no template (e.g. bitwise operators on floats).
 * NOTE(review): %0, %1, %2 are presumably replaced by PutASMCode with the
 * instruction's operands, "%%" presumably yields a literal '%', and ';'
 * presumably separates the instructions of one template -- confirm in
 * x86linux.c.
 */

/* bitwise or / xor / and */
TEMPLATE(X86_BORI4,     "orl %2, %0")
TEMPLATE(X86_BORU4,     "orl %2, %0")
TEMPLATE(X86_BORF4,     NULL)
TEMPLATE(X86_BORF8,     NULL)

TEMPLATE(X86_BXORI4,    "xorl %2, %0")
TEMPLATE(X86_BXORU4,    "xorl %2, %0")
TEMPLATE(X86_BXORF4,    NULL)
TEMPLATE(X86_BXORF8,    NULL)

TEMPLATE(X86_BANDI4,    "andl %2, %0")
TEMPLATE(X86_BANDU4,    "andl %2, %0")
TEMPLATE(X86_BANDF4,    NULL)
TEMPLATE(X86_BANDF8,    NULL)

/* shifts: the signed right shift is arithmetic (sarl), the unsigned one
   logical (shrl) */
TEMPLATE(X86_LSHI4,    "shll %2, %0")
TEMPLATE(X86_LSHU4,    "shll %2, %0")
TEMPLATE(X86_LSHF4,    NULL)
TEMPLATE(X86_LSHF8,    NULL)

TEMPLATE(X86_RSHI4,    "sarl %2, %0")
TEMPLATE(X86_RSHU4,    "shrl %2, %0")
TEMPLATE(X86_RSHF4,    NULL)
TEMPLATE(X86_RSHF8,    NULL)

/* add / sub / mul: the float forms take a single memory operand and work
   against the x87 stack top */
TEMPLATE(X86_ADDI4,    "addl %2, %0")
TEMPLATE(X86_ADDU4,    "addl %2, %0")
TEMPLATE(X86_ADDF4,    "fadds %2")
TEMPLATE(X86_ADDF8,    "faddl %2")

TEMPLATE(X86_SUBI4,    "subl %2, %0")
TEMPLATE(X86_SUBU4,    "subl %2, %0")
TEMPLATE(X86_SUBF4,    "fsubs %2")
TEMPLATE(X86_SUBF8,    "fsubl %2")

TEMPLATE(X86_MULI4,    "imull %2, %0")
TEMPLATE(X86_MULU4,    "mull %2")
TEMPLATE(X86_MULF4,    "fmuls %2")
TEMPLATE(X86_MULF8,    "fmull %2")

/* div / mod share the same instruction sequence: idivl/divl leave the
   quotient in %eax and the remainder in %edx.  The signed form
   sign-extends %eax into %edx with cdq, the unsigned form zeroes %edx. */
TEMPLATE(X86_DIVI4,    "cdq;idivl %2")
TEMPLATE(X86_DIVU4,    "movl $0, %%edx;divl %2")
TEMPLATE(X86_DIVF4,    "fdivs %2")
TEMPLATE(X86_DIVF8,    "fdivl %2")

TEMPLATE(X86_MODI4,    "cdq;idivl %2")
TEMPLATE(X86_MODU4,    "movl $0, %%edx; divl %2")
TEMPLATE(X86_MODF4,    NULL)
TEMPLATE(X86_MODF8,    NULL)

/* unary minus and bitwise complement */
TEMPLATE(X86_NEGI4,    "negl %0")
TEMPLATE(X86_NEGU4,    "negl %0")
TEMPLATE(X86_NEGF4,    "fchs")
TEMPLATE(X86_NEGF8,    "fchs")

TEMPLATE(X86_COMPI4,   "notl %0")
TEMPLATE(X86_COMPU4,   "notl %0")
TEMPLATE(X86_COMPF4,   NULL)
TEMPLATE(X86_COMPF8,   NULL)

/*
 * Conditional jump templates: %0 is the jump target, %1 (and %2 for the
 * two-operand comparisons) the values compared.
 *
 * Integer forms compare with cmpl and then use the signed condition codes
 * (jg/jl/jge/jle) for I4 and the unsigned ones (ja/jb/jae/jbe) for U4.
 *
 * Float forms load the comparand onto the x87 stack (fldz, flds or fldl),
 * compare and pop both stack entries with fucompp, copy the x87 status
 * word to %ax with fnstsw and test condition bits in %ah
 * (0x01 = C0, 0x04 = C2, 0x40 = C3), then branch on the result of that
 * test with jp/jnp (parity) or jne.
 */

/* jump if zero / non-zero */
TEMPLATE(X86_JZI4,     "cmpl $0, %1;je %0")
TEMPLATE(X86_JZU4,     "cmpl $0, %1;je %0")
TEMPLATE(X86_JZF4,     "fldz;fucompp;fnstsw %%ax;test $0x44, %%ah;jnp %0")
TEMPLATE(X86_JZF8,     "fldz;fucompp;fnstsw %%ax;test $0x44, %%ah;jnp %0")

TEMPLATE(X86_JNZI4,    "cmpl $0, %1;jne %0")
TEMPLATE(X86_JNZU4,    "cmpl $0, %1;jne %0")
TEMPLATE(X86_JNZF4,    "fldz;fucompp;fnstsw %%ax;test $0x44, %%ah;jp %0")
TEMPLATE(X86_JNZF8,    "fldz;fucompp;fnstsw %%ax;test $0x44, %%ah;jp %0")

/* jump if equal / not equal */
TEMPLATE(X86_JEI4,     "cmpl %2, %1;je %0")
TEMPLATE(X86_JEU4,     "cmpl %2, %1;je %0")
TEMPLATE(X86_JEF4,     "flds %2;fucompp;fnstsw %%ax;test $0x44, %%ah;jnp %0")
TEMPLATE(X86_JEF8,     "fldl %2;fucompp;fnstsw %%ax;test $0x44, %%ah;jnp %0")

TEMPLATE(X86_JNEI4,    "cmpl %2, %1;jne %0")
TEMPLATE(X86_JNEU4,    "cmpl %2, %1;jne %0")
TEMPLATE(X86_JNEF4,    "flds %2;fucompp;fnstsw %%ax;test $0x44, %%ah;jp %0")
TEMPLATE(X86_JNEF8,    "fldl %2;fucompp;fnstsw %%ax;test $0x44, %%ah;jp %0")



/* jump if greater / less / greater-or-equal / less-or-equal */
TEMPLATE(X86_JGI4,     "cmpl %2, %1;jg %0")
TEMPLATE(X86_JGU4,     "cmpl %2, %1;ja %0")
TEMPLATE(X86_JGF4,     "flds %2;fucompp;fnstsw %%ax;test $0x1, %%ah;jne %0")
TEMPLATE(X86_JGF8,     "fldl %2;fucompp;fnstsw %%ax;test $0x1, %%ah;jne %0")

TEMPLATE(X86_JLI4,     "cmpl %2, %1;jl %0")
TEMPLATE(X86_JLU4,     "cmpl %2, %1;jb %0")
TEMPLATE(X86_JLF4,     "flds %2;fucompp;fnstsw %%ax;test $0x41, %%ah;jp %0")
TEMPLATE(X86_JLF8,     "fldl %2;fucompp;fnstsw %%ax;test $0x41, %%ah;jp %0")

TEMPLATE(X86_JGEI4,    "cmpl %2, %1;jge %0")
TEMPLATE(X86_JGEU4,    "cmpl %2, %1;jae %0")
TEMPLATE(X86_JGEF4,    "flds %2;fucompp;fnstsw %%ax;test $0x41, %%ah;jne %0")
TEMPLATE(X86_JGEF8,    "fldl %2;fucompp;fnstsw %%ax;test $0x41, %%ah;jne %0")

TEMPLATE(X86_JLEI4,    "cmpl %2, %1;jle %0")
TEMPLATE(X86_JLEU4,    "cmpl %2, %1;jbe %0")
TEMPLATE(X86_JLEF4,    "flds %2;fucompp;fnstsw %%ax;test $0x5, %%ah;jp %0")
TEMPLATE(X86_JLEF8,    "fldl %2;fucompp;fnstsw %%ax;test $0x5, %%ah;jp %0")



/*
 * Integer widening (EXT*) and narrowing (TRU*) templates.
 *
 * EXTI1/EXTI2 sign-extend and EXTU1/EXTU2 zero-extend a 1- or 2-byte
 * source (%1) into a 32-bit destination (%0).
 * TRUI1 stores the low byte of %eax (%al); TRUI2 stores the low 16-bit
 * word (%ax).  The 2-byte truncation has to use movw/%ax -- like
 * X86_MOVI2, X86_INCI2 and X86_DECI2 -- because movb %al would write only
 * one of the destination's two bytes.
 */
TEMPLATE(X86_EXTI1,    "movsbl %1, %0")
TEMPLATE(X86_EXTU1,    "movzbl %1, %0")
TEMPLATE(X86_EXTI2,    "movswl %1, %0")
TEMPLATE(X86_EXTU2,    "movzwl %1, %0")
TEMPLATE(X86_TRUI1,    "movb %%al, %0")
TEMPLATE(X86_TRUI2,    "movw %%ax, %0")

 
/*
 * Conversion templates between integers and floating point.
 *
 * Integer -> float: the integer is pushed on the stack, loaded onto the
 * x87 stack with fildl (32-bit) or fildq (64-bit), stored to %0 with
 * fstps/fstpl, and the stack space is released again.  The unsigned forms
 * push a zero high word first so the value is loaded as a non-negative
 * 64-bit integer.
 */
TEMPLATE(X86_CVTI4F4,  "pushl %1;fildl (%%esp);fstps %0;addl $4, %%esp")
TEMPLATE(X86_CVTI4F8,  "pushl %1;fildl (%%esp);fstpl %0;addl $4, %%esp")
TEMPLATE(X86_CVTU4F4,  "pushl $0;pushl %1;fildq (%%esp);fstps %0;addl $8, %%esp")
TEMPLATE(X86_CVTU4F8,  "pushl $0;pushl %1;fildq (%%esp);fstpl %0;addl $8, %%esp")

/* 4-byte float -> 8-byte float: load as single, store as double */
TEMPLATE(X86_CVTF4,    "flds %1;fstpl %0") 

 
/*
 * Float -> integer: reserve 16 bytes of stack, save the x87 control word
 * at (%esp), build a copy with bits 0x0c00 set (the rounding-control
 * field, i.e. round toward zero) at 4(%esp) and load it, store the
 * integer at 8(%esp) with fistpl (32-bit) or fistpll (64-bit, used by the
 * unsigned forms), restore the saved control word, and return the low 32
 * bits of the result in %eax.
 */
TEMPLATE(X86_CVTF4I4,  "flds %1;subl $16, %%esp;fnstcw (%%esp);movzwl (%%esp), %%eax;"
                       "orl $0x0c00, %%eax;movl %%eax, 4(%%esp);fldcw 4(%%esp);fistpl 8(%%esp);"
                       "fldcw (%%esp);movl 8(%%esp), %%eax;addl $16, %%esp")
 
TEMPLATE(X86_CVTF4U4,  "flds %1;subl $16, %%esp;fnstcw (%%esp);movzwl (%%esp), %%eax;"
                       "orl $0x0c00, %%eax;movl %%eax, 4(%%esp);fldcw 4(%%esp);fistpll 8(%%esp);"
                       "fldcw (%%esp);movl 8(%%esp), %%eax;addl $16, %%esp")
/* 8-byte float -> 4-byte float: load as double, store as single */
TEMPLATE(X86_CVTF8,    "fldl %1;fstps %0")

TEMPLATE(X86_CVTF8I4,  "fldl %1;subl $16, %%esp;fnstcw (%%esp);movzwl (%%esp), %%eax;"
                       "orl $0x0c00, %%eax;movl %%eax, 4(%%esp);fldcw 4(%%esp);fistpl 8(%%esp);"
                       "fldcw (%%esp);movl 8(%%esp), %%eax; addl $16, %%esp")
 
TEMPLATE(X86_CVTF8U4,  "fldl %1;subl $16, %%esp;fnstcw (%%esp);movzwl (%%esp), %%eax;"
                       "orl $0x0c00, %%eax;movl %%eax, 4(%%esp);fldcw 4(%%esp);fistpll 8(%%esp);"
                       "fldcw (%%esp);movl 8(%%esp), %%eax;addl $16, %%esp")
					
		
/*
 * Increment / decrement templates, applied in place to %0.
 * Integer forms pick the instruction width from the operand size
 * (b = 1 byte, w = 2 bytes, l = 4 bytes).
 * Float increment loads 1.0 (fld1), adds %0 and stores the sum back.
 */
TEMPLATE(X86_INCI1,    "incb %0")
TEMPLATE(X86_INCU1,    "incb %0")
TEMPLATE(X86_INCI2,    "incw %0")
TEMPLATE(X86_INCU2,    "incw %0")				
TEMPLATE(X86_INCI4,    "incl %0")
TEMPLATE(X86_INCU4,    "incl %0")
TEMPLATE(X86_INCF4,    "fld1;fadds %0;fstps %0")

 
TEMPLATE(X86_INCF8,    "fld1;faddl %0;fstpl %0")

TEMPLATE(X86_DECI1,    "decb %0")
TEMPLATE(X86_DECU1,    "decb %0")
TEMPLATE(X86_DECI2,    "decw %0")
TEMPLATE(X86_DECU2,    "decw %0")
TEMPLATE(X86_DECI4,    "decl %0")
TEMPLATE(X86_DECU4,    "decl %0")

/* float decrement: compute 1.0 - %0, then negate with fchs to get
   %0 - 1.0, and store it back */
TEMPLATE(X86_DECF4,    "fld1;fsubs %0;fchs;fstps %0")
TEMPLATE(X86_DECF8,    "fld1;fsubl %0;fchs;fstpl %0")

/* address-of: load the effective address of %1 into %0 */
TEMPLATE(X86_ADDR,     "leal %1, %0")

/* plain moves of 1, 2 and 4 bytes; MOVB is a block copy of %2 bytes from
   %1 to %0 done with rep movsb (uses %edi, %esi and %ecx) */
TEMPLATE(X86_MOVI1,    "movb %1, %0")
TEMPLATE(X86_MOVI2,    "movw %1, %0")
TEMPLATE(X86_MOVI4,    "movl %1, %0")
TEMPLATE(X86_MOVB,     "leal %0, %%edi;leal %1, %%esi;movl %2, %%ecx;rep movsb")


/* direct jump, and indirect jump through a table of 4-byte entries at %0
   indexed by %1 */
TEMPLATE(X86_JMP,      "jmp %0")
TEMPLATE(X86_IJMP,     "jmp *%0(,%1,4)")

/* Function frame and calls.
   PROLOGUE saves %ebp, %ebx, %esi, %edi and sets %ebp to the stack
   pointer; EPILOGUE resets %esp from %ebp, restores the four registers in
   reverse order and returns.
   PUSHF4/PUSHF8 make room on the stack (pushl %ecx just reserves 4 bytes)
   and store the float there through the x87 stack.
   PUSHB reserves %2 bytes on the stack and copies %1 bytes from %0 into
   them with rep movsb.
   EXPANDF/REDUCEF subtract from / add to %esp. */
TEMPLATE(X86_PROLOGUE, "pushl %%ebp;pushl %%ebx;pushl %%esi;pushl %%edi;movl %%esp, %%ebp")
TEMPLATE(X86_PUSH,     "pushl %0")
TEMPLATE(X86_PUSHF4,   "pushl %%ecx;flds %0;fstps (%%esp)")
TEMPLATE(X86_PUSHF8,   "subl $8, %%esp;fldl %0;fstpl (%%esp)")
TEMPLATE(X86_PUSHB,    "leal %0, %%esi;subl %2, %%esp;movl %%esp, %%edi;movl %1, %%ecx;rep movsb")
TEMPLATE(X86_EXPANDF,  "subl %0, %%esp")
TEMPLATE(X86_CALL,     "call %1")
TEMPLATE(X86_ICALL,    "call *%1")
TEMPLATE(X86_REDUCEF,  "addl %0, %%esp")
TEMPLATE(X86_EPILOGUE, "movl %%ebp, %%esp;popl %%edi;popl %%esi;popl %%ebx;popl %%ebp;ret")


/* zero-fill: calls memset(&%0, 0, %1) with the arguments pushed right to
   left, then drops the 12 bytes of arguments from the stack */
TEMPLATE(X86_CLEAR,    "pushl %1;pushl $0;leal %0, %%eax;pushl %%eax;call memset;addl $12, %%esp")

/* x87 load, and store-with-pop, of 4- and 8-byte floats */
TEMPLATE(X86_LDF4,     "flds %0")
TEMPLATE(X86_LDF8,     "fldl %0")
TEMPLATE(X86_STF4,     "fstps %0")
TEMPLATE(X86_STF8,     "fstpl %0")


/* x87 store that leaves the value on the x87 stack */
TEMPLATE(X86_STF4_NO_POP,     "fsts %0")
TEMPLATE(X86_STF8_NO_POP,     "fstl %0")

/* pop the x87 stack top without storing it anywhere else */
TEMPLATE(X86_X87_POP,     "fstp %%st(0)")

 

4.7 汇编语言的打印

目前主要是PutASMCode实现,

这个函数在x86linux.c & x86win32.c都进行了实现

这就看自己需要实现哪一种os下的汇编代码了

 

4.8 函数翻译过程中涉及到其他问题

Export,函数输出

LayoutFrame & EmitPrologue,堆栈处理

EmitEpilogue,堆栈复原

 

4.9 寄存器的处理

reg.c

https://github.com/sheisc/ucc162.3/blob/a92719fff0ab7eb5b0c45768acedabb3cd70ca05/ucc/ucl/reg.c

 

4.10 总结

     相对而言,后端移植涉及到很多文件,比如emit.c、x86.c、reg.c、x86win32.c、x86linux.c,还涉及到opinfo.h、x86linux.tpl、x86win32.tpl。作为阅读代码的同学,要了解每一个文件的作用,至少需要知道里面的处理流程。评价自己是否真正理解了这部分代码,就看自己是不是可以把ucc移植到另外一个cpu架构上了,这是最好的判别方法。

 

 

以上是关于ucc编译器(汇编生成)的主要内容,如果未能解决你的问题,请参考以下文章

ucc编译器(优化)

ucc编译器(优化)

ucc编译器(语义分析)

查看C语言/C++编译器生成的汇编语言代码

如何为自定义 CPU 创建 C 编译器?

为啥编译器在编译的汇编代码中会生成额外的 sqrts