www.digitalmars.com         C & C++   DMDScript  

digitalmars.D.learn - Register allocation algorithm

I'm working on small compiler to understand these stuff and maybe 
get involved with the D compiler. I wrote a front-end to a C-like 
language and now I'm working on the code generator. To be more 
specific, in the register allocation phase. I was using a old and 
one where I put everything on stack because I thought it could be 
too complex for now but then I found something that seems a 
beginner like me would implement. From wikipedia articles I was 
able to implement below algorithm. I did some search, but it 
didn't provide much useful contents to myself, much probably 
because I don't have a math background and compiler development 
baggage. From what I understood how algorithms works I've 
implemented but I have a couple of questions. There's a lot of 
people here that knows a lot about compilers. Questions:

Data:
the code generator has only two registers available, they are RO 
and R1.
Numbers are 32-bit.
operations: load, push, pop, add, sub, div, mul. They are 
x86-like instructions.

I think that's all.

How should I design the get_reg() function?
I was using a stack-based to hold registers, but changed to 
current one (very very simple): it does keep the previously 
register returned in the function and return the "inverse of," 
eg, if previously was R0 does return R1 if R1 does return R0.
It does init always with R0 register to result of expression 
always be in R0. But if I run out of registers, one register is 
pushed on stack and result might don't live in R0. So, how do I 
chose the register to result expression always be in it? A 
working code example could be very great!
I also want to someone tell me if is correct my 
label()/sethiUllman() implementation.

I translated directly this code from C/C++(since post non-D code 
one could say it doesn't make much sense) that's the language I'm 
using and keep it in the C-way as possible, ie, not using too 
much D features because I want to translate it back to my C one 
when I get it working. It's because D compiler is written in C++ 
and then I want to be able read dmd compiler source code, in case 
I get involved to, what I really want to.

Thanks in advance.


Here's the code:

http://pastebin.com/vP0XtyVi (pastebin version, in case of found 
it's more readable)

import std.stdio;

enum Type {
	number,
	id,
	add,
	sub,
	mul,
	div,
	push,
	pop,
	none
}

enum Reg {
	none,
	r0,
	r1
}

class AST {
	AST left;
	AST right;
	Type type;
	Reg reg;
	int n;

	this(AST l, AST r, Type t) {
		left = l;
		right = r;
		type = t;
		n = 0;
	}
}

class BinExpression : AST {
	this(AST l, AST r, Type t) {
		super(l, r, t);
	}
}

class Identifier : AST {
	string name;
	
	this(string nm) {
		super(null, null, Type.id);
		name = nm;
	}
}

class Number : AST {
	int number;
	
	this(int n) {
		super(null, null, Type.number);
		number = n;
	}
}


Reg[] regs = [ Reg.r0, Reg.r1 ];
const int regs_num = regs.sizeof / int.sizeof;
int C = 0;

void main() {
	// ((2 + 2) + (2 + 2)) + ((2 + 2) + (2 + 2))
	// t1 = 2 + 2
	// t2 = 2 + 2
	// t3 = t1 + t2
	// t4 = 2 + 2
	// t5 = 2 + 2
	// t6 = t4 + t5
	// t7 = t3 + t6
	Number a = new Number(2);
	Number b = new Number(2);

	Number c = new Number(2);
	Number d = new Number(2);
	
	Number e = new Number(2);
	Number f = new Number(2);
	
	Number g = new Number(2);
	Number h = new Number(2);
	
	
	// t1 = 2 + 2
	BinExpression t1 = new BinExpression(a, b, Type.add);
	// t2 = 2 + 2
	BinExpression t2 = new BinExpression(c, d, Type.add);
	// t3 = t1 + t2
	BinExpression t3 = new BinExpression(t1, t2, Type.add);
	// t4 = 2 + 2
	BinExpression t4 = new BinExpression(e, f, Type.add);
	// t5 = 2 + 2
	BinExpression t5 = new BinExpression(g, h, Type.add);
	// t5 = t3 + t4
	BinExpression t6 = new BinExpression(t4, t5,Type.add);
	BinExpression t7 = new BinExpression(t3, t6, Type.add);
	label(t7);
	gen(t7);
}

Reg get_reg() {	
	if(C == regs.sizeof / int.sizeof) {
		//writeln("out of registers!");
		C = 0;
	}
	
	return regs[C++];
}

void gen(AST ast) {
	if(ast.left !is null && ast.right !is null) {
		int l = ast.left.n;
		int r = ast.right.n;
		
		if(l >= regs_num && r >= regs_num) {
			gen(ast.right);
			ast.n -= 1;
			//Reg r2 = ast.right.reg;
			emit_operation(Type.push, ast.right.reg);
			gen(ast.left);
			ast.reg = get_reg();
			emit_operation(Type.pop, ast.right.reg);
			//push_reg(r2);
		} else if(l >= r) {
			gen(ast.left);
			gen(ast.right);
			ast.n -= 1;
		} else if(l < r) {
			gen(ast.right);
			gen(ast.left);
			ast.n -= 1;
		}
		
		ast.reg = get_reg();
		Reg r1 = ast.left.reg;
		Reg r2 = ast.right.reg;
		emit_operation(ast.type, r1, r2);
		// result is in r1, so free r2.
		//push_reg(r2);
	} else if(ast.type == Type.id || ast.type == Type.number) {
		ast.n += 1;
		ast.reg = get_reg();
		emit_load(ast);
	} else {
		writeln("gen() error");
		// error
	}
}

void label(AST ast) {
	if(ast is null)
		return;
  	
	label(ast.left);
	label(ast.right);
	
	if(ast.type == Type.id || ast.type == Type.number)
		ast.n = 1;
	// ast has two childrens
	else if(ast.left !is null && ast.right !is null) {		
		int l = ast.left.n;
		int r = ast.right.n;
		
		if(l == r)
			ast.n = 1 + l;
		else
			ast.n = max(1, l, r);
	}
	// ast has one child
	else if(ast.left !is null && ast.right is null)
		ast.n = ast.left.n;
	else
		writeln("label() error!");
}

int max(int x, int y, int z) {
	return max(x, max(y, z));
}

int max(int a, int b) {
	return a > b ? a : b;
}

void emit_operation(Type op, Reg r1, Reg r2)
{	
	writefln("\t%s %s,%s",
					 op_as_string(op),
					 reg_as_string(r1),
					 reg_as_string(r2));
}

void emit_load(AST ast) {	
	switch(ast.type) {
		case Type.id:
			writefln("\t LOAD %s,[%s]", reg_as_string(ast.reg), 
(cast(Identifier)ast).name);
			break;
		case Type.number:
			writefln("\t LOAD %s,%d", reg_as_string(ast.reg), 
(cast(Number)ast).number);
			break;
		default:
			writeln("emitLoad() error!");
	}
}

void emit_operation(Type op, Reg r)
{	
	writefln("\t%s %s",
					 op_as_string(op),
					 reg_as_string(r));
}

string reg_as_string(Reg r) {
	switch(r) {
		case Reg.r0:   return "R0";
		case Reg.r1:   return "R1";
		case Reg.none: return "NONE";
		default:     return "UNKNOW";
	}
}

string op_as_string(Type t) {
	switch(t) {
		case Type.add: return "ADD";
		case Type.sub: return "SUB";
		case Type.mul: return "MUL";
		case Type.div: return "DIV";
		case Type.push: return "push";
		case Type.pop: return "pop";	
		default:     return "UNKNOW";
	}
}
May 06 2014