digitalmars.D.announce - Template BigNum implemented without runtime loops
- BCS (320/320) Dec 11 2006 I have implemented a arbitrary precision integer type using a struct and...
- Don Clugston (10/19) Dec 12 2006 Of course, there's not much value in unrolling loops to that extent
- BCS (8/25) Dec 12 2006 Yah, unrolling the 2Kb version probably won't gain much, however I
- BCS (5/5) Dec 13 2006 version next.bugfix
I have implemented a arbitrary precision integer type using a struct and templates. Currently it supports ==, +, -, *, +=, -= and *=. It should be fairly efficient as it uses static foreaches and asm blocks to unroll all of the looping and conditionals. I haven't tested the multiplication much yet so it may well contain bugs. (If it doesn't I will be vary surprised) On my system DMD crashes for size > 73 (e.i. 73 * 32b = 2336b). Here it is. <code> import std.stdio; debug { template decimalDigit(int n) { const char[] decimalDigit = "0123456789"[n..n+1]; } template itoa(long n) { static if (n < 0) const char[] itoa = "-" ~ itoa!(-n); else static if (n < 10) const char[] itoa = decimalDigit!(n); else const char[] itoa = itoa!(n/10L) ~ decimalDigit!(n%10L); } } unittest { BigNum!(5) tmp,add,sum; tmp.set = uint .max; assert(tmp.values[0] == uint.max, "set(uint) failed"); assert(tmp == uint.max, "opEqual(uint) false neg"); assert(tmp != 1, "opEqual(uint) false pos"); add.set = 1; assert(add.values[0] == 1, "set(uint) failed"); assert(add == 1, "opEqual(uint) false neg"); assert(add != uint.max, "opEqual(uint) false pos"); tmp.values[1] = uint.max; sum = tmp+add; assert(sum.values[0] == 0, "low word failed"); assert(sum.values[1] == 0, "carry word failed"); assert(sum.values[2] == 1, "high word failed"); sum = tmp; sum += add; assert(sum.values[0] == 0, "low word failed"); assert(sum.values[1] == 0, "carry word failed"); assert(sum.values[2] == 1, "high word failed"); tmp.values[2] = tmp.values[0] = 0; tmp.values[1] = 1; sum = tmp - add; assert(sum.values[0] == uint.max, "low word failed"); assert(sum.values[1] == 0, "borrow word failed"); assert(sum.values[2] == 0, "high word failed"); sum = tmp; sum -= add; assert(sum.values[0] == uint.max, "low word failed"); assert(sum.values[1] == 0, "borrow word failed"); assert(sum.values[2] == 0, "high word failed"); sum = tmp*add; } template BigNum(uint size) { static if(size > 73) pragma(mgs, "Ok, I'll try it. But don't say I didn't warn you!!!"); static if(size <= 2) static assert(false, "BigNum!(size) is not valid for size <= 2"); else alias build!(size, size-2, size-1) BigNum; } template build(uint size, uint i, V...) { static if(i==1) alias use!(size, 1, V) build; else alias build!(size, i-1, i, V) build; } template use(uint size, V...) { struct use { //DMD0.174 seg-v for size > 73 static if(size > 73) pragma(mgs, "You have set a new world record!!!!"); static assert(0 == values.offsetof); uint[size] values; void Emit() { for(int i=size-1; i>0; i--) writef("%x:",values[i]); writef("%x\n",values[0]); } void set(uint val) { values[0] = val; static if(size>1) foreach(inout v; values[1..$]) v = 0; } bool opEquals(uint i) { if (values[0] != i) return false; static if(size>1) foreach(inout v; values[1..$]) if(v != 0) return false; return true; } bool opEquals(use!(size, V) i) { return i.values[] == values[]; } use!(size, V) opAdd(use!(size, V) to) { uint* ths = this.values.ptr; uint* dest = to.values.ptr; // DON'T USE "this" after this line!!!!!!! asm { mov EAX, ths[0]; mov EBX, dest[0]; mov EDX, [EAX+0]; add [EBX+0], EDX; } foreach(j,i; V) { const uint off = V[j]*4; // this is a work-around asm { mov EDX, [EAX+off]; adc [EBX+off], EDX; } } return to; } use!(size, V) opSub(use!(size, V) to) { uint* ths = this.values.ptr; uint* dest = to.values.ptr; // DON'T USE "this" after this line!!!!!!! asm { // dest = ths - dest mov EAX, ths[0]; // dest = *EAX - dest mov EBX, dest[0]; // dest = *EAX - *EBX mov EDX, [EAX+0]; // dest = EDX - *EBX sub EDX, [EBX+0]; // EDX = EDX - *EBX mov [EBX+0], EDX; // *EBX // dest } foreach(j,i; V) { const uint off = V[j]*4; // this is a work-around asm { // dest = ths - dest // dest = *EAX - *EBX mov EDX, [EAX+off]; // dest = EDX - *EBX sbb EDX, [EBX+off]; // EDX = EDX - ECX mov [EBX+off], EDX; // *EBX // dest } } return to; } use!(size, V) opMul(use!(size, V) to) { uint* ths = this.values.ptr; use!(size, V) ret; // DON'T USE "this" after this line!!!!!!! // EDX:EAX mul in/out asm { // load values.ptr[0] -> ECX mov ECX, ths[0]; mov ECX, [ECX]; // load to.values.ptr[0] -> EAX mov EAX, to[0]; // EDX:EAX = mul(EAX, ECX) imul ECX; // load EAX -> ret.values[0] mov ret[0], EAX; // load EDX -> ret.values[1] mov ret[1], EDX; } debug pragma(msg, itoa!(__LINE__)~": set ret[0] from ths[0] and to[0]"); debug pragma(msg, itoa!(__LINE__)~": set ret[1] from ths[0] and to[0]"); foreach(i,j; V) { // get rank of this row const uint OutI = V[i]; // this is a work-around asm { // load values.ptr[OutI] -> ECX mov ECX, ths; mov ECX, [ECX+OutI]; // load to.values.ptr[0] -> EAX mov EAX, to[0]; // EDX:EAX = mul(EAX, ECX) imul ECX; // load ret.values[OutI] -> EBX mov EBX, ret[OutI]; // EBX = add(EBX, EAX) add EBX, EAX; // load EBX -> ret.values[OutI] mov ret[OutI], EBX; } debug pragma(msg, itoa!(__LINE__)~": set ret["~itoa!(OutI)~"] from ths["~itoa!(OutI)~"] and to[0]"); static if(OutI+1 < size) { asm { // load ret.values[OutI+1] -> EBX mov EBX, ret[OutI+1]; // EBX = adc(EBX, EDX) adc EBX, EDX; // load EBX -> ret.values[OutI+1] mov ret[OutI+1], EBX; nop; } debug pragma(msg, itoa!(__LINE__)~": set ret["~itoa!(OutI+1)~"] from ths["~itoa!(OutI)~"] and to[0]"); } foreach(k,l; V) { const uint InI = V[i]; // this is a work-around const uint DesI = OutI + InI; static if(DesI < size) { asm { // load to.values.ptr[InI] -> EAX mov EAX, to[InI]; // EDX:EAX = mul(EAX, ECX) imul ECX; // load ret.values[DesI] -> EBX mov EBX, ret[DesI]; // EBX = add(EBX, EAX) add EBX, EAX; // load EBX -> ret.values[DesI] mov ret[DesI], EBX; } debug pragma(msg, itoa!(__LINE__)~": set ret["~itoa!(DesI)~"] from ths["~itoa!(OutI)~"] and to["~itoa!(InI)~"]"); static if(DesI+1 < size) { asm { // load ret.values[DesI+1] -> EBX mov EBX, ret[DesI+1]; // EBX = adc(EBX, EDX) adc EBX, EDX; // load EBX -> ret.values[DesI+1] mov ret[DesI+1], EBX; } debug pragma(msg, itoa!(__LINE__)~": set ret["~itoa!(DesI+1)~"] from ths["~itoa!(OutI)~"] and to["~itoa!(InI)~"]"); } } } } return ret; } use!(size, V) opAddAssign(use!(size, V) to) { uint* ths = this.values.ptr; uint* dest = to.values.ptr; // DON'T USE "this" after this line!!!!!!! asm { mov EBX, ths[0]; mov EAX, dest[0]; mov EDX, [EAX+0]; add [EBX+0], EDX; // no store needed because dest is memory } foreach(j,i; V) { const uint off = V[j]*4;// this is a work-around asm { mov EDX, [EAX+off]; adc [EBX+off], EDX; // no store needed because dest is memory } } return to; } use!(size, V) opSubAssign(use!(size, V) to) { uint* ths = this.values.ptr; uint* dest = to.values.ptr; // DON'T USE "this" after this line!!!!!!! asm { mov EBX, ths[0]; mov EAX, dest[0]; mov EDX, [EAX+0]; sub [EBX+0], EDX; // no store needed because dest is memory } foreach(j,i; V) { const uint off = V[j]*4;// this is a work-around asm { mov EDX, [EAX+off]; sbb [EBX+off], EDX; // no store needed because dest is memory } } return to; } use!(size, V) opMulAssign(use!(size, V) to) { return *this = opMul(to); } } } </code>
Dec 11 2006
BCS wrote:I have implemented a arbitrary precision integer type using a struct and templates. Currently it supports ==, +, -, *, +=, -= and *=. It should be fairly efficient as it uses static foreaches and asm blocks to unroll all of the looping and conditionals. I haven't tested the multiplication much yet so it may well contain bugs. (If it doesn't I will be vary surprised) On my system DMD crashes for size > 73 (e.i. 73 * 32b = 2336b).Of course, there's not much value in unrolling loops to that extent (cache misses will cost you more than you'll gain by eliminating the last couple of branch instructions). However, the use of tuples and static foreach to enforce asm unrolling is very cool. This ought to work with mixins as well as direct asm instructions. I'd never found a way of mixing in multiple blocks of code at the same level -- this technique will probably solve the problem. (And it might be a way of doing very high performance expression templates :-) ).
Dec 12 2006
Don Clugston wrote:BCS wrote:[...]I have implemented a arbitrary precision integer type using a struct and templates. Currently it supports ==, +, -, *, +=, -= and *=.Yah, unrolling the 2Kb version probably won't gain much, however I expect that it would do 128b/256b and such just fine. Hmm, I'll have to look into having it do a loop for the higher size values and then benchmark the two versions.On my system DMD crashes for size > 73 (e.i. 73 * 32b = 2336b).Of course, there's not much value in unrolling loops to that extent (cache misses will cost you more than you'll gain by eliminating the last couple of branch instructions).However, the use of tuples and static foreach to enforce asm unrolling is very cool. This ought to work with mixins as well as direct asm instructions. I'd never found a way of mixing in multiple blocks of code at the same level -- this technique will probably solve the problem. (And it might be a way of doing very high performance expression templates :-) ).I guess it does more as a proof of concept that as a REALLY high precision type.
Dec 12 2006
version next.bugfix There was a typo bug and a logical bug in the opMul code. Also opAssign(uint) is now implemented as well as opDiv(uint) and opDivAssign(uint). http://www.webpages.uidaho.edu/~shro8822/bignum.d
Dec 13 2006