digitalmars.D.announce - Mini numerical optimizer in D
- Sean O'Connor (124/124) Aug 20 2018 Mini numerical optimizer in D:
Mini numerical optimizer in D: https://github.com/S6Regen/Dopt I also have this Walsh Hadamard code in D for Linux AMD 64:

// Linux AMD64

/**
 * In-place 16-point Walsh-Hadamard transform of every consecutive group of
 * 16 floats in x[0 .. n]; each result is multiplied by `scale`.
 *
 * The function is `naked`, so it reads its arguments straight from the
 * registers used by the System V AMD64 calling convention:
 * RDI = x, RSI = n, XMM0 = scale.
 *
 * Params:
 *   x     = first float to transform (unaligned loads/stores are used)
 *   n     = number of floats; must be a non-zero multiple of 16, because the
 *           loop subtracts 16 before testing for zero
 *   scale = factor applied to every output value
 */
extern(C) void hsixteen(float* x,ulong n,float scale){
    asm{
        naked;
        shufps XMM0,XMM0,0;         // copy lane 0 (scale) into all four lanes
        align 16;
    h16:
        sub RSI,16;                 // sets ZF for the jnz below; nothing in between writes flags
        movups XMM1,[RDI];          // load 16 floats as four vectors
        movups XMM2,[RDI+16];
        movups XMM3,[RDI+2*16];
        movups XMM4,[RDI+3*16];
        // stage 1: sums/differences of adjacent elements
        movups XMM5,XMM1;
        movups XMM6,XMM3;
        haddps XMM1,XMM2;
        haddps XMM3,XMM4;
        hsubps XMM5,XMM2;
        hsubps XMM6,XMM4;
        // stage 2: completes the 4-point transform of each input vector
        movups XMM2,XMM1;
        movups XMM4,XMM3;
        haddps XMM1,XMM5;
        haddps XMM3,XMM6;
        hsubps XMM2,XMM5;
        hsubps XMM4,XMM6;
        // stage 3: combines vector 0 with 1 and vector 2 with 3
        movups XMM5,XMM1;
        movups XMM6,XMM3;
        haddps XMM1,XMM2;
        haddps XMM3,XMM4;
        hsubps XMM5,XMM2;
        hsubps XMM6,XMM4;
        // stage 4: vertical add/sub combines the two halves of the group
        movups XMM2,XMM1;
        movups XMM4,XMM5;
        addps XMM1,XMM3;
        addps XMM5,XMM6;
        subps XMM2,XMM3;
        subps XMM4,XMM6;
        // apply the scale factor
        mulps XMM1,XMM0;
        mulps XMM5,XMM0;
        mulps XMM2,XMM0;
        mulps XMM4,XMM0;
        // store the 16 results back over the inputs
        movups [RDI],XMM1;
        movups [RDI+16],XMM5;
        movups [RDI+2*16],XMM2;
        movups [RDI+3*16],XMM4;
        lea RDI,[RDI+64];           // next group of 16 floats (lea leaves flags alone)
        jnz h16;                    // loop until the count from `sub` reached zero
        ret;
    }
}

/**
 * One in-place butterfly pass over x[0 .. n] with stride `gap`: the data is
 * treated as consecutive blocks of 2*gap floats, and for every j < gap in a
 * block, (lo[j], hi[j]) becomes (lo[j] + hi[j], lo[j] - hi[j]), where `hi`
 * starts `gap` floats after `lo`.
 *
 * The function is `naked`; arguments are read from the System V AMD64
 * registers RDI = x, RSI = gap, RDX = n. No scaling is applied.
 *
 * Params:
 *   x   = first float of the data
 *   gap = distance in floats between paired elements; must be a non-zero
 *         multiple of 16 (the inner loop handles 16 pairs per iteration)
 *   n   = total number of floats; must be a non-zero multiple of 2*gap
 */
extern(C) void hgap(float* x,ulong gap,ulong n){
    asm{
        naked;
        mov RCX,RSI;                // RCX: pairs left in the current block
        lea R8,[RDI+4*RSI];         // R8: upper half, gap floats (4 bytes each) after RDI
        shr RDX,1;                  // RDX: total pairs left (n / 2)
        align 16;
    hgaploop:
        sub RCX,16;                 // sets ZF for the first jnz below
        movups XMM0,[RDI];          // 16 floats from the lower half
        movups XMM1,[RDI+16];
        movups XMM2,[RDI+2*16];
        movups XMM3,[RDI+3*16];
        movups XMM8,[R8];           // 16 floats from the upper half
        movups XMM9,[R8+16];
        movups XMM10,[R8+2*16];
        movups XMM11,[R8+3*16];
        movups XMM4,XMM0;           // keep copies of the lower values for the differences
        movups XMM5,XMM1;
        movups XMM6,XMM2;
        movups XMM7,XMM3;
        addps XMM0,XMM8;            // lower + upper
        addps XMM1,XMM9;
        addps XMM2,XMM10;
        addps XMM3,XMM11;
        subps XMM4,XMM8;            // lower - upper
        subps XMM5,XMM9;
        subps XMM6,XMM10;
        subps XMM7,XMM11;
        movups [RDI],XMM0;          // sums replace the lower half
        movups [RDI+16],XMM1;
        movups [RDI+2*16],XMM2;
        movups [RDI+3*16],XMM3;
        movups [R8],XMM4;           // differences replace the upper half
        movups [R8+16],XMM5;
        movups [R8+2*16],XMM6;
        movups [R8+3*16],XMM7;
        lea RDI,[RDI+64];
        lea R8,[R8+64];
        jnz hgaploop;               // more pairs in this block
        sub RDX,RSI;                // a whole block (gap pairs) is done; sets ZF for the final jnz
        mov RCX,RSI;                // reset the per-block counter
        mov RDI,R8;                 // R8 now points at the next block's lower half
        lea R8,[R8+4*RSI];          // and this is the next block's upper half
        jnz hgaploop;               // mov/lea keep the flags from `sub RDX,RSI`
        ret;
    }
}

/**
 * In-place Walsh-Hadamard transform of `vec`, normalised by 1/sqrt(length).
 *
 * The data is processed in chunks of at most 8192 floats: each chunk gets the
 * 16-point kernel (`hsixteen`, which also applies the scale) followed by the
 * butterfly passes with gaps 16, 32, ... inside the chunk. The remaining
 * passes, whose gap is at least one chunk, then run over the whole array.
 *
 * Params:
 *   vec = data to transform; an empty slice is left untouched, otherwise the
 *         length must be a power of two and at least 16
 *
 * Throws:
 *   Exception if the length is non-zero and not a power of two >= 16. The
 *   assembly kernels count down in steps of 16 and only stop on exactly
 *   zero, so any other length would make them run past the end of `vec`.
 */
void wht(float[] vec){
    // Function-local imports keep this snippet self-contained.
    import std.conv : to;
    import std.math : sqrt;

    const ulong lim=8192;           // largest chunk handled before the wide passes
    const ulong n=vec.length;

    if(n==0) return;                // nothing to transform
    if(n<16 || (n&(n-1))!=0)
        throw new Exception("wht: length must be a power of two >= 16");

    const float scale=1f/sqrt(to!float(n));
    const ulong k=n>lim ? lim : n;  // chunk length; a power of two, so it divides n

    for(ulong i=0;i<n;i+=lim){
        hsixteen(&vec[i],k,scale);
        for(ulong gap=16;gap<k;gap+=gap){
            hgap(&vec[i],gap,k);
        }
    }
    // Passes whose gap spans whole chunks operate on the full array.
    for(ulong gap=k;gap<n;gap+=gap){
        hgap(&vec[0],gap,n);
    }
}

It is the simplest algorithm in computer science least known, as I like to say. It actually has many uses. Eg. https://github.com/FALCONN-LIB/FFHT
Aug 20 2018