www.digitalmars.com         C & C++   DMDScript  

digitalmars.D.learn - Issues using the in-line assembler

reply solidstate1991 <laszloszeremi outlook.com> writes:
I have this code:
// Inline-assembler fragment as posted. The operands xy, sXY, xy0, ac, bd and
// result are declared outside this fragment (int[2] values per the poster).
asm @nogc{
	movq		XMM0, xy;
	paddd		XMM0, sXY;	// xy + sXY
	movq		XMM3, xy0;
	psubd		XMM0, XMM3;	// xy + sXY - x0y0
	movq		XMM1, ac;
	movq		XMM2, bd;
	pmuludq		XMM1, XMM0;	// (ac * (xy + sXY - x0y0))
	psrlq		XMM1, 16;	// (ac * (xy + sXY - x0y0))>>16
	pmuludq		XMM2, XMM0;	// (bd * (xy + sXY - x0y0))
	psrlq		XMM2, 16;	// (bd * (xy + sXY - x0y0))>>16
	paddq		XMM1, XMM2;	// ((bd * (xy + sXY - x0y0))>>16) + ((ac * (xy + sXY - x0y0))>>16)
	// NOTE(review): XMM7 is never written in this fragment; the unpack below
	// only widens x0y0 with zeros if XMM7 already holds zero -- confirm, or
	// clear it first with `pxor XMM7, XMM7;`.
	punpckldq	XMM3, XMM7;
	paddq		XMM1, XMM3;	// ((bd * (xy + sXY - x0y0))>>16) + ((ac * (xy + sXY - x0y0))>>16) + x0y0
	movups		XMM2, XMM1;	// Convert 64 bit vectors into 32 bit ones
	psrldq		XMM2, 4;
	por			XMM2, XMM1;
	movq		result, XMM2;
}
I'm getting a "bad type/size of operand 'movq'" error on xy0, ac, 
and bd when I try to compile it. All of the values are of type 
int[2]; xy is a function parameter and sXY is created locally. How 
can I fix it?
Apr 04 2018
next sibling parent solidstate1991 <laszloszeremi outlook.com> writes:
I forgot to mention that xy0, ac, and bd are members of the class.
Apr 04 2018
prev sibling parent reply Basile B. <b2.temp gmx.com> writes:
On Wednesday, 4 April 2018 at 21:00:44 UTC, solidstate1991 wrote:
 I have this code:
 asm  nogc{
 	movq		XMM0, xy;
 	paddd		XMM0, sXY;	// xy + sXY
 	movq		XMM3, xy0;
 	psubd		XMM0, XMM3;	// xy + sXY - x0y0
 	movq		XMM1, ac;
 	movq		XMM2, bd;
 	pmuludq		XMM1, XMM0;	// (ac * (xy + sXY - x0y0))
 	psrlq		XMM1, 16;	// (ac * (xy + sXY - x0y0))>>16
 	pmuludq		XMM2, XMM0; // (bd * (xy + sXY - x0y0))
 	psrlq		XMM2, 16;	// (bd * (xy + sXY - x0y0))>>16
 	paddq		XMM1, XMM2; // (bd * (xy + sXY - x0y0))>>16 * (ac * (xy 
 + sXY - x0y0))>>16
 	punpckldq	XMM3, XMM7;
 	paddq		XMM1, XMM3;	// (bd * (xy + sXY - x0y0))>>16 * (ac * (xy 
 + sXY - x0y0))>>16 + x0y0
 	movups		XMM2, XMM1;	// Convert 64 bit vectors into 32 bit ones
 	psrldq		XMM2, 4;
 	por			XMM2, XMM1;	
 	movq		result, XMM2;
 }
 I'm getting "bad type/size of operand 'movq'" error on xy0, ac, 
 and bd when I try to compile it. All of the values are the type 
 of int[2], xy is function parameter, sXY is created locally. 
 How can I fix it?
The "this" seems to be in R11, so you have to apply the asm syntax for accessing the members using <Type>.<member>.offsetof[R11], example:

```
class Foo
{
    double a = 123456;
    extern(D) double foo()
    {
        asm
        {
            naked;
            movq    XMM0, Foo.a.offsetof[R11];
            ret;
        }
    }
}

void main()
{
    import std.stdio;
    (new Foo).foo(0,0).writeln;
}
```

However, I can't find any specification saying that R11 is "this".
With a free function, just pass the instance as a parameter and replace R11 with the register in which the instance is passed.
Apr 04 2018
next sibling parent Basile B. <b2.temp gmx.com> writes:
On Thursday, 5 April 2018 at 04:48:02 UTC, Basile B. wrote:
 On Wednesday, 4 April 2018 at 21:00:44 UTC, solidstate1991 
 wrote:
 void main()
 {
     import std.stdio;
     (new Foo).foo(0,0).writeln;
 }
 ```
Ah, sorry, the params must be removed ((new Foo).foo().writeln;)... I was actually playing with params and extern linkage to see whether R11 always works...
Apr 04 2018
prev sibling parent reply solidstate1991 <laszloszeremi outlook.com> writes:
On Thursday, 5 April 2018 at 04:48:02 UTC, Basile B. wrote:
 The "this" seems to be in R11, so you have to apply the asm 
 syntax for accessing the members using 
 <Type>.offsetof.<member>[R11], example:

 ```
 class Foo
 {
     double a = 123456;
     extern(D) double foo()
     {
         asm
         {
             naked;
             movq    XMM0, Foo.a.offsetof[R11];
             ret;
         }
     }
 }

 void main()
 {
     import std.stdio;
     (new Foo).foo(0,0).writeln;
 }
 ```

 However i cant find any specification saying that R11 is "this".
 With a free function just pass the instance as param and 
 replace R11 by the register where the instance is passed.
It seems that the compiler lets it through if I change it like this:

asm @nogc{
	naked;
	movd		XMM1, dword ptr sX[EBP];
	pslldq		XMM1, 4;
	movss		XMM1, dword ptr sY[EBP];
	movq		XMM0, xy;
	paddd		XMM0, XMM1;	// [x,y] + [sX,sY]
	movq		XMM3, qword ptr xy0[EBP];
	psubd		XMM0, XMM3;	// ([x,y] + [sX,sY] - [x_0,y_0])
	movq		XMM1, qword ptr ac[EBP];
	movq		XMM2, qword ptr bd[EBP];
	pmuludq		XMM1, XMM0;	// [A,0,C,0] * ([x,y] + [sX,sY] - [x_0,y_0])
	psrlq		XMM1, 16;	// ([A,0,C,0] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16
	movups		XMM4, XMM0;
	psrldq		XMM4, 4;
	pslldq		XMM0, 4;
	por			XMM4, XMM0;
	pmuludq		XMM2, XMM4;	// [0,B,0,D] * ([x,y] + [sX,sY] - [x_0,y_0])
	psrlq		XMM2, 16;	// ([0,B,0,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16
	paddq		XMM1, XMM2;	// ([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16
	punpckldq	XMM3, XMM7;
	paddq		XMM1, XMM3;	// ([A,B,C,D] * ([x,y] + [sX,sY] - [x_0,y_0]))>>16 + [x_0,y_0]
	movups		XMM0, XMM1;	// Convert 64 bit vectors into 32 bit ones
	psrldq		XMM0, 4;
	por			XMM0, XMM1;
	ret;
}

I wonder if I can return an int[2] in XMM0. I can make some modifications to either move the result to the stack first, or add an import of core.simd (which needs to be refactored completely) and instead make the return type int4 on SIMD-enabled CPUs.
Apr 05 2018
parent solidstate1991 <laszloszeremi outlook.com> writes:
Seems I found a better solution hidden in the docs:

 nogc protected int[2] transformFunc(int[2] xy){
version(X86){
asm  nogc{
	naked;
	mov			EBX, this;
	movd		XMM1, sX[EBX];
	pslldq		XMM1, 4;
	movss		XMM1, sY[EBX];
	movq		XMM0, xy;
	paddd		XMM0, XMM1;	// [x,y] + [sX,sY]
	movq		XMM3, xy0[EBX];
	psubd		XMM0, XMM3;	// ([x,y] + [sX,sY] - [x_0,y_0])
	movq		XMM1, ac[EBX];
	movq		XMM2, bd[EBX];
	pmuludq		XMM1, XMM0;	// [A,0,C,0] * ([x,y] + [sX,sY] - [x_0,y_0])
	psrlq		XMM1, 16;	// ([A,0,C,0] * ([x,y] + [sX,sY] - 
[x_0,y_0]))>>16
	movups		XMM4, XMM0;
	psrldq		XMM4, 4;
	pslldq		XMM0, 4;
	por			XMM4, XMM0;
	pmuludq		XMM2, XMM4; // [0,B,0,D] * ([x,y] + [sX,sY] - [x_0,y_0])
	psrlq		XMM2, 16;	// ([0,B,0,D] * ([x,y] + [sX,sY] - 
[x_0,y_0]))>>16
	paddq		XMM1, XMM2; // ([A,B,C,D] * ([x,y] + [sX,sY] - 
[x_0,y_0]))>>16
	punpckldq	XMM3, XMM7;
	paddq		XMM1, XMM3;	// ([A,B,C,D] * ([x,y] + [sX,sY] - 
[x_0,y_0]))>>16 + [x_0,y_0]
	movups		XMM0, XMM1;	// Convert 64 bit vectors into 32 bit ones
	psrldq		XMM0, 4;
	por			XMM0, XMM1;	
	ret			;
}
}(...)
}
Apr 05 2018