www.digitalmars.com         C & C++   DMDScript  

digitalmars.D - SSE asm with functions

reply Byron <bheads emich.edu> writes:
In the attached file xmm.d I have a function xnormal that takes a vector (
alias float[4] ) and computes the
unit vector.  The SSE code seems to work fine, but it keeps returning [nan,
nan, nan, nan ] and the
writeln prints the same. But if I change the return from r ( output vector ) to
v ( input vector ) it prints
the correct normal vector, and returns the input vector.  Is this my bug or a
compiler bug?
DMD32 v2.053  OS X

// Computes the unit vector of v (v / sqrt(x^2 + y^2 + z^2 + w^2)) using SSE
// inline assembly, prints the result next to the input, and returns it.
//
// NOTE: as posted, this prints and returns [nan, nan, nan, nan] under DMD.
// `r` is the returned local, so the compiler applies the named return value
// optimization and rewrites `r` as a reference to a vector in the caller's
// stack frame; the inline assembler performs no such rewrite, so the final
// `movups r, XMM2` does not store into the vector that is printed/returned.
// Workaround: store into `r`, copy it to a second local, and return that.
const(vector) xnormal( ref const(vector) v )
{
    vector r;
    asm
    {
        mov EAX, v; // v is passed by ref: EAX = address of the input vector (32-bit build)
        movups XMM0, [EAX]; // unaligned load of the four floats x, y, z, w
        movaps XMM2, XMM0; // keep the original components for the final scaling

        // find x^2 + y^2 + z^2 + w^2
        // (in the lane comments below, x/y/z/w stand for the squared components)
        mulps XMM0, XMM0; // xx, yy, zz, ww
        movaps XMM1, XMM0; // copy, because shufps overwrites XMM0
        shufps XMM0, XMM1, 0x4e; // 0100 1110 zwxy: swap the two halves
        addps XMM0, XMM1; // xyzw + zwxy

        movaps XMM1, XMM0; // copy, because shufps overwrites XMM0
        shufps XMM0, XMM1, 0x11; // 0001 0001 (y+w)(x+z)(y+w)(x+z)
        addps XMM0, XMM1; // (x+z)(y+w)(z+x)(w+y) + (y+w)(x+z)(y+w)(x+z)
                          // (x+z+y+w)(y+w+x+z)(z+x+y+w)(w+y+x+z)
                          // every lane now holds the full sum of squares

        rsqrtps XMM0, XMM0; // 1/sqrt(XMM0); rsqrtps is an approximation, not an exact result
        mulps XMM2, XMM0; // x/sqrt(x^2+y^2+z^2+w^2) , ...
        movups r, XMM2; // store to the local `r` -- this is the store affected by the NRVO rewrite
    }
    writeln( "Result: ", r, "\t", v );
    return r; // returning `r` directly is what triggers the named return value optimization
}

I would like to use D for a thesis project, but won't be able to if it's still
this buggy.

-Byron
begin 644 xmm.d
M;6]D=6QE('AM;3L*"FEM<&]R="!S=&0N<W1D:6\L('-T9"YM871H.PH*86QI
M87, 9FQO871;-%T =F5C=&]R.PH*8V]N<W0H=F5C=&]R*2!N;W)M86PH(')E
M9B!C;VYS="AV96-T;W(I('8 *0I["B` ("!V96-T;W( <F5T.PH ("` 9FQO
M870 ;&5N(#T ,#L


M("H ;&5N.PH*("` (')E='5R;B!R970["GT*"G9E8W1O<B!N;W)M86QI>F4H

M86-H*"!I.R`P+BXT("D*("` ("` ("!L96X *ST =EMI72`J('9;:5T["B` 
M("!L96X /2`Q+C!F+W-Q<G0H;&5N*3L*"B` ("!F;W)E86-H*"!I.R`P+BXT
M("D*("` ("` ("!V6VE=("H](&QE;CL*"B` ("!R971U<FX =CL*?0H*8V]N
M<W0H=F5C=&]R*2!X;F]R;6%L*"!R968 8V]N<W0H=F5C=&]R*2!V("D*>PH 
M("` =F5C=&]R('(["B` ("!A<VT*("` ('L*("` ("` ("!M;W8 14%8+"!V
M.PH ("` ("` (&UO=G5P<R!834TP+"!;14%873L +R]L;V%D('9E8W1O< H 
M("` ("` (&UO=F%P<R!834TR+"!834TP.R`O+R!C;W!Y(&]R:6=I;F%L(&1A
M=&$*"B` ("` ("` +R\ 9FEN9"!X7C( *R!Y7C( *R!Z7C( *R!W7C(*("` 
M("` ("!M=6QP<R!834TP+"!834TP.R`O+R!X>"P >7DL('IZ+"!W=PH ("` 
M("` (&UO=F%P<R!834TQ+"!834TP.R`O+R!C;W!Y+"!C875S92!W92!W:6QL


M.R`O+R!X>7IW("L >G=X>0H*("` ("` ("!M;W9A<', 6$U-,2P 6$U-,#L 
M+R\ 8V]P>2P 8V%U<V4 =V4 =VEL;"!W<FET92!I;G1O(% P"B` ("` ("` 

M*WHI*'DK=RDH>"MZ*0H ("` ("` (&%D9'!S(%A-33`L(%A-33$[("\O("AX
M*WHI*'DK=RDH>BMX*2AW*WDI("L *'DK=RDH>"MZ*2AY*W<I*' K>BD*("` 
M("` ("` ("` ("` ("` ("` ("` ("`O+R`H>"MZ*WDK=RDH>2MW*W K>BDH
M>BMX*WDK=RDH=RMY*W K>BD*("` ("` ("`*("` ("` ("!R<W%R='!S(%A-

M(%A-33`[("\O(' O<W%R="AX7C(K>5XR*WI>,BMW7C(I("P +BXN"B` ("` 
M("` ;6]V=7!S('(L(%A-33(["B` ("!]"B` ("!W<FET96QN*"`B4F5S=6QT
M.B`B+"!R+"`B7'0B+"!V("D["B` ("!R971U<FX <CL*?0H*=F5C=&]R('AN
M;W)M86QI>F4H(')E9B!V96-T;W( =B`I"GL*("` (&%S;0H ("` >PH ("` 
M("` (&UO=B!%05 L('8["B` ("` ("` ;6]V=7!S(%A-33`L(%M%05A=.R`O
M+VQO860 =F5C=&]R"B` ("` ("` ;6]V87!S(%A-33(L(%A-33`[("\O(&-O
M<'D ;W)I9VEN86P 9&%T80H*("` ("` ("`O+R!F:6YD('A>,B`K('E>,B`K
M('I>,B`K('=>, H ("` ("` (&UU;'!S(%A-33`L(%A-33`[("\O('AX+"!Y
M>2P >GHL('=W"B` ("` ("` ;6]V87!S(%A-33$L(%A-33`[("\O(&-O<'DL
M(&-A=7-E('=E('=I;&P =W)I=&4 :6YT;R!8,`H ("` ("` ('-H=69P<R!8

M9'!S(%A-33`L(%A-33$[("\O('AY>G< *R!Z=WAY" H ("` ("` (&UO=F%P
M<R!834TQ+"!834TP.R`O+R!C;W!Y+"!C875S92!W92!W:6QL('=R:71E(&EN


M,"P 6$U-,3L +R\ *' K>BDH>2MW*2AZ*W I*'<K>2D *R`H>2MW*2AX*WHI
M*'DK=RDH>"MZ*0H ("` ("` ("` ("` ("` ("` ("` ("` ("\O("AX*WHK
M>2MW*2AY*W<K>"MZ*2AZ*W K>2MW*2AW*WDK>"MZ*0H ("` ("` (`H ("` 
M("` (')S<7)T<', 6$U-,"P 6$U-,#L +R\ ,2]S<7)T*%A-33`I"B` ("` 
M("` ;75L<', 6$U-,BP 6$U-,#L +R\ >"]S<7)T*'A>,BMY7C(K>EXR*W=>
M,BD +"`N+BX*("` ("` ("!M;W9U<', 6T5!6%TL(%A-33(["B` ("!]"B` 
M("!R971U<FX =CL*?0H*=F]I9"!M86EN*"D*>PH*("` ('9E8W1O<B!V,2`]

M=W)I=&5L;B  (EQN4F5T=7)N(&YE=R!N;W)M86QI>F5D('9E8W1O<G,B("D[
M"B` ("!W<FET96QN*"`B4U-%.B`B+"!V,2P (EQT(BP ('AN;W)M86PH('8Q
M("DI.PH ("` =W)I=&5L;B  (F9O<F5A8V Z("(L('8R+"`B7'0B+"!N;W)M
M86PH('8R("DI.PH*("` ('=R:71E;&XH(")<;DYO<FUA;&EZ92!I;B!P;&%C
M92( *3L*("` ('=R:71E;&XH(")34T4Z("(L('8Q+"`B7'0B+"` >&YO<FUA
M;&EZ92  =C$ *2D["B` ("!W<FET96QN*"`B9F]R96%C:#H (BP =C(L(")<
9="(L(&YO<FUA;&EZ92  =C( *2D["GT*" ``
`
end
Jun 15 2011
next sibling parent bearophile <bearophileHUGS lycos.com> writes:
Byron:

 Is this my bug or a compiler bug?
DMD doesn't compile asm code. My suggestion is to keep reducing your code until you understand what's going on. Also, a disassembler helps a bit here. Bye, bearophile
Jun 15 2011
prev sibling parent reply Timon Gehr <timon.gehr gmx.ch> writes:
Byron wrote:
 In the attached file xmm.d I have a function xnormal that takes a vector (
alias
float[4] ) an computes the
 unit vector.  The SSE code seems to work fine, but it keeps returning [nan,
nan,
nan, nan ] and the
 writeln prints the same. But if I change the return from r ( output vector ) to
v ( input vector ) it prints
 the correct normal vector, and returns the input vector.  Is this my bug or a
compiler bug?
 DMD32 v2.053  OS X

 // (Quoted from the original post.) Normalizes v with SSE inline assembly.
 // The asm stores its result into `r`, which is also the returned local;
 // under DMD this yields [nan, nan, nan, nan] because the named return value
 // optimization rewrites `r` and the inline assembler does not follow it.
 const(vector) xnormal( ref const(vector) v )
 {
     vector r;
     asm
     {
         mov EAX, v;
         movups XMM0, [EAX]; //load vector
         movaps XMM2, XMM0; // copy original data

         // find x^2 + y^2 + z^2 + w^2
         mulps XMM0, XMM0; // xx, yy, zz, ww
         movaps XMM1, XMM0; // copy, cause we will write into X0
         shufps XMM0, XMM1, 0x4e; // 0100 1110 zwxy
         addps XMM0, XMM1; // xyzw + zwxy

         movaps XMM1, XMM0; // copy, cause we will write into X0
         shufps XMM0, XMM1, 0x11; // 0001 0001 (y+w)(x+z)(y+w)(x+z)
         addps XMM0, XMM1; // (x+z)(y+w)(z+x)(w+y) + (y+w)(x+z)(y+w)(x+z)
                           // (x+z+y+w)(y+w+x+z)(z+x+y+w)(w+y+x+z)

         rsqrtps XMM0, XMM0; // 1/sqrt(XMM0)
         mulps XMM2, XMM0; // x/sqrt(x^2+y^2+z^2+w^2) , ...
         movups r, XMM2; // store into the returned local `r`
     }
     writeln( "Result: ", r, "\t", v );
     return r;
 }

 I would like to use D for a thesis projects, but wont be able to if its still
this buggy.
 -Byron
 << xmm.d >>
It seems to be a backend bug in DMD, as the same code works just fine with GDC. (GDC's frontend is version 2.052, though, so this might need some further investigation.) Timon
Jun 15 2011
parent reply Byron <bheads emich.edu> writes:
I reduced the problem to a smaller test case; it seems to be caused by
combining an SSE store with returning a local copy.

$ dmd -run db.d
v: [1, 2, 3, 4]
test1 r: [nan, nan, nan, nan]
test1: [nan, nan, nan, nan]
test2 r: [1, 2, 3, 4]
test2: [1, 2, 3, 4]
halle109-251:asm byro


//db.d
import std.stdio;

alias float[4] vector;

// Reduced failing case: copies v into the local `r` through XMM0 using inline
// assembly, prints `r`, and returns it.
// With DMD this prints and returns [nan, nan, nan, nan]: `r` is the returned
// local, so the named return value optimization rewrites it as a reference to
// a vector in the caller's stack frame, and the inline assembler does not
// apply that rewrite to the `movups r, XMM0` store.
const(vector) test1( ref const(vector) v )
{
    vector r;
    asm
    {
        mov EAX, v; // v is passed by ref: EAX = address of the input vector
        movups XMM0, [EAX]; // unaligned load of the four floats
        movups r, XMM0; // store to `r` -- the store affected by the NRVO rewrite
    }
    writeln( "test1 r: ", r );
    return r; // returning the asm-written local directly triggers the problem
}

// Workaround variant of test1: the inline assembly still writes into `r`, but
// the function returns a second local `s` that receives a copy of `r`.
// Because `r` is no longer the returned variable, it is not rewritten by the
// named return value optimization, and this prints/returns [1, 2, 3, 4].
const(vector) test2( ref const(vector) v )
{
    vector r, s;
    asm
    {
        mov EAX, v; // v is passed by ref: EAX = address of the input vector
        movups XMM0, [EAX]; // unaligned load of the four floats
        movups r, XMM0; // store to the plain local `r`
    }
    writeln( "test2 r: ", r );
    s = r; // copy in D code so the returned variable is never touched by the asm
    return s;
}

// Driver: builds the vector [1,2,3,4], prints it, then prints the value
// returned by test1 (the failing case) and by test2 (the workaround).
void main()
{
    vector v = [1,2,3,4];
    writeln( "v: ", v );
    writeln( "test1: ", test1(v)); // prints [nan, nan, nan, nan] with the affected DMD
    writeln( "test2: ", test2(v)); // prints [1, 2, 3, 4]
}


-Byron
begin 644 db.d
M:6UP;W)T('-T9"YS=&1I;SL*"F%L:6%S(&9L;V%T6S1=('9E8W1O<CL*"F-O

M("!V96-T;W( <CL*("` (&%S;0H ("` >PH ("` ("` (&UO=B!%05 L('8[
M"B` ("` ("` ;6]V=7!S(%A-33`L(%M%05A=.PH ("` ("` (&UO=G5P<R!R
M+"!834TP.PH ("` ?0H ("` =W)I=&5L;B  (G1E<W0Q('(Z("(L('( *3L*
M("` (')E='5R;B!R.PI]" IC;VYS="AV96-T;W(I('1E<W0R*"!R968 8V]N
M<W0H=F5C=&]R*2!V("D*>PH ("` =F5C=&]R('(L(',["B` ("!A<VT*("` 
M('L*("` ("` ("!M;W8 14%8+"!V.PH ("` ("` (&UO=G5P<R!834TP+"!;
M14%873L*("` ("` ("!M;W9U<', <BP 6$U-,#L*("` ('T*("` ('=R:71E
M;&XH(")T97-T,B!R.B`B+"!R("D["B` ("!S(#T <CL*("` (')E='5R;B!S



)=BDI.PI]" H*
`
end
Jun 16 2011
next sibling parent reply Byron <bheads emich.edu> writes:
Same problem with 64-bit dmd on ubuntu. ( change EAX to RAX )
Jun 16 2011
parent Walter Bright <newshound2 digitalmars.com> writes:
http://d.puremagic.com/issues/show_bug.cgi?id=6166
Jun 16 2011
prev sibling parent Walter Bright <newshound2 digitalmars.com> writes:
On 6/16/2011 10:20 AM, Byron wrote:
 I reduced the complexity of the problem, seems to be SSE and returning local
copies.
What you've run into is the "named return value" optimization. 'r' is rewritten by the compiler as a reference to a vector in the caller's stack frame; this avoids unnecessary copying when returning r. The trouble is that the inline assembler does no such rewrite, so the asm store to 'r' does not reach the returned vector. I'll file this in bugzilla. In the meantime, you've already discovered the workaround in test2().
Jun 16 2011