www.digitalmars.com         C & C++   DMDScript  

digitalmars.D.bugs - Bug in asm version of std.math.poly since 0.143

reply Grzegorz Adam Hankiewicz <fake dont.use> writes:
The following program, as extracted from phobos, outputs:

    $ ./asmbug
    The result should be 215.130000
    The C version outputs 215.130000
    The asm version outputs 56.100000

I don't know enough asm to solve this. Test program:

    import std.stdio;
    
    real poly_asm(real x, real[] A)
    in
    {
        assert(A.length > 0);
    }
    body
    {
        version (D_InlineAsm_X86)
        {
            asm     // assembler by W. Bright
            {
                // EDX = (A.length - 1) * real.sizeof
                mov     ECX,A[EBP]          ; // ECX = A.length
                dec     ECX                 ;
                lea     EDX,[ECX][ECX*8]    ;
                add     EDX,ECX             ;
                add     EDX,A+4[EBP]        ;
                fld     real ptr [EDX]      ; // ST0 = coeff[ECX]
                jecxz   return_ST           ;
                fld     x[EBP]              ; // ST0 = x
                fxch    ST(1)               ; // ST1 = x, ST0 = r
                align   4                   ;
        L2:     fmul    ST,ST(1)            ; // r *= x
                fld     real ptr -10[EDX]   ;
                sub     EDX,10              ; // deg--
                faddp   ST(1),ST            ;
                dec     ECX                 ;
                jne     L2                  ;
                fxch    ST(1)               ; // ST1 = r, ST0 = x
                fstp    ST(0)               ; // dump x
                align   4                   ;
        return_ST:                          ;
                ;
            }
        }
        else
        {
            writefl("Sorry, you don't seem to have InlineAsm_X86");
            return 0;
        }
    }
    
    real poly_c(real x, real[] A)
    in
    {
        assert(A.length > 0);
    }
    body
    {
        int i = A.length - 1;
        real r = A[i];
        while (--i >= 0)
        {
            r *= x;
            r += A[i];
        }
        return r;
    }
    
    int main()
    {
        real x = 3.1;
        static real pp[] = [56.1, 32.7, 6];
    
        writefln("The result should be %f",(56.1L + (32.7L + 6L * x) * x));
        writefln("The C version outputs %f", poly_c(x, pp));
        writefln("The asm version outputs %f", poly_asm(x, pp));
        
        return 0;
    }
Feb 04 2006
parent reply Don Clugston <dac nospam.com.au> writes:
The code works correctly on DMD-Windows 0.145.
I think the problem is the extra alignment for reals  on Linux (12 
bytes) compared to Windows (10 bytes). Just a guess, but I think that 
this line:
                 sub     EDX,10              ; // deg--
should be
                 sub     EDX,12              ; // deg--
on Linux. This also needs to be changed:
                 lea     EDX,[ECX][ECX*8]    ;
                 add     EDX,ECX             ;
to
                 lea     EDX,[ECX][ECX*4]    ;
                 add     EDX,ECX             ;
add EDX,EDX which isn't optimal, but see if it works. If that doesn't work, try changing the line
                 fld     real ptr -10[EDX]   ;
to
                 fld     real ptr -12[EDX]   ;
Grzegorz Adam Hankiewicz wrote:
 The following program, as extracted from phobos, outputs:
 
     $ ./asmbug
     The result should be 215.130000
     The C version outputs 215.130000
     The asm version outputs 56.100000
 
 I don't know enough asm to solve this. Test program:
 
     import std.stdio;
     
     real poly_asm(real x, real[] A)
     in
     {
         assert(A.length > 0);
     }
     body
     {
         version (D_InlineAsm_X86)
         {
             asm     // assembler by W. Bright
             {
                 // EDX = (A.length - 1) * real.sizeof
                 mov     ECX,A[EBP]          ; // ECX = A.length
                 dec     ECX                 ;
                 lea     EDX,[ECX][ECX*8]    ;
                 add     EDX,ECX             ;
                 add     EDX,A+4[EBP]        ;
                 fld     real ptr [EDX]      ; // ST0 = coeff[ECX]
                 jecxz   return_ST           ;
                 fld     x[EBP]              ; // ST0 = x
                 fxch    ST(1)               ; // ST1 = x, ST0 = r
                 align   4                   ;
         L2:     fmul    ST,ST(1)            ; // r *= x
                 fld     real ptr -10[EDX]   ;
                 sub     EDX,10              ; // deg--
                 faddp   ST(1),ST            ;
                 dec     ECX                 ;
                 jne     L2                  ;
                 fxch    ST(1)               ; // ST1 = r, ST0 = x
                 fstp    ST(0)               ; // dump x
                 align   4                   ;
         return_ST:                          ;
                 ;
             }
         }
         else
         {
             writefl("Sorry, you don't seem to have InlineAsm_X86");
             return 0;
         }
     }
     
     real poly_c(real x, real[] A)
     in
     {
         assert(A.length > 0);
     }
     body
     {
         int i = A.length - 1;
         real r = A[i];
         while (--i >= 0)
         {
             r *= x;
             r += A[i];
         }
         return r;
     }
     
     int main()
     {
         real x = 3.1;
         static real pp[] = [56.1, 32.7, 6];
     
         writefln("The result should be %f",(56.1L + (32.7L + 6L * x) * x));
         writefln("The C version outputs %f", poly_c(x, pp));
         writefln("The asm version outputs %f", poly_asm(x, pp));
         
         return 0;
     }
 
 
Feb 06 2006
parent Grzegorz Adam Hankiewicz <fake dont.use> writes:
The Mon, 06 Feb 2006 09:28:33 +0100, Don Clugston wrote:
 The code works correctly on DMD-Windows 0.145.  I think the problem
 is the extra alignment for reals on Linux (12 bytes) compared to
 Windows (10 bytes). Just a guess, but I think that this line:
  >                 sub     EDX,10              ; // deg--
 should be
  >                 sub     EDX,12              ; // deg--
 on Linux.
 This also needs to be changed:
  >                 lea     EDX,[ECX][ECX*8]    ;
  >                 add     EDX,ECX             ;
 to
  >                 lea     EDX,[ECX][ECX*4]    ;
  >                 add     EDX,ECX             ;
                    add     EDX,EDX
 
 If that doesn't work, try changing the line
  >                 fld     real ptr -10[EDX]   ;
 to
  >                 fld     real ptr -12[EDX]   ;
Unfortunately none of the proposed changes or their combinations do work. I either get wrong results or nans.
Feb 06 2006