digitalmars.D.bugs - [Issue 4438] New: A missed function inlining
- d-bugmail puremagic.com (310/310) Jul 08 2010 http://d.puremagic.com/issues/show_bug.cgi?id=4438
- d-bugmail puremagic.com (16/16) Jul 08 2010 http://d.puremagic.com/issues/show_bug.cgi?id=4438
- d-bugmail puremagic.com (11/11) Jul 08 2010 http://d.puremagic.com/issues/show_bug.cgi?id=4438
- d-bugmail puremagic.com (9/10) Jul 09 2010 http://d.puremagic.com/issues/show_bug.cgi?id=4438
- d-bugmail puremagic.com (101/101) Apr 14 2011 http://d.puremagic.com/issues/show_bug.cgi?id=4438
http://d.puremagic.com/issues/show_bug.cgi?id=4438 Summary: A missed function inlining Product: D Version: D1 & D2 Platform: x86 OS/Version: Windows Status: NEW Severity: enhancement Priority: P2 Component: DMD AssignedTo: nobody puremagic.com ReportedBy: bearophile_hugs eml.cc A test program that can be compiled with DMD2+Phobos2 and LDC1+Tango1: version (Tango) import tango.stdc.stdio: printf; else import std.c.stdio: printf; double masked_dot(double[] a1, double[] a2, ubyte[] mask) in { assert(a1.length == a2.length); assert(a1.length == mask.length); } body { double sum = 0.0; foreach (i, m; mask) if (m) sum += a1[i] * a2[i]; return sum; } void main() { int N = 1000; auto m1 = new double[][](N, N); foreach (ref row; m1) row[] = 2.0; auto m2 = new double[][](N, N); foreach (ref row; m2) row[] = 0.5; auto mask = new ubyte[N]; mask[] = 1; double sum = 0.0; for (int r; r < m1.length; r++) sum += masked_dot(m1[r], m2[r], mask); printf("%f\n", sum); } Compiled with: dmd v2.047 and ldc based on DMD v1.057 and llvm 2.6 ldc -O3 -release -inline -output-s test dmd -O -release -inline test.d I'd like dmd to inline this masked_dot() function too. 
------------------------------- The cleaned up asm produced by dmd: _D4test10masked_dotFAdAdAhZd comdat sub ESP,030h push ESI xor ESI,ESI mov dword ptr 4[ESP],0 mov dword ptr 8[ESP],0 cmp 038h[ESP],ESI je L51 mov EDX,03Ch[ESP] mov EAX,038h[ESP] mov ECX,EDX L26: cmp [ECX][ESI],0 je L4A mov EDX,04Ch[ESP] mov EAX,048h[ESP] mov EAX,040h[ESP] fld qword ptr [ESI*8][EDX] mov EDX,044h[ESP] fmul qword ptr [ESI*8][EDX] fadd qword ptr 4[ESP] fstp qword ptr 4[ESP] L4A: inc ESI cmp ESI,038h[ESP] jb L26 L51: fld qword ptr 4[ESP] pop ESI add ESP,030h ret 018h __Dmain comdat L0: sub ESP,028h mov EAX,offset FLAT:_D12TypeInfo_AAd6__initZ push EBX push ESI push EDI push 03E8h push 03E8h push 2 push EAX call near ptr __d_newarraymiT xor EBX,EBX mov 020h[ESP],EAX mov 024h[ESP],EDX add ESP,010h cmp 010h[ESP],EBX je L58 mov ESI,EDX L32: lea EDI,[EBX*8][ESI] mov EDX,4[EDI] mov EAX,[EDI] push dword ptr [EDI] push dword ptr FLAT:_DATA[04h] push dword ptr FLAT:_DATA[00h] push EDX call near ptr __memset64 add ESP,010h inc EBX cmp EBX,010h[ESP] jb L32 L58: push 03E8h mov ECX,offset FLAT:_D12TypeInfo_AAd6__initZ push 03E8h push 2 push ECX call near ptr __d_newarraymiT xor EBX,EBX mov 028h[ESP],EAX mov 02Ch[ESP],EDX add ESP,010h cmp 018h[ESP],EBX je LAA mov ESI,EDX L84: lea EDI,[EBX*8][ESI] mov EDX,4[EDI] mov EAX,[EDI] push dword ptr [EDI] push dword ptr FLAT:_DATA[0Ch] push dword ptr FLAT:_DATA[08h] push EDX call near ptr __memset64 add ESP,010h inc EBX cmp EBX,018h[ESP] jb L84 LAA: push 03E8h mov EBX,offset FLAT:_D11TypeInfo_Ah6__initZ push EBX call near ptr __d_newarrayT mov 028h[ESP],EAX mov ECX,028h[ESP] mov EAX,01010101h mov 02Ch[ESP],EDX mov EDX,02Ch[ESP] mov EBX,028h[ESP] mov EDI,EDX rep stosb mov 030h[ESP],ECX mov 034h[ESP],ECX mov 038h[ESP],ECX add ESP,8 cmp 010h[ESP],ECX je L12E mov EDX,014h[ESP] mov EDI,EDX mov EAX,010h[ESP] mov EDX,01Ch[ESP] mov EBX,030h[ESP] mov EAX,018h[ESP] mov ESI,EDX L104: push dword ptr 4[EBX*8][EDI] push [EBX*8][EDI] push dword ptr 4[EBX*8][ESI] push 
[EBX*8][ESI] push dword ptr 034h[ESP] push dword ptr 034h[ESP] call near ptr _D4test10masked_dotFAdAdAhZd inc EBX fadd qword ptr 028h[ESP] cmp EBX,010h[ESP] fstp qword ptr 028h[ESP] jb L104 L12E: push dword ptr 02Ch[ESP] mov ECX,offset FLAT:_DATA[010h] push dword ptr 02Ch[ESP] push ECX call near ptr _printf add ESP,0Ch xor EAX,EAX pop EDI pop ESI pop EBX add ESP,028h ret ------------------------------- The cleaned up asm produced by ldc: _D4test10masked_dotFAdAdAhZd: pushl %edi pushl %esi subl $12, %esp movl 24(%esp), %eax testl %eax, %eax je .LBB1_6 movl 28(%esp), %ecx movl 36(%esp), %edx movl 44(%esp), %esi pxor %xmm0, %xmm0 xorl %edi, %edi .align 16 .LBB1_2: cmpb $0, (%ecx,%edi) je .LBB1_4 movsd (%esi,%edi,8), %xmm1 mulsd (%edx,%edi,8), %xmm1 addsd %xmm1, %xmm0 .LBB1_4: incl %edi cmpl %eax, %edi jne .LBB1_2 .LBB1_5: movsd %xmm0, (%esp) fldl (%esp) addl $12, %esp popl %esi popl %edi ret $24 .LBB1_6: pxor %xmm0, %xmm0 jmp .LBB1_5 _Dmain: pushl %ebp pushl %ebx pushl %edi pushl %esi subl $36, %esp movl $1000, 28(%esp) movl $1000, 32(%esp) leal 28(%esp), %eax movl %eax, 8(%esp) movl $2, 4(%esp) movl $_D12TypeInfo_AAd6__initZ, (%esp) xorl %esi, %esi call _d_newarraymiT movl %eax, %edi .align 16 .LBB2_1: movl 4(%edi,%esi,8), %eax movl (%edi,%esi,8), %ecx movl %ecx, 4(%esp) movl %eax, (%esp) movl $1073741824, 12(%esp) movl $0, 8(%esp) call _d_array_init_double incl %esi cmpl $1000, %esi jne .LBB2_1 movl $1000, 20(%esp) movl $1000, 24(%esp) leal 20(%esp), %eax movl %eax, 8(%esp) movl $2, 4(%esp) movl $_D12TypeInfo_AAd6__initZ, (%esp) xorl %esi, %esi call _d_newarraymiT movl %eax, %ebx .align 16 .LBB2_3: movl 4(%ebx,%esi,8), %eax movl (%ebx,%esi,8), %ecx movl %ecx, 4(%esp) movl %eax, (%esp) movl $1071644672, 12(%esp) movl $0, 8(%esp) call _d_array_init_double incl %esi cmpl $1000, %esi jne .LBB2_3 movl $1000, 4(%esp) movl $_D11TypeInfo_Ah6__initZ, (%esp) call _d_newarrayT movl %eax, %esi movl %esi, (%esp) movl $1000, 8(%esp) movl $1, 4(%esp) call memset pxor %xmm0, %xmm0 
xorl %eax, %eax .LBB2_5: movl 4(%ebx,%eax,8), %ecx movl 4(%edi,%eax,8), %edx pxor %xmm1, %xmm1 xorl %ebp, %ebp .align 16 .LBB2_6: cmpb $0, (%esi,%ebp) je .LBB2_8 movsd (%edx,%ebp,8), %xmm2 mulsd (%ecx,%ebp,8), %xmm2 addsd %xmm2, %xmm1 .LBB2_8: incl %ebp cmpl $1000, %ebp jne .LBB2_6 addsd %xmm1, %xmm0 incl %eax cmpl $1000, %eax jne .LBB2_5 movsd %xmm0, 4(%esp) movl $.str, (%esp) call printf xorl %eax, %eax addl $36, %esp popl %esi popl %edi popl %ebx popl %ebp ret $8 -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: -------
Jul 08 2010
http://d.puremagic.com/issues/show_bug.cgi?id=4438 Leandro Lucarella <llucax gmail.com> changed: What |Removed |Added ---------------------------------------------------------------------------- Keywords| |performance CC| |llucax gmail.com Platform|x86 |All Blocks| |859 OS/Version|Windows |All PDT --- I'm marking this as a blocker of bug 859 so there is a single bug to track all the inlining issues. Please do the same if you open more bugs associated with inlining, or post them directly in bug 859. -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: -------
Jul 08 2010
http://d.puremagic.com/issues/show_bug.cgi?id=4438 Brad Roberts <braddr puremagic.com> changed: What |Removed |Added ---------------------------------------------------------------------------- CC| |braddr puremagic.com Blocks|859 | --- undoing false dependency -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: -------
Jul 08 2010
http://d.puremagic.com/issues/show_bug.cgi?id=4438 PDT --- > undoing false dependency -- Can you elaborate a little on why having bug 859 as a tracker of all missing inline opportunities is a bad thing? Thanks -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: -------
Jul 09 2010
http://d.puremagic.com/issues/show_bug.cgi?id=4438 A different case of missed inlining. In the following code isValidMove() is not inlined by DMD 2.052 (with -O -inline -release), even though this function contains no loops and no throw statements. Removing the "ref" for the board array in the isValidMove() signature changes nothing. The runtime is 12.6 seconds; the alternative version of the knightsTour() function with manually inlined isValidMove() (currently commented out below) runs in 8.0 seconds. My tests show that GCC -O3 is able to inline similar code written in C. // Code adapted from: // http://en.literateprograms.org/The_Knight's_Tour_(C)?oldid=14704 import core.stdc.stdio: printf; template TypeTuple(T...) { alias T TypeTuple; } bool isValidMove(int N)(ref int[N][N] board, int xpos, int ypos, int nmove) { if (xpos < 0 || xpos >= N || ypos < 0 || ypos >= N || (board[xpos][ypos] && board[xpos][ypos] < nmove)) return false; return true; } // run time: 12.6 seconds bool knightsTour(int N)(ref int[N][N] board, int xpos, int ypos, int nmove) { if (!isValidMove(board, xpos, ypos, nmove)) return false; board[xpos][ypos] = nmove; if (nmove == N * N) return true; alias TypeTuple!(+1, -2, -2, -1, -1, +2, +2, +1) spx; alias TypeTuple!(+2, +1, -1, +2, -2, +1, -1, -2) spy; static assert(spx.length == spy.length); bool ok = true; foreach (i, sx; spx) ok = ok && !knightsTour(board, xpos + sx, ypos + spy[i], nmove + 1); if (ok) { board[xpos][ypos] = N * N; return false; } return true; } /* // run time: 8.0 seconds bool knightsTour(int N)(ref int[N][N] board, int xpos, int ypos, int nmove) { // if is not valid move if (xpos < 0 || xpos >= N || ypos < 0 || ypos >= N || (board[xpos][ypos] && board[xpos][ypos] < nmove)) return false; board[xpos][ypos] = nmove; if (nmove == N * N) return true; alias TypeTuple!(+1, -2, -2, -1, -1, +2, +2, +1) spx; alias TypeTuple!(+2, +1, -1, +2, -2, +1, -1, -2) spy; static assert(spx.length == spy.length); bool ok = true; foreach (i, sx; spx) ok = ok && 
!knightsTour(board, xpos + sx, ypos + spy[i], nmove + 1); if (ok) { board[xpos][ypos] = N * N; return false; } return true; } */ void main() { enum int side = 8; enum int xpos = 0; enum int ypos = xpos; int[side][side] board; printf("Executing Knight's Tour...\n"); if (knightsTour(board, xpos, ypos, 1)) { printf("Solution found:\n"); foreach (ref row; board) { foreach (item; row) printf("%3d", item); printf("\n"); } } else printf("No solution found.\n"); } The first part of the assembly of knightsTour() shows the call to isValidMove(): _D4test20__T11knightsTourVk8Z11knightsTourFKG8G8iiiiZb comdat sub ESP,02Ch push EBX push EBP push ESI mov ESI,044h[ESP] push EDI mov 038h[ESP],EAX push ESI push dword ptr 048h[ESP] push dword ptr 048h[ESP] call near ptr _D4test20__T11isValidMoveVk8Z11isValidMoveFKG8G8iiiiZb xor AL,1 je L2D pop EDI ... -- Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email ------- You are receiving this mail because: -------
Apr 14 2011