digitalmars.D.bugs - [Issue 4438] New: A missed function inlining
- d-bugmail puremagic.com (310/310) Jul 08 2010 http://d.puremagic.com/issues/show_bug.cgi?id=4438
- d-bugmail puremagic.com (16/16) Jul 08 2010 http://d.puremagic.com/issues/show_bug.cgi?id=4438
- d-bugmail puremagic.com (11/11) Jul 08 2010 http://d.puremagic.com/issues/show_bug.cgi?id=4438
- d-bugmail puremagic.com (9/10) Jul 09 2010 http://d.puremagic.com/issues/show_bug.cgi?id=4438
- d-bugmail puremagic.com (101/101) Apr 14 2011 http://d.puremagic.com/issues/show_bug.cgi?id=4438
http://d.puremagic.com/issues/show_bug.cgi?id=4438
Summary: A missed function inlining
Product: D
Version: D1 & D2
Platform: x86
OS/Version: Windows
Status: NEW
Severity: enhancement
Priority: P2
Component: DMD
AssignedTo: nobody puremagic.com
ReportedBy: bearophile_hugs eml.cc
A test program that can be compiled with DMD2+Phobos2 and LDC1+Tango1:
version (Tango)
import tango.stdc.stdio: printf;
else
import std.c.stdio: printf;
/// Sums a1[i] * a2[i] over every index i whose mask byte is non-zero.
///
/// Params:
///   a1   = first operand array
///   a2   = second operand array; the in-contract asserts it is as long as a1
///   mask = per-element selector; the in-contract asserts it is as long as a1
/// Returns: the accumulated sum, which is 0.0 when no mask byte is set.
double masked_dot(double[] a1, double[] a2, ubyte[] mask)
in {
// All three arrays are indexed with the same i below, so they must agree in length.
assert(a1.length == a2.length);
assert(a1.length == mask.length);
} body {
double sum = 0.0;
// i is the index into mask, m the mask byte at that index.
foreach (i, m; mask)
if (m)
sum += a1[i] * a2[i];
return sum;
}
/// Test driver: builds two N x N matrices and an all-ones mask, then prints
/// the sum of masked_dot() over every pair of corresponding rows.
void main() {
int N = 1000;
// m1: N rows of N doubles, every element set to 2.0.
auto m1 = new double[][](N, N);
foreach (ref row; m1)
row[] = 2.0;
// m2: same shape, every element set to 0.5.
auto m2 = new double[][](N, N);
foreach (ref row; m2)
row[] = 0.5;
// Every mask byte is 1, so masked_dot() skips no element.
auto mask = new ubyte[N];
mask[] = 1;
double sum = 0.0;
// This call inside the loop is the one the report asks dmd to inline.
for (int r; r < m1.length; r++)
sum += masked_dot(m1[r], m2[r], mask);
printf("%f\n", sum);
}
Compiled with:
dmd v2.047 and ldc based on DMD v1.057 and llvm 2.6
ldc -O3 -release -inline -output-s test
dmd -O -release -inline test.d
I'd like dmd to inline this masked_dot() function too.
-------------------------------
The cleaned up asm produced by dmd:
_D4test10masked_dotFAdAdAhZd comdat
sub ESP,030h
push ESI
xor ESI,ESI
mov dword ptr 4[ESP],0
mov dword ptr 8[ESP],0
cmp 038h[ESP],ESI
je L51
mov EDX,03Ch[ESP]
mov EAX,038h[ESP]
mov ECX,EDX
L26: cmp [ECX][ESI],0
je L4A
mov EDX,04Ch[ESP]
mov EAX,048h[ESP]
mov EAX,040h[ESP]
fld qword ptr [ESI*8][EDX]
mov EDX,044h[ESP]
fmul qword ptr [ESI*8][EDX]
fadd qword ptr 4[ESP]
fstp qword ptr 4[ESP]
L4A: inc ESI
cmp ESI,038h[ESP]
jb L26
L51: fld qword ptr 4[ESP]
pop ESI
add ESP,030h
ret 018h
__Dmain comdat
L0: sub ESP,028h
mov EAX,offset FLAT:_D12TypeInfo_AAd6__initZ
push EBX
push ESI
push EDI
push 03E8h
push 03E8h
push 2
push EAX
call near ptr __d_newarraymiT
xor EBX,EBX
mov 020h[ESP],EAX
mov 024h[ESP],EDX
add ESP,010h
cmp 010h[ESP],EBX
je L58
mov ESI,EDX
L32: lea EDI,[EBX*8][ESI]
mov EDX,4[EDI]
mov EAX,[EDI]
push dword ptr [EDI]
push dword ptr FLAT:_DATA[04h]
push dword ptr FLAT:_DATA[00h]
push EDX
call near ptr __memset64
add ESP,010h
inc EBX
cmp EBX,010h[ESP]
jb L32
L58: push 03E8h
mov ECX,offset FLAT:_D12TypeInfo_AAd6__initZ
push 03E8h
push 2
push ECX
call near ptr __d_newarraymiT
xor EBX,EBX
mov 028h[ESP],EAX
mov 02Ch[ESP],EDX
add ESP,010h
cmp 018h[ESP],EBX
je LAA
mov ESI,EDX
L84: lea EDI,[EBX*8][ESI]
mov EDX,4[EDI]
mov EAX,[EDI]
push dword ptr [EDI]
push dword ptr FLAT:_DATA[0Ch]
push dword ptr FLAT:_DATA[08h]
push EDX
call near ptr __memset64
add ESP,010h
inc EBX
cmp EBX,018h[ESP]
jb L84
LAA: push 03E8h
mov EBX,offset FLAT:_D11TypeInfo_Ah6__initZ
push EBX
call near ptr __d_newarrayT
mov 028h[ESP],EAX
mov ECX,028h[ESP]
mov EAX,01010101h
mov 02Ch[ESP],EDX
mov EDX,02Ch[ESP]
mov EBX,028h[ESP]
mov EDI,EDX
rep
stosb
mov 030h[ESP],ECX
mov 034h[ESP],ECX
mov 038h[ESP],ECX
add ESP,8
cmp 010h[ESP],ECX
je L12E
mov EDX,014h[ESP]
mov EDI,EDX
mov EAX,010h[ESP]
mov EDX,01Ch[ESP]
mov EBX,030h[ESP]
mov EAX,018h[ESP]
mov ESI,EDX
L104: push dword ptr 4[EBX*8][EDI]
push [EBX*8][EDI]
push dword ptr 4[EBX*8][ESI]
push [EBX*8][ESI]
push dword ptr 034h[ESP]
push dword ptr 034h[ESP]
call near ptr _D4test10masked_dotFAdAdAhZd
inc EBX
fadd qword ptr 028h[ESP]
cmp EBX,010h[ESP]
fstp qword ptr 028h[ESP]
jb L104
L12E: push dword ptr 02Ch[ESP]
mov ECX,offset FLAT:_DATA[010h]
push dword ptr 02Ch[ESP]
push ECX
call near ptr _printf
add ESP,0Ch
xor EAX,EAX
pop EDI
pop ESI
pop EBX
add ESP,028h
ret
-------------------------------
The cleaned up asm produced by ldc:
_D4test10masked_dotFAdAdAhZd:
pushl %edi
pushl %esi
subl $12, %esp
movl 24(%esp), %eax
testl %eax, %eax
je .LBB1_6
movl 28(%esp), %ecx
movl 36(%esp), %edx
movl 44(%esp), %esi
pxor %xmm0, %xmm0
xorl %edi, %edi
.align 16
.LBB1_2:
cmpb $0, (%ecx,%edi)
je .LBB1_4
movsd (%esi,%edi,8), %xmm1
mulsd (%edx,%edi,8), %xmm1
addsd %xmm1, %xmm0
.LBB1_4:
incl %edi
cmpl %eax, %edi
jne .LBB1_2
.LBB1_5:
movsd %xmm0, (%esp)
fldl (%esp)
addl $12, %esp
popl %esi
popl %edi
ret $24
.LBB1_6:
pxor %xmm0, %xmm0
jmp .LBB1_5
_Dmain:
pushl %ebp
pushl %ebx
pushl %edi
pushl %esi
subl $36, %esp
movl $1000, 28(%esp)
movl $1000, 32(%esp)
leal 28(%esp), %eax
movl %eax, 8(%esp)
movl $2, 4(%esp)
movl $_D12TypeInfo_AAd6__initZ, (%esp)
xorl %esi, %esi
call _d_newarraymiT
movl %eax, %edi
.align 16
.LBB2_1:
movl 4(%edi,%esi,8), %eax
movl (%edi,%esi,8), %ecx
movl %ecx, 4(%esp)
movl %eax, (%esp)
movl $1073741824, 12(%esp)
movl $0, 8(%esp)
call _d_array_init_double
incl %esi
cmpl $1000, %esi
jne .LBB2_1
movl $1000, 20(%esp)
movl $1000, 24(%esp)
leal 20(%esp), %eax
movl %eax, 8(%esp)
movl $2, 4(%esp)
movl $_D12TypeInfo_AAd6__initZ, (%esp)
xorl %esi, %esi
call _d_newarraymiT
movl %eax, %ebx
.align 16
.LBB2_3:
movl 4(%ebx,%esi,8), %eax
movl (%ebx,%esi,8), %ecx
movl %ecx, 4(%esp)
movl %eax, (%esp)
movl $1071644672, 12(%esp)
movl $0, 8(%esp)
call _d_array_init_double
incl %esi
cmpl $1000, %esi
jne .LBB2_3
movl $1000, 4(%esp)
movl $_D11TypeInfo_Ah6__initZ, (%esp)
call _d_newarrayT
movl %eax, %esi
movl %esi, (%esp)
movl $1000, 8(%esp)
movl $1, 4(%esp)
call memset
pxor %xmm0, %xmm0
xorl %eax, %eax
.LBB2_5:
movl 4(%ebx,%eax,8), %ecx
movl 4(%edi,%eax,8), %edx
pxor %xmm1, %xmm1
xorl %ebp, %ebp
.align 16
.LBB2_6:
cmpb $0, (%esi,%ebp)
je .LBB2_8
movsd (%edx,%ebp,8), %xmm2
mulsd (%ecx,%ebp,8), %xmm2
addsd %xmm2, %xmm1
.LBB2_8:
incl %ebp
cmpl $1000, %ebp
jne .LBB2_6
addsd %xmm1, %xmm0
incl %eax
cmpl $1000, %eax
jne .LBB2_5
movsd %xmm0, 4(%esp)
movl $.str, (%esp)
call printf
xorl %eax, %eax
addl $36, %esp
popl %esi
popl %edi
popl %ebx
popl %ebp
ret $8
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
Jul 08 2010
http://d.puremagic.com/issues/show_bug.cgi?id=4438
Leandro Lucarella <llucax gmail.com> changed:
What |Removed |Added
----------------------------------------------------------------------------
Keywords| |performance
CC| |llucax gmail.com
Platform|x86 |All
Blocks| |859
OS/Version|Windows |All
PDT ---
I'm marking this as a blocker of bug 859 so there is a single bug to track all
the inlining issues. Please do the same if you open more bugs associated with
inlining, or post them directly in bug 859.
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
Jul 08 2010
http://d.puremagic.com/issues/show_bug.cgi?id=4438
Brad Roberts <braddr puremagic.com> changed:
What |Removed |Added
----------------------------------------------------------------------------
CC| |braddr puremagic.com
Blocks|859 |
---
undoing false dependency
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
Jul 08 2010
http://d.puremagic.com/issues/show_bug.cgi?id=4438
PDT ---
> undoing false dependency
Can you elaborate a little on why having bug 859 as a tracker of all missing
inline opportunities is a bad thing?
Thanks
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
Jul 09 2010
http://d.puremagic.com/issues/show_bug.cgi?id=4438
A different case of missed inlining. In the following code isValidMove() is not
inlined by DMD 2.052 (with -O -inline -release), even though this function
contains no loops and no throw statements. Removing the "ref" for the board
array in the isValidMove() signature changes nothing.
The runtime is 12.6 seconds; the alternative version of the knightsTour()
function with manually inlined isValidMove() (currently commented out below)
runs in 8.0 seconds. My tests show that GCC -O3 is able to inline similar code
written in C.
// Code adapted from:
// http://en.literateprograms.org/The_Knight's_Tour_(C)?oldid=14704
import core.stdc.stdio: printf;
/// Compile-time tuple helper: TypeTuple!(args...) is an alias for its own
/// argument list. knightsTour() uses it to hold the knight's move offsets.
template TypeTuple(T...) {
alias T TypeTuple;
}
/// Tells whether the knight may be placed on (xpos, ypos) as move number nmove.
///
/// Params:
///   board = N x N board; a cell holds 0 or a move number
///   xpos  = first board index of the candidate square
///   ypos  = second board index of the candidate square
///   nmove = number of the move being attempted
/// Returns: false when the square is off the board, or when its cell is
///          non-zero and smaller than nmove; true otherwise.
bool isValidMove(int N)(ref int[N][N] board, int xpos, int ypos, int nmove) {
// The bounds tests come first, so board[xpos][ypos] is only read when in range.
if (xpos < 0 || xpos >= N || ypos < 0 || ypos >= N ||
(board[xpos][ypos] && board[xpos][ypos] < nmove))
return false;
return true;
}
// run time: 12.6 seconds
/// Recursive backtracking search for a knight's tour on an N x N board.
///
/// Writes nmove into board[xpos][ypos], then tries the eight knight moves
/// from that square with move number nmove + 1.
///
/// Params:
///   board = N x N board, modified in place
///   xpos  = first board index of the square to try
///   ypos  = second board index of the square to try
///   nmove = number of the move being placed
/// Returns: true when nmove reaches N * N or some successor call returns
///          true; false when the square is rejected by isValidMove() or
///          every successor call returns false.
bool knightsTour(int N)(ref int[N][N] board, int xpos, int ypos, int nmove) {
// This is the call the report says DMD fails to inline.
if (!isValidMove(board, xpos, ypos, nmove))
return false;
board[xpos][ypos] = nmove;
// Move number N * N has just been placed: report success.
if (nmove == N * N)
return true;
// spx[i] and spy[i] together give the i-th knight move offset.
alias TypeTuple!(+1, -2, -2, -1, -1, +2, +2, +1) spx;
alias TypeTuple!(+2, +1, -1, +2, -2, +1, -1, -2) spy;
static assert(spx.length == spy.length);
// ok stays true only while every successor tried so far has failed;
// once one succeeds, the short-circuit && skips the remaining calls.
bool ok = true;
foreach (i, sx; spx)
ok = ok && !knightsTour(board, xpos + sx, ypos + spy[i], nmove + 1);
if (ok) {
// Dead end: the cell is overwritten with N * N (not 0) before backtracking.
board[xpos][ypos] = N * N;
return false;
}
return true;
}
/*
// run time: 8.0 seconds
bool knightsTour(int N)(ref int[N][N] board, int xpos, int ypos, int nmove) {
// if is not valid move
if (xpos < 0 || xpos >= N || ypos < 0 || ypos >= N ||
(board[xpos][ypos] && board[xpos][ypos] < nmove))
return false;
board[xpos][ypos] = nmove;
if (nmove == N * N)
return true;
alias TypeTuple!(+1, -2, -2, -1, -1, +2, +2, +1) spx;
alias TypeTuple!(+2, +1, -1, +2, -2, +1, -1, -2) spy;
static assert(spx.length == spy.length);
bool ok = true;
foreach (i, sx; spx)
ok = ok && !knightsTour(board, xpos + sx, ypos + spy[i], nmove + 1);
if (ok) {
board[xpos][ypos] = N * N;
return false;
}
return true;
}
*/
/// Test driver: runs knightsTour() on an 8 x 8 board starting at (0, 0) with
/// move number 1 and prints the resulting board, or a failure message.
void main() {
enum int side = 8;
enum int xpos = 0;
enum int ypos = xpos;
int[side][side] board;
printf("Executing Knight's Tour...\n");
if (knightsTour(board, xpos, ypos, 1)) {
printf("Solution found:\n");
// One output line per board row, each cell printed with "%3d".
foreach (ref row; board) {
foreach (item; row)
printf("%3d", item);
printf("\n");
}
} else
printf("No solution found.\n");
}
The first part of the assembly of knightsTour() shows the call to
isValidMove():
_D4test20__T11knightsTourVk8Z11knightsTourFKG8G8iiiiZb comdat
sub ESP,02Ch
push EBX
push EBP
push ESI
mov ESI,044h[ESP]
push EDI
mov 038h[ESP],EAX
push ESI
push dword ptr 048h[ESP]
push dword ptr 048h[ESP]
call near ptr _D4test20__T11isValidMoveVk8Z11isValidMoveFKG8G8iiiiZb
xor AL,1
je L2D
pop EDI
...
--
Configure issuemail: http://d.puremagic.com/issues/userprefs.cgi?tab=email
------- You are receiving this mail because: -------
Apr 14 2011









d-bugmail puremagic.com 