digitalmars.D - std.range.zip performance
- bearophile (511/511) Feb 15 2011 While doing some benchmarks on the Clay language I've found something in...
- bearophile (97/97) Feb 15 2011 I have done one more test, with a much simpler zip(). The code is now fa...
- Andrei Alexandrescu (14/525) Feb 15 2011 Initial: 58 seconds.
- spir (4/16) Feb 16 2011 Bearophile, is clay's zip func lazy. Else, it could explain part of its
- dennis luehring (2/25) Feb 16 2011 why should it - what can lazyness help here?
- bearophile (22/24) Feb 16 2011 HOFs as zip/map/filter aren't D built-ins as in Python, but they are bas...
While doing some benchmarks on the Clay language I've found something
interesting.
I have timed a zip() done on two short arrays, and I have found this D version
too much slow compared to normal array code. Higher order functions like zip,
map, filter, etc are going to stay, they aren't toys, so I think the D compiler
has to digest them better.
(There is lot of of asm at the tail of this post, I don't know if some of t
will get cut away.)
Bye,
bearophile
Timings, seconds, best of 3:
----------------------
Binary sizes, bytes:
----------------------
Compilers and command line used:
DMD 2.051
clay compiler (hg id dec41ba07d58 tip, llvm r122866, Jan 5 2011)
clay -inline test1.clay -o test1.exe
dmd -O -release -inline test2.d
dmd -O -release -inline test3.d
To produce asm code with Clay:
clay -inline -asm test1.clay -o test1.s
Output: -1149672960 for all versions.
----------------------
Code used:
main() {
var a = Vector([18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28]);
var b = Vector([9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25]);
var tot = 0; // 32 bit signed int
for (i in range(0, 50*1000*1000))
for (x, y in zipped(a, b))
tot += x + y;
println(tot);
}
----------------------
import std.c.stdio: printf;
import std.range: zip;
void main() {
auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
int tot = 0;
foreach (i; 0 .. 50_000_000)
foreach (xy; zip(a, b))
tot += xy[0] + xy[1];
printf("%d\n", tot);
}
----------------------
import std.c.stdio: printf;
void main() {
auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
int tot = 0;
foreach (i; 0 .. 50_000_000)
foreach (j; 0 .. a.length)
tot += a[j] + b[j];
printf("%d\n", tot);
}
----------------------
import std.c.stdio: printf;
void main() {
auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
int tot = 0;
foreach (i; 0 .. 50_000_000)
foreach (j; 0 .. 30)
tot += a[j] + b[j];
printf("%d\n", tot);
}
----------------------
.align 16, 0x90
xorl %edx, %edx
.align 16, 0x90
addl (%esi,%edx,4), %ecx
addl (%edi,%edx,4), %ecx
incl %edx
cmpl $30, %edx
jne LBB2_5
decl %eax
jne LBB2_4
----------------------
LCE: push dword ptr 018h[ESP]
mov ESI,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip6__initZ
push dword ptr 018h[ESP]
push dword ptr 028h[ESP]
push dword ptr 028h[ESP]
push 0
lea EDI,078h[ESP]
movsd
movsd
movsd
movsd
movsd
lea EAX,078h[ESP]
call near ptr
_D3std5range14__T3ZipTAiTAiZ3Zip6__ctorMFNcAiAiE3std5range14StoppingPolicyZS3std5range14__T3ZipTAiTAiZ3Zip
mov ESI,EAX
lea EDI,044h[ESP]
movsd
movsd
movsd
movsd
movsd
lea ESI,044h[ESP]
lea EDI,024h[ESP]
movsd
movsd
movsd
movsd
movsd
lea EAX,024h[ESP]
call near ptr _D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb
xor AL,1
je L153
L11C: lea EAX,024h[ESP]
call near ptr
_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple
mov 07Ch[ESP],EAX
mov ECX,07Ch[ESP]
lea EAX,024h[ESP]
mov 080h[ESP],EDX
add ECX,080h[ESP]
add EBX,ECX
call near ptr _D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv
lea EAX,024h[ESP]
call near ptr _D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb
xor AL,1
jne L11C
L153: inc EBP
cmp EBP,02FAF080h
jb LCE
_D3std5range14__T3ZipTAiTAiZ3Zip6__ctorMFNcAiAiE3std5range14StoppingPolicyZS3std5range
4__T3ZipTAiTAiZ3Zip comdat
push EBX
mov ECX,8[ESP]
mov EBX,014h[ESP]
push ESI
mov ESI,EAX
mov EDX,01Ch[ESP]
mov 4[ESI],EDX
mov EDX,014h[ESP]
mov [ESI],EBX
mov EBX,010h[ESP]
mov 010h[ESI],ECX
mov 8[ESI],EBX
mov 0Ch[ESI],EDX
pop ESI
pop EBX
ret 014h
_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb comdat
L0: sub ESP,05Ch
mov ECX,010h[EAX]
test ECX,ECX
push EBX
push ESI
mov ESI,EAX
je L21
cmp ECX,1
je LC3
cmp ECX,2
je L55
jmp near ptr LC3
L21: mov EBX,[ESI]
mov EDX,4[ESI]
mov 0Ch[ESP],EBX
mov 010h[ESP],EDX
cmp dword ptr 0Ch[ESP],0
je L4A
mov EBX,8[ESI]
mov EDX,0Ch[ESI]
mov 014h[ESP],EBX
mov 018h[ESP],EDX
cmp dword ptr 014h[ESP],0
jne LC3
L4A: pop ESI
mov EAX,1
pop EBX
add ESP,05Ch
ret
L55: mov EBX,[ESI]
mov EDX,4[ESI]
mov ECX,offset
FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb14__dgliteral828MFZAxa
mov 01Ch[ESP],EBX
mov 020h[ESP],EDX
mov 02Ch[ESP],EAX
mov 030h[ESP],ECX
cmp dword ptr 01Ch[ESP],0
setz DL
mov EBX,8[ESI]
mov 024h[ESP],EBX
mov ECX,0Ch[ESI]
mov 028h[ESP],ECX
cmp dword ptr 024h[ESP],0
setz CL
cmp DL,CL
mov EDX,1
je L98
xor EDX,EDX
L98: xor DL,1
je LC3
push dword ptr FLAT:_DATA[054h]
push dword ptr FLAT:_DATA[050h]
push 0ADEh
mov EAX,038h[ESP]
mov EDX,03Ch[ESP]
mov EBX,038h[ESP]
call EDX
push EDX
push EAX
call near ptr _D3std9exception7bailOutFAyaixAaZv
LC3: pop ESI
xor EAX,EAX
pop EBX
add ESP,05Ch
ret
_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple
comdat
assume CS:_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple
L0: sub ESP,028h
mov EDX,4[EAX]
push EBX
push ESI
mov ESI,EAX
mov EBX,[ESI]
push EDI
mov 014h[ESP],EBX
mov 018h[ESP],EDX
cmp dword ptr 014h[ESP],0
je L31
lea EDX,0Ch[ESP]
mov EBX,[ESI]
push EDX
mov EDX,4[ESI]
mov EAX,[EDX]
push 4
call near ptr _D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi
jmp short L41
L31: lea ECX,0Ch[ESP]
mov EDI,4
push ECX
push EDI
call near ptr _D3std4conv14__T7emplaceTiZ7emplaceFAvZPi
L41: mov EBX,8[ESI]
mov ECX,0Ch[ESI]
test EBX,EBX
je L61
lea EDI,010h[ESP]
mov EDX,0Ch[ESI]
mov EAX,8[ESI]
push EDI
mov EAX,[EDX]
push 4
call near ptr _D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi
jmp short L71
L61: lea ECX,010h[ESP]
mov ESI,4
push ECX
push ESI
call near ptr _D3std4conv14__T7emplaceTiZ7emplaceFAvZPi
L71: mov EDX,010h[ESP]
mov EAX,0Ch[ESP]
pop EDI
pop ESI
pop EBX
add ESP,028h
ret
_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv comdat
L0: sub ESP,04Ch
mov ECX,010h[EAX]
test ECX,ECX
push EBX
push EBP
push ESI
push EDI
mov EDI,EAX
je L1F
cmp ECX,1
je L4A
cmp ECX,2
je L8C
jmp near ptr L142
L1F: mov ESI,[EDI]
mov EDX,4[EDI]
dec ESI
mov EBX,[EDI]
add EDX,4
lea EBP,8[EDI]
mov [EDI],ESI
mov 4[EDI],EDX
mov ESI,0[EBP]
mov EDX,4[EBP]
dec ESI
mov EBX,0[EBP]
add EDX,4
mov 0[EBP],ESI
mov 4[EBP],EDX
jmp near ptr L142
L4A: mov ESI,[EDI]
mov ECX,4[EDI]
test ESI,ESI
je L63
mov ESI,[EDI]
mov EDX,4[EDI]
dec ESI
mov EBX,[EDI]
add EDX,4
mov [EDI],ESI
mov 4[EDI],EDX
L63: mov ESI,8[EDI]
mov ECX,0Ch[EDI]
test ESI,ESI
je L142
lea EBP,8[EDI]
mov ESI,0[EBP]
mov EDX,4[EBP]
dec ESI
mov EBX,0[EBP]
add EDX,4
mov 0[EBP],ESI
mov 4[EBP],EDX
jmp near ptr L142
L8C: mov EBX,[EDI]
mov EDX,4[EDI]
mov ECX,offset
FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv14__dgliteral831MFZAxa
mov 014h[ESP],EBX
mov 018h[ESP],EDX
mov 01Ch[ESP],EAX
mov 020h[ESP],ECX
cmp dword ptr 014h[ESP],0
setnz DL
xor DL,1
je LD7
mov EDX,ECX
push dword ptr FLAT:_DATA[054h]
push dword ptr FLAT:_DATA[050h]
push 0B86h
mov EAX,028h[ESP]
mov EBX,028h[ESP]
call EDX
push EDX
push EAX
call near ptr _D3std9exception7bailOutFAyaixAaZv
LD7: mov EAX,[EDI]
mov EDX,4[EDI]
dec EAX
mov EBX,[EDI]
add EDX,4
mov 4[EDI],EDX
mov EDX,0Ch[EDI]
mov EBX,EDI
mov [EDI],EAX
mov EAX,8[EDI]
mov ECX,offset
FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv14__dgliteral832MFZAxa
mov 024h[ESP],EAX
mov 028h[ESP],EDX
mov 02Ch[ESP],EBX
mov 030h[ESP],ECX
cmp dword ptr 024h[ESP],0
setnz DL
xor DL,1
je L12F
push dword ptr FLAT:_DATA[054h]
push dword ptr FLAT:_DATA[050h]
push 0B86h
mov EAX,038h[ESP]
call ECX
push EDX
push EAX
call near ptr _D3std9exception7bailOutFAyaixAaZv
L12F: lea ESI,8[EDI]
mov EBX,[ESI]
mov EDX,4[ESI]
dec EBX
mov EAX,[ESI]
mov [ESI],EBX
add EDX,4
mov 4[ESI],EDX
L142: pop EDI
pop ESI
pop EBP
pop EBX
add ESP,04Ch
ret
_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb comdat
L0: sub ESP,05Ch
mov ECX,010h[EAX]
test ECX,ECX
push EBX
push ESI
mov ESI,EAX
je L21
cmp ECX,1
je LC3
cmp ECX,2
je L55
jmp near ptr LC3
L21: mov EBX,[ESI]
mov EDX,4[ESI]
mov 0Ch[ESP],EBX
mov 010h[ESP],EDX
cmp dword ptr 0Ch[ESP],0
je L4A
mov EBX,8[ESI]
mov EDX,0Ch[ESI]
mov 014h[ESP],EBX
mov 018h[ESP],EDX
cmp dword ptr 014h[ESP],0
jne LC3
L4A: pop ESI
mov EAX,1
pop EBX
add ESP,05Ch
ret
L55: mov EBX,[ESI]
mov EDX,4[ESI]
mov ECX,offset
FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb14__dgliteral828MFZAxa
mov 01Ch[ESP],EBX
mov 020h[ESP],EDX
mov 02Ch[ESP],EAX
mov 030h[ESP],ECX
cmp dword ptr 01Ch[ESP],0
setz DL
mov EBX,8[ESI]
mov 024h[ESP],EBX
mov ECX,0Ch[ESI]
mov 028h[ESP],ECX
cmp dword ptr 024h[ESP],0
setz CL
cmp DL,CL
mov EDX,1
je L98
xor EDX,EDX
L98: xor DL,1
je LC3
push dword ptr FLAT:_DATA[054h]
push dword ptr FLAT:_DATA[050h]
push 0ADEh
mov EAX,038h[ESP]
mov EDX,03Ch[ESP]
mov EBX,038h[ESP]
call EDX
push EDX
push EAX
call near ptr _D3std9exception7bailOutFAyaixAaZv
LC3: pop ESI
xor EAX,EAX
pop EBX
add ESP,05Ch
ret
_D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi comdat
L0: push EAX
mov EDX,offset
FLAT:_D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi14__dgliteral829MFZC6object9Throwable
push EBX
push ESI
cmp dword ptr 010h[ESP],4
sbb ECX,ECX
inc ECX
push ECX
lea EBX,0Ch[ESP]
push EDX
push EBX
call near ptr
_D3std9exception14__T7enforceTbZ7enforceFbLC6object9ThrowableZb
mov ECX,014h[ESP]
mov EAX,014h[ESP]
mov ESI,8[ESP]
mov [ECX],ESI
pop ESI
pop EBX
pop ECX
ret 8
(I have not included all the code, like _D3std9exception7bailOutFAyaixAaZv).
----------------------
LDE: xor EBX,EBX
cmp 010h[ESP],BL
je LF5
LE6: mov ECX,[EBX*4][EAX]
add ECX,[EBX*4][EDI]
add ESI,ECX
inc EBX
cmp EBX,014h[ESP]
jb LE6
LF5: inc EDX
cmp EDX,02FAF080h
jb LDE
----------------------
LBF: xor EBX,EBX
LC1: mov ECX,[EBX*4][EAX]
add ECX,[EBX*4][EDI]
add ESI,ECX
inc EBX
cmp EBX,01Eh
jb LC1
inc EDX
cmp EDX,02FAF080h
jb LBF
----------------------
Feb 15 2011
I have done one more test, with a much simpler zip(). The code is now faster,
but it seems there's no hope to have an acceptable performance. Maybe D needs a
built-in zip as Clay.
Bye,
bearophile
-----------------------
Timings, seconds, best of 3:
-----------------------
import std.c.stdio: printf;
struct zip {
int[] arr1, arr2;
size_t i;
static struct Element { int _0, _1; }
this(int[] a1, int[] a2)
in {
assert(a1.length == a2.length);
} body {
arr1 = a1;
arr2 = a2;
}
bool empty() {
return i >= arr1.length;
}
property Element front() {
return Element(arr1[i], arr2[i]);
}
void popFront() {
i++;
}
}
void main() {
auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
int tot = 0;
foreach (i; 0 .. 50_000_000)
foreach (xy; zip(a, b))
tot += xy._0 + xy._1;
printf("%d\n", tot);
}
----------------------
LCE: mov 024h[ESP],EBX
lea EBX,054h[ESP]
xor EDX,EDX
mov [EBX],EDX
mov EAX,014h[ESP]
lea ESI,054h[ESP]
mov 4[EBX],EDX
lea EDI,034h[ESP]
mov 8[EBX],EDX
mov 0Ch[EBX],EDX
mov 010h[EBX],EDX
mov EDX,018h[ESP]
mov 058h[ESP],EDX
mov EDX,020h[ESP]
mov 054h[ESP],EAX
mov EAX,01Ch[ESP]
mov 05Ch[ESP],EAX
mov 060h[ESP],EDX
movsd
movsd
movsd
movsd
movsd
mov ECX,044h[ESP]
cmp ECX,034h[ESP]
jae L167
L11D: mov EBX,044h[ESP]
mov EDX,038h[ESP]
mov ESI,[EBX*4][EDX]
mov EAX,034h[ESP]
mov EDX,040h[ESP]
mov ECX,[EBX*4][EDX]
mov 078h[ESP],ECX
mov EAX,03Ch[ESP]
mov EDX,078h[ESP]
mov 074h[ESP],ESI
mov EAX,074h[ESP]
mov EBX,074h[ESP]
mov 070h[ESP],EDX
add EBX,070h[ESP]
add EBP,EBX
inc dword ptr 044h[ESP]
mov ESI,044h[ESP]
mov 06Ch[ESP],EAX
cmp ESI,034h[ESP]
jb L11D
L167: mov EBX,024h[ESP]
inc EBX
cmp EBX,02FAF080h
jb LCE
----------------------
Feb 15 2011
Initial: 58 seconds.
Eliminated the switch in popFront: 53s.
Replaced emplace with assignment: 23s.
Specialized emplace for non-struct types, reinserted: 23s.
Eliminated the loop in empty (replaced with return ranges[0].empty;): 17s.
I'm sure there are ways to further improve this, but there are a few
difficulties. Each pass through the loop the code must transport values
from the two arrays into a specific format and then distribute them for
further calculation. Then, upon each popFront two words must be touched
because arrays hold pointer+length, not pointer+pointer (as probably
would be better since ranges are around).
Nice analysis!
Andrei
On 2/15/11 6:24 PM, bearophile wrote:
While doing some benchmarks on the Clay language I've found something
interesting.
I have timed a zip() done on two short arrays, and I have found this D version
too much slow compared to normal array code. Higher order functions like zip,
map, filter, etc are going to stay, they aren't toys, so I think the D compiler
has to digest them better.
(There is lot of of asm at the tail of this post, I don't know if some of t
will get cut away.)
Bye,
bearophile
Timings, seconds, best of 3:
----------------------
Binary sizes, bytes:
----------------------
Compilers and command line used:
DMD 2.051
clay compiler (hg id dec41ba07d58 tip, llvm r122866, Jan 5 2011)
clay -inline test1.clay -o test1.exe
dmd -O -release -inline test2.d
dmd -O -release -inline test3.d
To produce asm code with Clay:
clay -inline -asm test1.clay -o test1.s
Output: -1149672960 for all versions.
----------------------
Code used:
main() {
var a = Vector([18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28]);
var b = Vector([9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25]);
var tot = 0; // 32 bit signed int
for (i in range(0, 50*1000*1000))
for (x, y in zipped(a, b))
tot += x + y;
println(tot);
}
----------------------
import std.c.stdio: printf;
import std.range: zip;
void main() {
auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
int tot = 0;
foreach (i; 0 .. 50_000_000)
foreach (xy; zip(a, b))
tot += xy[0] + xy[1];
printf("%d\n", tot);
}
----------------------
import std.c.stdio: printf;
void main() {
auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
int tot = 0;
foreach (i; 0 .. 50_000_000)
foreach (j; 0 .. a.length)
tot += a[j] + b[j];
printf("%d\n", tot);
}
----------------------
import std.c.stdio: printf;
void main() {
auto a = [18,10,17,14,19,23,11,0,6,0,17,25,5,4,19,21,17,13,5,7,11,22,23,17,24,7,11,11,1,28];
auto b = [9,12,1,4,1,18,11,6,5,18,24,15,26,14,24,8,17,26,23,17,3,28,27,0,9,27,0,19,13,25];
int tot = 0;
foreach (i; 0 .. 50_000_000)
foreach (j; 0 .. 30)
tot += a[j] + b[j];
printf("%d\n", tot);
}
----------------------
.align 16, 0x90
xorl %edx, %edx
.align 16, 0x90
addl (%esi,%edx,4), %ecx
addl (%edi,%edx,4), %ecx
incl %edx
cmpl $30, %edx
jne LBB2_5
decl %eax
jne LBB2_4
----------------------
LCE: push dword ptr 018h[ESP]
mov ESI,offset FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip6__initZ
push dword ptr 018h[ESP]
push dword ptr 028h[ESP]
push dword ptr 028h[ESP]
push 0
lea EDI,078h[ESP]
movsd
movsd
movsd
movsd
movsd
lea EAX,078h[ESP]
call near ptr
_D3std5range14__T3ZipTAiTAiZ3Zip6__ctorMFNcAiAiE3std5range14StoppingPolicyZS3std5range14__T3ZipTAiTAiZ3Zip
mov ESI,EAX
lea EDI,044h[ESP]
movsd
movsd
movsd
movsd
movsd
lea ESI,044h[ESP]
lea EDI,024h[ESP]
movsd
movsd
movsd
movsd
movsd
lea EAX,024h[ESP]
call near ptr _D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb
xor AL,1
je L153
L11C: lea EAX,024h[ESP]
call near ptr
_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple
mov 07Ch[ESP],EAX
mov ECX,07Ch[ESP]
lea EAX,024h[ESP]
mov 080h[ESP],EDX
add ECX,080h[ESP]
add EBX,ECX
call near ptr _D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv
lea EAX,024h[ESP]
call near ptr _D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb
xor AL,1
jne L11C
L153: inc EBP
cmp EBP,02FAF080h
jb LCE
_D3std5range14__T3ZipTAiTAiZ3Zip6__ctorMFNcAiAiE3std5range14StoppingPolicyZS3std5range
4__T3ZipTAiTAiZ3Zip comdat
push EBX
mov ECX,8[ESP]
mov EBX,014h[ESP]
push ESI
mov ESI,EAX
mov EDX,01Ch[ESP]
mov 4[ESI],EDX
mov EDX,014h[ESP]
mov [ESI],EBX
mov EBX,010h[ESP]
mov 010h[ESI],ECX
mov 8[ESI],EBX
mov 0Ch[ESI],EDX
pop ESI
pop EBX
ret 014h
_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb comdat
L0: sub ESP,05Ch
mov ECX,010h[EAX]
test ECX,ECX
push EBX
push ESI
mov ESI,EAX
je L21
cmp ECX,1
je LC3
cmp ECX,2
je L55
jmp near ptr LC3
L21: mov EBX,[ESI]
mov EDX,4[ESI]
mov 0Ch[ESP],EBX
mov 010h[ESP],EDX
cmp dword ptr 0Ch[ESP],0
je L4A
mov EBX,8[ESI]
mov EDX,0Ch[ESI]
mov 014h[ESP],EBX
mov 018h[ESP],EDX
cmp dword ptr 014h[ESP],0
jne LC3
L4A: pop ESI
mov EAX,1
pop EBX
add ESP,05Ch
ret
L55: mov EBX,[ESI]
mov EDX,4[ESI]
mov ECX,offset
FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb14__dgliteral828MFZAxa
mov 01Ch[ESP],EBX
mov 020h[ESP],EDX
mov 02Ch[ESP],EAX
mov 030h[ESP],ECX
cmp dword ptr 01Ch[ESP],0
setz DL
mov EBX,8[ESI]
mov 024h[ESP],EBX
mov ECX,0Ch[ESI]
mov 028h[ESP],ECX
cmp dword ptr 024h[ESP],0
setz CL
cmp DL,CL
mov EDX,1
je L98
xor EDX,EDX
L98: xor DL,1
je LC3
push dword ptr FLAT:_DATA[054h]
push dword ptr FLAT:_DATA[050h]
push 0ADEh
mov EAX,038h[ESP]
mov EDX,03Ch[ESP]
mov EBX,038h[ESP]
call EDX
push EDX
push EAX
call near ptr _D3std9exception7bailOutFAyaixAaZv
LC3: pop ESI
xor EAX,EAX
pop EBX
add ESP,05Ch
ret
_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14
_T5TupleTiTiZ5Tuple comdat
assume CS:_D3std5range14__T3ZipTAiTAiZ3Zip5frontMFNdZS3std8typecons14__T5TupleTiTiZ5Tuple
L0: sub ESP,028h
mov EDX,4[EAX]
push EBX
push ESI
mov ESI,EAX
mov EBX,[ESI]
push EDI
mov 014h[ESP],EBX
mov 018h[ESP],EDX
cmp dword ptr 014h[ESP],0
je L31
lea EDX,0Ch[ESP]
mov EBX,[ESI]
push EDX
mov EDX,4[ESI]
mov EAX,[EDX]
push 4
call near ptr _D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi
jmp short L41
L31: lea ECX,0Ch[ESP]
mov EDI,4
push ECX
push EDI
call near ptr _D3std4conv14__T7emplaceTiZ7emplaceFAvZPi
L41: mov EBX,8[ESI]
mov ECX,0Ch[ESI]
test EBX,EBX
je L61
lea EDI,010h[ESP]
mov EDX,0Ch[ESI]
mov EAX,8[ESI]
push EDI
mov EAX,[EDX]
push 4
call near ptr _D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi
jmp short L71
L61: lea ECX,010h[ESP]
mov ESI,4
push ECX
push ESI
call near ptr _D3std4conv14__T7emplaceTiZ7emplaceFAvZPi
L71: mov EDX,010h[ESP]
mov EAX,0Ch[ESP]
pop EDI
pop ESI
pop EBX
add ESP,028h
ret
_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv comdat
L0: sub ESP,04Ch
mov ECX,010h[EAX]
test ECX,ECX
push EBX
push EBP
push ESI
push EDI
mov EDI,EAX
je L1F
cmp ECX,1
je L4A
cmp ECX,2
je L8C
jmp near ptr L142
L1F: mov ESI,[EDI]
mov EDX,4[EDI]
dec ESI
mov EBX,[EDI]
add EDX,4
lea EBP,8[EDI]
mov [EDI],ESI
mov 4[EDI],EDX
mov ESI,0[EBP]
mov EDX,4[EBP]
dec ESI
mov EBX,0[EBP]
add EDX,4
mov 0[EBP],ESI
mov 4[EBP],EDX
jmp near ptr L142
L4A: mov ESI,[EDI]
mov ECX,4[EDI]
test ESI,ESI
je L63
mov ESI,[EDI]
mov EDX,4[EDI]
dec ESI
mov EBX,[EDI]
add EDX,4
mov [EDI],ESI
mov 4[EDI],EDX
L63: mov ESI,8[EDI]
mov ECX,0Ch[EDI]
test ESI,ESI
je L142
lea EBP,8[EDI]
mov ESI,0[EBP]
mov EDX,4[EBP]
dec ESI
mov EBX,0[EBP]
add EDX,4
mov 0[EBP],ESI
mov 4[EBP],EDX
jmp near ptr L142
L8C: mov EBX,[EDI]
mov EDX,4[EDI]
mov ECX,offset
FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv14__dgliteral831MFZAxa
mov 014h[ESP],EBX
mov 018h[ESP],EDX
mov 01Ch[ESP],EAX
mov 020h[ESP],ECX
cmp dword ptr 014h[ESP],0
setnz DL
xor DL,1
je LD7
mov EDX,ECX
push dword ptr FLAT:_DATA[054h]
push dword ptr FLAT:_DATA[050h]
push 0B86h
mov EAX,028h[ESP]
mov EBX,028h[ESP]
call EDX
push EDX
push EAX
call near ptr _D3std9exception7bailOutFAyaixAaZv
LD7: mov EAX,[EDI]
mov EDX,4[EDI]
dec EAX
mov EBX,[EDI]
add EDX,4
mov 4[EDI],EDX
mov EDX,0Ch[EDI]
mov EBX,EDI
mov [EDI],EAX
mov EAX,8[EDI]
mov ECX,offset
FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip8popFrontMFZv14__dgliteral832MFZAxa
mov 024h[ESP],EAX
mov 028h[ESP],EDX
mov 02Ch[ESP],EBX
mov 030h[ESP],ECX
cmp dword ptr 024h[ESP],0
setnz DL
xor DL,1
je L12F
push dword ptr FLAT:_DATA[054h]
push dword ptr FLAT:_DATA[050h]
push 0B86h
mov EAX,038h[ESP]
call ECX
push EDX
push EAX
call near ptr _D3std9exception7bailOutFAyaixAaZv
L12F: lea ESI,8[EDI]
mov EBX,[ESI]
mov EDX,4[ESI]
dec EBX
mov EAX,[ESI]
mov [ESI],EBX
add EDX,4
mov 4[ESI],EDX
L142: pop EDI
pop ESI
pop EBP
pop EBX
add ESP,04Ch
ret
_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb comdat
L0: sub ESP,05Ch
mov ECX,010h[EAX]
test ECX,ECX
push EBX
push ESI
mov ESI,EAX
je L21
cmp ECX,1
je LC3
cmp ECX,2
je L55
jmp near ptr LC3
L21: mov EBX,[ESI]
mov EDX,4[ESI]
mov 0Ch[ESP],EBX
mov 010h[ESP],EDX
cmp dword ptr 0Ch[ESP],0
je L4A
mov EBX,8[ESI]
mov EDX,0Ch[ESI]
mov 014h[ESP],EBX
mov 018h[ESP],EDX
cmp dword ptr 014h[ESP],0
jne LC3
L4A: pop ESI
mov EAX,1
pop EBX
add ESP,05Ch
ret
L55: mov EBX,[ESI]
mov EDX,4[ESI]
mov ECX,offset
FLAT:_D3std5range14__T3ZipTAiTAiZ3Zip5emptyMFZb14__dgliteral828MFZAxa
mov 01Ch[ESP],EBX
mov 020h[ESP],EDX
mov 02Ch[ESP],EAX
mov 030h[ESP],ECX
cmp dword ptr 01Ch[ESP],0
setz DL
mov EBX,8[ESI]
mov 024h[ESP],EBX
mov ECX,0Ch[ESI]
mov 028h[ESP],ECX
cmp dword ptr 024h[ESP],0
setz CL
cmp DL,CL
mov EDX,1
je L98
xor EDX,EDX
L98: xor DL,1
je LC3
push dword ptr FLAT:_DATA[054h]
push dword ptr FLAT:_DATA[050h]
push 0ADEh
mov EAX,038h[ESP]
mov EDX,03Ch[ESP]
mov EBX,038h[ESP]
call EDX
push EDX
push EAX
call near ptr _D3std9exception7bailOutFAyaixAaZv
LC3: pop ESI
xor EAX,EAX
pop EBX
add ESP,05Ch
ret
_D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi comdat
L0: push EAX
mov EDX,offset
FLAT:_D3std4conv16__T7emplaceTiTiZ7emplaceFAviZPi14__dgliteral829MFZC6object9Throwable
push EBX
push ESI
cmp dword ptr 010h[ESP],4
sbb ECX,ECX
inc ECX
push ECX
lea EBX,0Ch[ESP]
push EDX
push EBX
call near ptr
_D3std9exception14__T7enforceTbZ7enforceFbLC6object9ThrowableZb
mov ECX,014h[ESP]
mov EAX,014h[ESP]
mov ESI,8[ESP]
mov [ECX],ESI
pop ESI
pop EBX
pop ECX
ret 8
(I have not included all the code, like _D3std9exception7bailOutFAyaixAaZv).
----------------------
LDE: xor EBX,EBX
cmp 010h[ESP],BL
je LF5
LE6: mov ECX,[EBX*4][EAX]
add ECX,[EBX*4][EDI]
add ESI,ECX
inc EBX
cmp EBX,014h[ESP]
jb LE6
LF5: inc EDX
cmp EDX,02FAF080h
jb LDE
----------------------
LBF: xor EBX,EBX
LC1: mov ECX,[EBX*4][EAX]
add ECX,[EBX*4][EDI]
add ESI,ECX
inc EBX
cmp EBX,01Eh
jb LC1
inc EDX
cmp EDX,02FAF080h
jb LBF
----------------------
Feb 15 2011
On 02/16/2011 03:36 AM, Andrei Alexandrescu wrote:Initial: 58 seconds. Eliminated the switch in popFront: 53s. Replaced emplace with assignment: 23s. Specialized emplace for non-struct types, reinserted: 23s. Eliminated the loop in empty (replaced with return ranges[0].empty;): 17s. I'm sure there are ways to further improve this, but there are a few difficulties. Each pass through the loop the code must transport values from the two arrays into a specific format and then distribute them for further calculation. Then, upon each popFront two words must be touched because arrays hold pointer+length, not pointer+pointer (as probably would be better since ranges are around). Nice analysis!Bearophile, is clay's zip func lazy. Else, it could explain part of its performance, don't you think? Denis
Feb 16 2011
Am 16.02.2011 10:25, schrieb spir:On 02/16/2011 03:36 AM, Andrei Alexandrescu wrote:why should it - what can lazyness help here?Initial: 58 seconds. Eliminated the switch in popFront: 53s. Replaced emplace with assignment: 23s. Specialized emplace for non-struct types, reinserted: 23s. Eliminated the loop in empty (replaced with return ranges[0].empty;): 17s. I'm sure there are ways to further improve this, but there are a few difficulties. Each pass through the loop the code must transport values from the two arrays into a specific format and then distribute them for further calculation. Then, upon each popFront two words must be touched because arrays hold pointer+length, not pointer+pointer (as probably would be better since ranges are around). Nice analysis!Bearophile, is clay's zip func lazy. Else, it could explain part of its performance, don't you think? Denis
Feb 16 2011
Andrei:I'm sure there are ways to further improve this, but there are a few difficulties. Each pass through the loop the code must transport values from the two arrays into a specific format and then distribute them for further calculation. Then, upon each popFront two words must be touched because arrays hold pointer+length, not pointer+pointer (as probably would be better since ranges are around).<HOFs as zip/map/filter aren't D built-ins as in Python, but they are basic language constructs. If the D front-end becomes a bit aware of what a zip is, it probably becomes able to optimize away most or all those data movements. constructor) shows there's more space for improvements, optimization-wise. In the Haskell Prelude (a standard module imported and compiled before any user code) shows the implementation of zip(l1,l2) and zip3(l1,l2,l3): zip :: [a] -> [b] -> [(a,b)] zip = zipWith (,) zip3 :: [a] -> [b] -> [c] -> [(a,b,c)] zip3 = zipWith3 (,,) zipWith :: (a->b->c) -> [a]->[b]->[c] zipWith z (a:as) (b:bs) = z a b : zipWith z as bs zipWith _ _ _ = [] zipWith3 :: (a->b->c->d) -> [a]->[b]->[c]->[d] zipWith3 z (a:as) (b:bs) (c:cs) = z a b c : zipWith3 z as bs cs zipWith3 _ _ _ _ = [] They are tiny compared to Phobos code. They are efficient enough, and they have no corner cases. ---------------------- spir:is clay's zip func lazy.<It seems lazy, it's not an array of records, and the printing function refuses to print it. Bye, bearophile
Feb 16 2011









bearophile <bearophileHUGS lycos.com> 