www.digitalmars.com         C & C++   DMDScript  

digitalmars.D - DMD is faster than LDC and GDC

reply Daniel Kozak <kozzi11 gmail.com> writes:
code:

import std.stdio;

/**
 * Formats a table of strings as text.
 *
 * Each row becomes one line terminated by "\n". A line starts with "|" and
 * every cell is followed by "|". Cells are left-aligned and padded with
 * spaces to the width of the longest cell in the same column.
 *
 * Params:
 *   table = rows of cells; rows may have different lengths
 *
 * Returns: the formatted text, or "" when the table has no rows
 */
auto fmttable(immutable(string[][]) table) {

     import std.array : appender, uninitializedArray;
     import std.range : take, repeat;
     import std.exception : assumeUnique;

     // Nothing to format; bail out before allocating any buffers.
     if (table.length == 0) return "";

     // Rows may be ragged, so size the width table by the longest row
     // rather than by the first one (a longer later row would otherwise
     // index past the end of widths).
     size_t ncols = 0;
     foreach (row; table) {
         if (row.length > ncols)
             ncols = row.length;
     }

     // column widths: the longest cell seen in each column
     auto widths = new size_t[](ncols);
     foreach (row; table) {
         foreach (colnum, cell; row) {
             if (cell.length > widths[colnum])
                 widths[colnum] = cell.length;
         }
     }

     // Hand the appender a 128-char scratch array, then clear() it so the
     // uninitialized contents are dropped and appending starts from empty.
     auto res = appender(uninitializedArray!(char[])(128));
     res.clear();

     foreach (row; table) {
         res ~= "|";
         foreach (colnum, cell; row) {
             res ~= cell;
             // widths[colnum] is a maximum over this column, so it is never
             // smaller than cell.length and the subtraction cannot wrap.
             immutable pad = widths[colnum] - cell.length;
             if (pad)
                 res ~= ' '.repeat().take(pad);
             res ~= "|";
         }
         res.put("\n");
     }

     // The buffer is not referenced anywhere else, so it can be returned
     // as an immutable string without copying.
     return res.data.assumeUnique();
}

// Benchmark driver: prints the formatted table once, then calls fmttable
// one million times and prints the final loop counter.
void main() {

     // Sample input: 5 rows x 3 columns; two cells carry trailing spaces,
     // which makes them the widest cells of their columns.
     immutable table = [
         ["row1.1", "row1.2  ", "row1.3"],
         ["row2.1", "row2.2", "row2.3"],
         ["row3.1", "row3.2", "row3.3  "],
         ["row4.1", "row4.2", "row4.3"],
         ["row5.1", "row5.2", "row5.3"],
     ];

     writeln(fmttable(table));
     int i;
     for (i=0; i < 1000000; ++i) {
         // NOTE(review): the return value is discarded here. According to
         // the disassembly posted later in this thread, DMD -O drops the
         // call entirely, so the DMD timing does not measure fmttable.
         fmttable(table);
     }
     // i is declared outside the loop so the iteration count can be printed.
     writeln(i);
}

timings:

DMD (-O -release -inline -boundscheck=off):
real	0m0.003s
user	0m0.000s
sys	0m0.000s

LDMD2-ldc2 (-O -release -inline -boundscheck=off):
real	0m1.071s
user	0m1.067s
sys	0m0.000s


GDC (-O3 -finline -frelease -fno-bounds-check):
real	0m0.724s
user	0m0.720s
sys	0m0.003s
Nov 12 2015
next sibling parent reply John Colvin <john.loughran.colvin gmail.com> writes:
On Thursday, 12 November 2015 at 11:59:50 UTC, Daniel Kozak wrote:
 code:

 import std.stdio;

 auto fmttable(immutable(string[][]) table) {

     import std.array : appender, uninitializedArray;
     import std.range : take, repeat;
     import std.exception : assumeUnique;

     auto res = appender(uninitializedArray!(char[])(128));
     res.clear();

     if (table.length == 0) return "";
     // column widths
     auto widths = new int[](table[0].length);

     foreach (rownum, row; table) {
         foreach (colnum, cell; row) {
             if (cell.length > widths[colnum])
                 widths[colnum] = cast(int)cell.length;
         }
     }

     foreach (row; table) {
         res ~= "|";
         foreach (colnum, cell; row) {
             int l = widths[colnum] - cast(int)cell.length;
             res ~= cell;
             if (l)
                 res ~= ' '.repeat().take(l);
             res ~= "|";
         }
         res.put("\n");
     }

      return res.data.assumeUnique();
 }

 void main() {

     immutable table = [
         ["row1.1", "row1.2  ", "row1.3"],
         ["row2.1", "row2.2", "row2.3"],
         ["row3.1", "row3.2", "row3.3  "],
         ["row4.1", "row4.2", "row4.3"],
         ["row5.1", "row5.2", "row5.3"],
     ];

     writeln(fmttable(table));
     int i;
     for (i=0; i < 1000000; ++i) {
         fmttable(table);
     }
     writeln(i);
 }

 timings:

 DMD (-O -release -inline -boundscheck=off):
 real	0m0.003s
 user	0m0.000s
 sys	0m0.000s

 LDMD2-ldc2 (-O -release -inline -boundscheck=off):
 real	0m1.071s
 user	0m1.067s
 sys	0m0.000s


 GDC (-O3 -finline -frelease -fno-bounds-check):
 real	0m0.724s
 user	0m0.720s
 sys	0m0.003s
What versions of these compilers? I suspect the majority (maybe 80%-ish) of the time is spent allocating memory, so you might be seeing GC improvements in recent DMD
Nov 12 2015
next sibling parent reply Daniel Kozak via Digitalmars-d <digitalmars-d puremagic.com> writes:
V Thu, 12 Nov 2015 12:10:30 +0000
John Colvin via Digitalmars-d <digitalmars-d puremagic.com> napsáno:

 On Thursday, 12 November 2015 at 11:59:50 UTC, Daniel Kozak wrote:
 code:

 import std.stdio;

 auto fmttable(immutable(string[][]) table) {

     import std.array : appender, uninitializedArray;
     import std.range : take, repeat;
     import std.exception : assumeUnique;

     auto res = appender(uninitializedArray!(char[])(128));
     res.clear();

     if (table.length == 0) return "";
     // column widths
     auto widths = new int[](table[0].length);

     foreach (rownum, row; table) {
         foreach (colnum, cell; row) {
             if (cell.length > widths[colnum])
                 widths[colnum] = cast(int)cell.length;
         }
     }

     foreach (row; table) {
         res ~= "|";
         foreach (colnum, cell; row) {
             int l = widths[colnum] - cast(int)cell.length;
             res ~= cell;
             if (l)
                 res ~= ' '.repeat().take(l);
             res ~= "|";
         }
         res.put("\n");
     }

      return res.data.assumeUnique();
 }

 void main() {

     immutable table = [
         ["row1.1", "row1.2  ", "row1.3"],
         ["row2.1", "row2.2", "row2.3"],
         ["row3.1", "row3.2", "row3.3  "],
         ["row4.1", "row4.2", "row4.3"],
         ["row5.1", "row5.2", "row5.3"],
     ];

     writeln(fmttable(table));
     int i;
     for (i=0; i < 1000000; ++i) {
         fmttable(table);
     }
     writeln(i);
 }

 timings:

 DMD (-O -release -inline -boundscheck=off):
 real	0m0.003s
 user	0m0.000s
 sys	0m0.000s

 LDMD2-ldc2 (-O -release -inline -boundscheck=off):
 real	0m1.071s
 user	0m1.067s
 sys	0m0.000s


 GDC (-O3 -finline -frelease -fno-bounds-check):
 real	0m0.724s
 user	0m0.720s
 sys	0m0.003s  
What versions of these compilers? I suspect the majority (maybe 80%-ish) of the time is spent allocating memory, so you might be seeing GC improvements in recent DMD
DMD 2.069, LDC 2.067, GDC 2.065. No, it is not caused by memory allocations. It seems DMD can recognize that fmttable has the same result every time, so it computes it only once.
Nov 12 2015
parent reply John Colvin <john.loughran.colvin gmail.com> writes:
On Thursday, 12 November 2015 at 12:23:11 UTC, Daniel Kozak wrote:
 V Thu, 12 Nov 2015 12:10:30 +0000
 John Colvin via Digitalmars-d <digitalmars-d puremagic.com> 
 napsáno:

 On Thursday, 12 November 2015 at 11:59:50 UTC, Daniel Kozak 
 wrote:
 [...]
What versions of these compilers? I suspect the majority (maybe 80%-ish) of the time is spent allocating memory, so you might be seeing GC improvements in recent DMD
DMD 2.069 LDC 2.067 GDC 2.065 No it is not cause by memory allocations. It seems DMD can recognize that fmttable has same result every time, so it does compute it only once.
Ok, then my second hypothesis is that dmd is inferring the pure attribute for fmttable because it returns auto (new in 2.069 IIRC), which enable the above optimisation that you have noted. Gdc and ldc (and dmd) can do similar things in their backend, but perhaps not here. Do you have older dmd versions on hand to test?
Nov 12 2015
parent Daniel Kozak via Digitalmars-d <digitalmars-d puremagic.com> writes:
V Thu, 12 Nov 2015 12:38:47 +0000
John Colvin via Digitalmars-d <digitalmars-d puremagic.com> napsáno:

 On Thursday, 12 November 2015 at 12:23:11 UTC, Daniel Kozak wrote:
 V Thu, 12 Nov 2015 12:10:30 +0000
 John Colvin via Digitalmars-d <digitalmars-d puremagic.com> 
 napsáno:
  
 On Thursday, 12 November 2015 at 11:59:50 UTC, Daniel Kozak 
 wrote:  
 [...]  
What versions of these compilers? I suspect the majority (maybe 80%-ish) of the time is spent allocating memory, so you might be seeing GC improvements in recent DMD
DMD 2.069 LDC 2.067 GDC 2.065 No it is not cause by memory allocations. It seems DMD can recognize that fmttable has same result every time, so it does compute it only once.
Ok, then my second hypothesis is that dmd is inferring the pure attribute for fmttable because it returns auto (new in 2.069 IIRC), which enable the above optimisation that you have noted. Gdc and ldc (and dmd) can do similar things in their backend, but perhaps not here. Do you have older dmd versions on hand to test?
Yes (DVM) and it is same for older versions (2.066.1, 2.067.1)
Nov 12 2015
prev sibling parent Artur Skawina via Digitalmars-d <digitalmars-d puremagic.com> writes:
On 11/12/15 13:22, Daniel Kozak via Digitalmars-d wrote:
 timings:
 DMD (-O -release -inline -boundscheck=off):
 real	0m0.003s
 user	0m0.000s
 sys	0m0.000s

 LDMD2-ldc2 (-O -release -inline -boundscheck=off):
 real	0m1.071s
 user	0m1.067s
 sys	0m0.000s


 GDC (-O3 -finline -frelease -fno-bounds-check):
 real	0m0.724s
 user	0m0.720s
 sys	0m0.003s  
What versions of these compilers? I suspect the majority (maybe 80%-ish) of the time is spent allocating memory, so you might be seeing GC improvements in recent DMD
DMD 2.069 LDC 2.067 GDC 2.065 No it is not cause by memory allocations. It seems DMD can recognize that fmttable has same result every time, so it does compute it only once.
Comparisons using different frontend versions are very unfair -- *every D release introduces a new language dialect* (for example: http://dlang.org/changelog/2.068.0.html#attribinference3).

Out of curiosity, how does this slightly more sane version perform? (I don't have any dmd or ldc compilers; it takes ~80ms using GDC)

   import std.stdio;

   auto fmttable(alias sink=sink)(immutable(string[][]) table) {
      import std.range : take, repeat;

      if (table.length == 0) return;
      // column widths
      auto widths = new int[](table[0].length);

      foreach (rownum, row; table) {
         foreach (colnum, cell; row) {
            if (cell.length > widths[colnum])
               widths[colnum] = cast(int)cell.length;
         }
      }

      foreach (row; table) {
         sink("|");
         foreach (colnum, cell; row) {
            sink(cell, ' '.repeat().take(widths[colnum]-cast(int)cell.length), "|");
         }
         sink("\n");
      }
   }

   void sink(S...)(S s) { foreach(I, _; S) write(s[I]); }
   void sink0(S...)(S s) {}

   void main() {
      immutable table = [
         ["row1.1", "row1.2 ", "row1.3"],
         ["row2.1", "row2.2", "row2.3"],
         ["row3.1", "row3.2", "row3.3 "],
         ["row4.1", "row4.2", "row4.3"],
         ["row5.1", "row5.2", "row5.3"],
      ];

      fmttable(table);
      int i;
      for (i=0; i < 1000000; ++i) {
         fmttable!sink0(table);
      }
      sink(i, "\n");
   }

artur
Nov 12 2015
prev sibling next sibling parent Iain Buclaw via Digitalmars-d <digitalmars-d puremagic.com> writes:
On 12 November 2015 at 12:59, Daniel Kozak via Digitalmars-d <
digitalmars-d puremagic.com> wrote:

 code:
<snip>
 GDC (-O3 -finline -frelease -fno-bounds-check):
 real    0m0.724s
 user    0m0.720s
 sys     0m0.003s
Not to be pedantic, but -finline does nothing (what you really want is -finline-functions) However.... -finline-functions is enabled automatically at -O3, so the whole -finline just becomes wasted typing. :-)
Nov 12 2015
prev sibling next sibling parent Daniel Kozak via Digitalmars-d <digitalmars-d puremagic.com> writes:
V Thu, 12 Nov 2015 13:37:28 +0100
Iain Buclaw via Digitalmars-d <digitalmars-d puremagic.com> napsáno:

 On 12 November 2015 at 12:59, Daniel Kozak via Digitalmars-d <
 digitalmars-d puremagic.com> wrote:
 
 code:
<snip>
 GDC (-O3 -finline -frelease -fno-bounds-check):
 real    0m0.724s
 user    0m0.720s
 sys     0m0.003s
Not to be pedantic, but -finline does nothing (what you really want is -finline-functions) However.... -finline-functions is enabled automatically at -O3, so the whole -finline just becomes wasted typing. :-)
Yeah I know, but it is a bad habit from past
Nov 12 2015
prev sibling next sibling parent reply John Colvin <john.loughran.colvin gmail.com> writes:
On Thursday, 12 November 2015 at 11:59:50 UTC, Daniel Kozak wrote:
 code:

 import std.stdio;

 auto fmttable(immutable(string[][]) table) {

     import std.array : appender, uninitializedArray;
     import std.range : take, repeat;
     import std.exception : assumeUnique;

     auto res = appender(uninitializedArray!(char[])(128));
     res.clear();

     if (table.length == 0) return "";
     // column widths
     auto widths = new int[](table[0].length);

     foreach (rownum, row; table) {
         foreach (colnum, cell; row) {
             if (cell.length > widths[colnum])
                 widths[colnum] = cast(int)cell.length;
         }
     }

     foreach (row; table) {
         res ~= "|";
         foreach (colnum, cell; row) {
             int l = widths[colnum] - cast(int)cell.length;
             res ~= cell;
             if (l)
                 res ~= ' '.repeat().take(l);
             res ~= "|";
         }
         res.put("\n");
     }

      return res.data.assumeUnique();
 }

 void main() {

     immutable table = [
         ["row1.1", "row1.2  ", "row1.3"],
         ["row2.1", "row2.2", "row2.3"],
         ["row3.1", "row3.2", "row3.3  "],
         ["row4.1", "row4.2", "row4.3"],
         ["row5.1", "row5.2", "row5.3"],
     ];

     writeln(fmttable(table));
     int i;
     for (i=0; i < 1000000; ++i) {
         fmttable(table);
     }
     writeln(i);
 }

 timings:

 DMD (-O -release -inline -boundscheck=off):
 real	0m0.003s
 user	0m0.000s
 sys	0m0.000s

 LDMD2-ldc2 (-O -release -inline -boundscheck=off):
 real	0m1.071s
 user	0m1.067s
 sys	0m0.000s


 GDC (-O3 -finline -frelease -fno-bounds-check):
 real	0m0.724s
 user	0m0.720s
 sys	0m0.003s
To test the speed of fmttable itself I split fmttable and main into different modules, made fmttable extern(C) so I could just prototype it in the main module (no import), then compiled them separately before linking. This should prevent any possible inlining/purity cleverness. ~1s for ldmd2, ~2s for dmd, which is business as normal. dmd is being clever and spotting that fmttable is pure; it would be good if ldc/gdc could spot this too.
Nov 12 2015
next sibling parent jmh530 <john.michael.hall gmail.com> writes:
On Thursday, 12 November 2015 at 14:44:49 UTC, John Colvin wrote:
 dmd is being clever and spotting that fmttable is pure, it 
 would be good if ldc/gdc could spot this to.
I don't recall seeing anything in the 2.069.0 change log about improved attribute inference for auto functions. If you can find a link pointing to where it was discussed (either change log or forum or bug report), I would appreciate it.
Nov 12 2015
prev sibling next sibling parent reply tired_eyes <pastuhov85 gmail.com> writes:
On Thursday, 12 November 2015 at 14:44:49 UTC, John Colvin wrote:
 To test the speed of fmttable itself I split fmttable and main 
 in to different modules, made fmttable extern(C) so I could 
 just prototype it in the main module (no import), then compiled 
 them separately before linking. This should prevent any 
 possible inlining/purity cleverness. ~1s for ldmd2, ~2s for 
 dmd, which is business as normal.

 dmd is being clever and spotting that fmttable is pure, it 
 would be good if ldc/gdc could spot this to.
If so, should explicitly marking fmttable as pure close the gap for initial code?
Nov 12 2015
parent John Colvin <john.loughran.colvin gmail.com> writes:
On Thursday, 12 November 2015 at 19:11:25 UTC, tired_eyes wrote:
 On Thursday, 12 November 2015 at 14:44:49 UTC, John Colvin 
 wrote:
 To test the speed of fmttable itself I split fmttable and main 
 in to different modules, made fmttable extern(C) so I could 
 just prototype it in the main module (no import), then 
 compiled them separately before linking. This should prevent 
 any possible inlining/purity cleverness. ~1s for ldmd2, ~2s 
 for dmd, which is business as normal.

 dmd is being clever and spotting that fmttable is pure, it 
 would be good if ldc/gdc could spot this to.
If so, should explicitly marking fmttable as pure close the gap for initial code?
Well it won't do any harm, but it really depends on what the compiler chooses to do with the information.
Nov 12 2015
prev sibling parent reply Walter Bright <newshound2 digitalmars.com> writes:
On 11/12/2015 6:44 AM, John Colvin wrote:
 dmd is being clever and spotting that fmttable is pure, it would be good if
 ldc/gdc could spot this to.
It's more than that - dmd's optimizer is designed to make use of the guarantees of a pure function. Since C/C++ do not have pure functions, ldc/gdc's optimizer may not have that capability.
Nov 12 2015
next sibling parent reply David Nadlinger <code klickverbot.at> writes:
On Thursday, 12 November 2015 at 21:16:25 UTC, Walter Bright 
wrote:
 It's more than that - dmd's optimizer is designed to make use 
 of the guarantees of a pure function. Since C/C++ do not have 
 pure functions, ldc/gdc's optimizer may not have that 
 capability.
Oh, GCC has had similar notions as a non-standard attribute for ages, and LLVM since its inception. At least for LDC, the reason why we do not currently lower many of the qualifiers like pure, nothrow, immutable, etc. is that LLVM will ruthlessly consider your code to exhibit undefined behavior if you try to be clever and violate them, subsequently optimizing based on that. In other words, if you cast away const/immutable and modify a variable, for instance, you might find that the entire function body magically disappears under your feet. Maybe it is time to revisit this, though, but last time I tried it broke druntime/Phobos in a couple of places. — David
Nov 12 2015
next sibling parent Iain Buclaw via Digitalmars-d <digitalmars-d puremagic.com> writes:
On 12 Nov 2015 10:25 pm, "David Nadlinger via Digitalmars-d" <
digitalmars-d puremagic.com> wrote:
 On Thursday, 12 November 2015 at 21:16:25 UTC, Walter Bright wrote:
 It's more than that - dmd's optimizer is designed to make use of the
guarantees of a pure function. Since C/C++ do not have pure functions, ldc/gdc's optimizer may not have that capability.
 Oh, GCC has had similar notions as a non-standard attribute for ages, and
LLVM since its inception.
 At least for LDC, the reason why we do not currently lower many of the
qualifiers like pure, nothrow, immutable, etc. is that LLVM will ruthlessly consider your code to exhibit undefined behavior if you try to be clever and violate them, subsequently optimizing based on that. In other words, if you cast away const/immutable and modify a variable, for instance, you might find that the entire function body magically disappears under your feet.
 Maybe it is time to revisit this, though, but last time I tried it broke
druntime/Phobos in a couple of places.

Same here, and for some very surprising reasons from what I recall.
Nov 12 2015
prev sibling next sibling parent Vladimir Panteleev <thecybershadow.lists gmail.com> writes:
On Thursday, 12 November 2015 at 21:24:30 UTC, David Nadlinger 
wrote:
 On Thursday, 12 November 2015 at 21:16:25 UTC, Walter Bright 
 wrote:
 [...]
Oh, GCC has had similar notions as a non-standard attribute for ages, and LLVM since its inception. At least for LDC, the reason why we do not currently lower many of the qualifiers like pure, nothrow, immutable, etc. is that LLVM will ruthlessly consider your code to exhibit undefined behavior if you try to be clever and violate them, subsequently optimizing based on that. In other words, if you cast away const/immutable and modify a variable, for instance, you might find that the entire function body magically disappears under your feet. Maybe it is time to revisit this, though, but last time I tried it broke druntime/Phobos in a couple of places.
That sounds awesome. Maybe only enable it for safe code?
Nov 13 2015
prev sibling parent reply Manu via Digitalmars-d <digitalmars-d puremagic.com> writes:
On 13 November 2015 at 08:38, Iain Buclaw via Digitalmars-d
<digitalmars-d puremagic.com> wrote:
 On 12 Nov 2015 10:25 pm, "David Nadlinger via Digitalmars-d"
 <digitalmars-d puremagic.com> wrote:
 On Thursday, 12 November 2015 at 21:16:25 UTC, Walter Bright wrote:
 It's more than that - dmd's optimizer is designed to make use of the
 guarantees of a pure function. Since C/C++ do not have pure functions,
 ldc/gdc's optimizer may not have that capability.
Oh, GCC has had similar notions as a non-standard attribute for ages, and LLVM since its inception. At least for LDC, the reason why we do not currently lower many of the qualifiers like pure, nothrow, immutable, etc. is that LLVM will ruthlessly consider your code to exhibit undefined behavior if you try to be clever and violate them, subsequently optimizing based on that. In other words, if you cast away const/immutable and modify a variable, for instance, you might find that the entire function body magically disappears under your feet. Maybe it is time to revisit this, though, but last time I tried it broke druntime/Phobos in a couple of places.
Same here, and for some very surprising reasons from what I recall.
These language mechanisms offer D a huge potential advantage, it would be really good to understand why we can't make use of them, and work towards fixing this. I don't think people should be surprised if the optimiser takes advantage of their code attribution. It may break existing code because violating these attributes never caused any problem before, but surely violating those attributes was never actually valid code, and it's reasonable that they expect their code to break in the future as compilers improve their ability to take advantage of these attributes? In the meantime, there probably needs to be strong warnings about violating attributes, and if patterns have emerged that rely on violating such attributes, we should publish a recommended alternative.
Nov 13 2015
parent Marc =?UTF-8?B?U2Now7x0eg==?= <schuetzm gmx.net> writes:
On Saturday, 14 November 2015 at 00:37:51 UTC, Manu wrote:
 In the meantime, there probably needs to be strong warnings 
 about violating attributes, and if patterns have emerged that 
 rely on violating such attributes, we should publish a 
 recommended alternative.
One pattern that comes to mind immediately is lazily initialized members in a const object. Another one that's already officially supported is impure debug statements in pure functions.
Nov 14 2015
prev sibling parent rsw0x <anonymous anonymous.com> writes:
On Thursday, 12 November 2015 at 21:16:25 UTC, Walter Bright 
wrote:
 On 11/12/2015 6:44 AM, John Colvin wrote:
 dmd is being clever and spotting that fmttable is pure, it 
 would be good if
 ldc/gdc could spot this to.
It's more than that - dmd's optimizer is designed to make use of the guarantees of a pure function. Since C/C++ do not have pure functions, ldc/gdc's optimizer may not have that capability.
gcc has had the pure function attribute since version 2.96 in 2000.
Nov 12 2015
prev sibling parent reply =?UTF-8?Q?Ali_=c3=87ehreli?= <acehreli yahoo.com> writes:
I would love to be convinced. :) Can someone come up with a reduced 
example please?

On 11/12/2015 03:59 AM, Daniel Kozak wrote:

      for (i=0; i < 1000000; ++i) {
          fmttable(table);
      }
I think what we are seeing here is more due to the unused side-effect in the loop, where compiling with -w fails compilation: Warning: calling deneme.fmttable without side effects discards return value of type string, prepend a cast(void) if intentional Ali
Nov 12 2015
parent reply =?UTF-8?Q?Ali_=c3=87ehreli?= <acehreli yahoo.com> writes:
On 11/12/2015 11:50 AM, Ali Çehreli wrote:
 I would love to be convinced. :) Can someone come up with a reduced
 example please?

 On 11/12/2015 03:59 AM, Daniel Kozak wrote:

  >      for (i=0; i < 1000000; ++i) {
  >          fmttable(table);
  >      }

 I think what we are seeing here is more due to the unused side-effect in
 the loop, where compiling with -w fails compilation:

 Warning: calling deneme.fmttable without side effects discards return
 value of type string, prepend a cast(void) if intentional

 Ali
Can someone please tell me if I am mistaken or not. Once again, I don't think this example is fast because the compiler reuses the return value of fmttable() in the loop. Rather, it simply removes the whole expression because its only side-effect is not used in the program. Perhaps that's what everybody else is saying anyway. :) (Why don't I look at the assembly myself? Going to a meeting... :p)

Ali
Nov 13 2015
parent cym13 <cpicard openmailbox.org> writes:
On Friday, 13 November 2015 at 19:59:51 UTC, Ali Çehreli wrote:
 On 11/12/2015 11:50 AM, Ali Çehreli wrote:
 I would love to be convinced. :) Can someone come up with a 
 reduced
 example please?

 On 11/12/2015 03:59 AM, Daniel Kozak wrote:

  >      for (i=0; i < 1000000; ++i) {
  >          fmttable(table);
  >      }

 I think what we are seeing here is more due to the unused 
 side-effect in
 the loop, where compiling with -w fails compilation:

 Warning: calling deneme.fmttable without side effects discards 
 return
 value of type string, prepend a cast(void) if intentional

 Ali
Can someone please tell me if I am mistaken not. Once again, I don't think this example is fast because the compiler reuses the return value of fmttable() in the loop. Rather, it simply removes the whole expression because its only side-effect is not used in the program. Perhaps that's what everybody else is saying anyway. :) (Why don't I look at the assembly myself? Going to a meeting... :p) Ali
I confirm it, here is the loop part:

        0x004375c2    31db           xor ebx, ebx
    ┌─> 0x004375c4    ffc3           inc ebx
    │   0x004375c6    81fb40420f00   cmp ebx, 0xf4240
    └─< 0x004375cc    72f6           jb 0x4375c4
        0x004375ce    8bfb           mov edi, ebx
        0x004375d0    e8230d0000     call sym._D3std5stdio14__T7writelnTiZ7writelnFNfiZv

(DMD 2.069 -O -release -inline -boundscheck=off, code from the first post)
Nov 13 2015