digitalmars.D.learn - an example of parallel calculation of metrics
- Jay Norwood (49/49) Sep 30 2015 This is something I'm playing with for work. We do this a lot,
- Jay Norwood (7/14) Sep 30 2015 should use reference parameters here:
- Jay Norwood (72/72) Sep 30 2015 This compiles and appears to execute correctly, but if I
- Ali Çehreli (14/16) Oct 01 2015 TR,"ALL_ACC");
- Jay Norwood (73/74) Oct 01 2015 Thanks. My particular use case, working with metric expressions,
- Ali Çehreli (18/24) Oct 01 2015 Makes sense. Please open a bug at least for investigation why tuples
- Jay Norwood (3/5) Oct 01 2015 ok, thanks. I opened the issue.
- Jay Norwood (40/43) Oct 01 2015 After re-reading your explanation, I see that the problem is only
- Jay Norwood (52/52) Oct 01 2015 So, this is a condensed version of the original problem. It looks
- Jay Norwood (2/2) Oct 01 2015 I re-submitted this as:
- Jay Norwood (60/60) Oct 01 2015 This is another attempt with the metric parallel processing. This
This is something I'm playing with for work. We do this a lot, capture counter events for some number of on-chip performance counters, compute some metrics, display the outputs. This seems ideal for the application. import std.algorithm, std.parallelism, std.range; import std.stdio; import std.datetime; import std.typecons; import std.meta; // define some input measurement sample tuples and output metric tuples alias TI = Tuple!(long, long, long, long, long); alias TO = Tuple!(long, long, long, long); // various metric definitions // the Tuples could also define names for each member and use the names here in the metrics. long met1( TI m){ return m[0] + m[1] + m[2]; } long met2( TI m){ return m[1] + m[2] + m[3]; } long met3( TI m){ return m[0] - m[1] + m[2]; } long met4( TI m){ return m[0] + m[1] - m[2]; } // a convenience to use all the metrics above as a list alias Metrics = AliasSeq!(met1,met2,met3,met4); void main(string[] argv) { auto samples = iota(1_000); auto meas = new TI[samples.length]; auto results = new TO[samples.length]; // Initialize some values for the measured samples foreach(i, ref m; meas){ m[0] = i; m[1] = i+1; m[2] = i+2; m[3] = i+3; m[4] = i+4; } std.datetime.StopWatch sw; sw.start(); ref TI getTerm(int i) { return meas[i]; } // compute the metric results for the above measured sample values in parallel taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); // how long did this take long exec_ms = sw.peek().msecs; writeln("results:", results); writeln("time:", exec_ms); }
Sep 30 2015
On Wednesday, 30 September 2015 at 22:24:25 UTC, Jay Norwood wrote:
> // various metric definitions // the Tuples could also define names for each member and use the names here in the metrics. long met1( TI m){ return m[0] + m[1] + m[2]; } long met2( TI m){ return m[1] + m[2] + m[3]; } long met3( TI m){ return m[0] - m[1] + m[2]; } long met4( TI m){ return m[0] + m[1] - m[2]; }
should use reference parameters here: long met1( ref TI m){ return m[0] + m[1] + m[2]; } long met2( ref TI m){ return m[1] + m[2] + m[3]; } long met3( ref TI m){ return m[0] - m[1] + m[2]; } long met4( ref TI m){ return m[0] + m[1] - m[2]; }
Sep 30 2015
This compiles and appears to execute correctly, but if I uncomment the taskPool line I get a compile error message about wrong buffer type. Am I breaking some rule for std.parallelism.amap? import std.algorithm, std.parallelism, std.range; import std.stdio; import std.datetime; import std.typecons; import std.meta; // define some input measurement sample tuples and output metric tuples alias TR = Tuple!(double,"per_sec", double, "per_cycle", long,"raw"); alias TI = Tuple!(long, "proc_cyc", long, "DATA_RD", long, "DATA_WR", long, "INST_FETCH", long, "L1I_MISS", long, "L1I_HIT", long,"L1D_HIT", long, "L1D_MISS"); alias TO = Tuple!(TR,"L1_MISS", TR, "L1_HIT", TR,"DATA_ACC", TR,"ALL_ACC"); const double CYC_PER_SEC = 1_600_000_000; // various metric definitions // using Tuples with defined names for each member, and use the names here in the metrics. TR met_l1_miss ( ref TI m){ TR rv; with(rv) with(m) { raw = L1I_MISS+L1D_MISS; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } TR met_l1_hit ( ref TI m){ TR rv; with(rv) with(m) { raw = L1I_HIT+L1D_HIT; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } TR met_data_acc ( ref TI m){ TR rv; with(rv) with(m) { raw = DATA_RD+DATA_WR; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } TR met_all_acc( ref TI m){ TR rv; with(rv) with(m) { raw = DATA_RD+DATA_WR+INST_FETCH; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } // a convenience to use all the metrics above as a list alias Metrics = AliasSeq!(met_l1_miss,met_l1_hit,met_data_acc,met_all_acc); void main(string[] argv) { auto samples = iota(1_00); auto meas = new TI[samples.length]; auto results = new TO[samples.length]; // Initialize some values for the measured samples foreach(i, ref m; meas){ with(m){ proc_cyc = 1_000_000+i*2; DATA_RD = 1000+i; DATA_WR= 2000+i; INST_FETCH=proc_cyc/2; L1I_HIT= INST_FETCH-100; L1I_MISS=100; L1D_HIT= 
DATA_RD+DATA_WR - 200; L1D_MISS=200;} } std.datetime.StopWatch sw; sw.start(); ref TI getTerm(int i) { return meas[i]; } // compute the metric results for the above measured sample values in parallel //taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); TR rv1 = met_l1_miss( meas[0]); TR rv2 = met_l1_hit( meas[0]); TR rv3 = met_data_acc( meas[0]); TR rv4 = met_all_acc( meas[0]); // how long did this take long exec_ms = sw.peek().msecs; writeln("measurements:", meas[0]); writeln("rv1:", rv1); writeln("rv2:", rv2); writeln("rv3:", rv3); writeln("rv4:", rv4); writeln("results:", results[1]); writeln("time:", exec_ms); }
Sep 30 2015
On 09/30/2015 09:15 PM, Jay Norwood wrote:
> alias TO = Tuple!(TR,"L1_MISS", TR, "L1_HIT", TR,"DATA_ACC",TR,"ALL_ACC");
Looks like a bug. Workaround: Get rid of member names there: alias TO = Tuple!(TR, TR, TR, TR);
> //taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results);
For some reason, having member names prevents 'results' from passing one of amap's requirements. The following check in std.parallelism thinks that 'results' does not have random assignable elements if it is a Tuple with member names. else static if(randAssignable!(Args[$ - 1]) && Args.length > 1) { static assert(0, "Wrong buffer type."); } Ali
Oct 01 2015
On Thursday, 1 October 2015 at 07:03:40 UTC, Ali Çehreli wrote:
> Looks like a bug. Workaround: Get rid of member names
Thanks. My particular use case, working with metric expressions, is easier to understand if I use the names. I converted the use of Tuple to struct to see if I could get an easier error msg. Turns out the use of struct also results in much cleaner writeln text. Still has the compile error, though. import std.algorithm, std.parallelism, std.range; import std.stdio; import std.datetime; import std.typecons; import std.meta; // define some input measurement sample tuples and output metric tuples struct TR {double per_sec; double per_cycle; long raw;} struct TI {long proc_cyc; long DATA_RD; long DATA_WR; long INST_FETCH; long L1I_MISS; long L1I_HIT; long L1D_HIT; long L1D_MISS;} struct TO { TR L1_MISS; TR L1_HIT; TR DATA_ACC; TR ALL_ACC;} const double CYC_PER_SEC = 1_600_000_000; // various metric definitions // using Tuples with defined names for each member, and use the names here in the metrics. 
TR met_l1_miss ( ref TI m){ TR rv; with(rv) with(m) { raw = L1I_MISS+L1D_MISS; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } TR met_l1_hit ( ref TI m){ TR rv; with(rv) with(m) { raw = L1I_HIT+L1D_HIT; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } TR met_data_acc ( ref TI m){ TR rv; with(rv) with(m) { raw = DATA_RD+DATA_WR; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } TR met_all_acc( ref TI m){ TR rv; with(rv) with(m) { raw = DATA_RD+DATA_WR+INST_FETCH; per_cycle = cast(double)raw/proc_cyc; per_sec = per_cycle*CYC_PER_SEC;} return rv; } // a convenience to use all the metrics above as a list alias Metrics = AliasSeq!(met_l1_miss,met_l1_hit,met_data_acc,met_all_acc); void main(string[] argv) { auto samples = iota(1_00); auto meas = new TI[samples.length]; auto results = new TO[samples.length]; // Initialize some values for the measured samples foreach(i, ref m; meas){ with(m){ proc_cyc = 1_000_000+i*2; DATA_RD = 1000+i; DATA_WR= 2000+i; INST_FETCH=proc_cyc/2; L1I_HIT= INST_FETCH-100; L1I_MISS=100; L1D_HIT= DATA_RD+DATA_WR - 200; L1D_MISS=200;} } std.datetime.StopWatch sw; sw.start(); ref TI getTerm(int i) { return meas[i]; } // compute the metric results for the above measured sample values in parallel taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); TR rv1 = met_l1_miss( meas[0]); TR rv2 = met_l1_hit( meas[0]); TR rv3 = met_data_acc( meas[0]); TR rv4 = met_all_acc( meas[0]); // how long did this take long exec_ms = sw.peek().msecs; writeln("measurements:", meas[0]); writeln("rv1:", rv1); writeln("rv2:", rv2); writeln("rv3:", rv3); writeln("rv4:", rv4); writeln("results:", results[1]); writeln("time:", exec_ms); }
Oct 01 2015
On 10/01/2015 08:56 AM, Jay Norwood wrote:
> Thanks. My particular use case, working with metric expressions, is easier to understand if I use the names.
Makes sense. Please open a bug at least for investigation why tuples with named members don't work with amap.
> I converted the use of Tuple to struct to see if I could get an easier error msg. Turns out the use of struct also results in much cleaner writeln text. Still has the compile error, though.
We have to live with the fact that amap and friends produce a Tuple result if there are multiple functions. A struct won't work. However, if you prove to yourself that the result tuple and your struct have the same memory layout, you can cast the tuple slice to struct slice after calling amap: alias TO_for_amap_result = Tuple!(TR, TR, TR, TR); struct TO { TR L1_MISS; TR L1_HIT; TR DATA_ACC; TR ALL_ACC;} // ... auto results_for_amap = new TO_for_amap_result[samples.length]; // ... taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results_for_amap); auto results = cast(TO[])results_for_amap; // Use 'results' from this point on... Ali
Oct 01 2015
On Thursday, 1 October 2015 at 18:08:31 UTC, Ali Çehreli wrote:
> Makes sense. Please open a bug at least for investigation why tuples with named members don't work with amap.
OK, thanks. I opened the issue. https://issues.dlang.org/show_bug.cgi?id=15134
Oct 01 2015
On Thursday, 1 October 2015 at 18:08:31 UTC, Ali Çehreli wrote:
> However, if you prove to yourself that the result tuple and your struct have the same memory layout, you can cast the tuple slice to struct slice after calling amap:
After re-reading your explanation, I see that the problem is only that the result needs to be a Tuple. It works with named tuple members in this example as the result and array of struct as the input. I'll re-check if the multi-member result also works with named members. I'll update the issue report. import std.meta; import std.stdio; // define some input measurement sample tuples and output metric tuples struct TI {long L1I_MISS; long L1D_MISS; } alias TO = Tuple!(long, "raw"); // various metric definitions // using Tuples with defined names for each member, and use the names here in the metrics. TO met_l1_miss ( ref TI m){ TO rv; rv.raw = m.L1I_MISS+m.L1D_MISS; return rv; } // a convenience to use all the metrics above as a list alias Metrics = AliasSeq!(met_l1_miss); void main(string[] argv) { auto samples = iota(100); auto meas = new TI[samples.length]; auto results = new TO[samples.length]; // Initialize some values for the measured samples foreach(i, ref m; meas){ m.L1D_MISS= 100+i; m.L1I_MISS=100-i; } ref TI getTerm(int i) { return meas[i]; } // compute the metric results for the above measured sample values in parallel taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); TO rv1 = met_l1_miss( meas[1]); writeln("measurements:", meas[1]); writeln("rv1:", rv1); writeln("results:", results[1]); }
Oct 01 2015
So, this is a condensed version of the original problem. It looks like the problem is that the return value for taskPool.amap can't be a tuple of tuples or a tuple of struct. Either way, it fails with the Wrong buffer type error message if I uncomment the taskPool line import std.algorithm, std.parallelism, std.range; import std.typecons; import std.meta; import std.stdio; // define some input measurement sample tuples and output metric tuples struct TR { long raw; double per_cyc;} //alias TR = Tuple!(long, "raw", double, "per_cyc"); alias TI = Tuple!(long, "L1I_MISS",long, "L1D_MISS", long, "L1D_READ", long, "L1D_WRITE", long, "cycles" ); alias TO = Tuple!(TR, "L1_MISS", TR, "L1D_ACCESS"); // various metric definitions // using Tuples with defined names for each member, and use the names here in the metrics. TR met_l1_miss ( ref TI m){ TR rv; rv.raw = m.L1I_MISS+m.L1D_MISS; rv.per_cyc = cast(double)rv.raw/m.cycles; return rv; } TR met_l1_access ( ref TI m){ TR rv; rv.raw = m.L1D_READ+m.L1D_WRITE; rv.per_cyc = cast(double)rv.raw/m.cycles; return rv; } // a convenience to use all the metrics above as a list alias Metrics = AliasSeq!(met_l1_miss, met_l1_access); void main(string[] argv) { auto samples = iota(100); auto meas = new TI[samples.length]; auto results = new TO[samples.length]; // Initialize some values for the measured samples foreach(i, ref m; meas){ m.L1D_MISS= 100+i; m.L1I_MISS=100-i; m.L1D_READ= 200+i; m.L1D_WRITE=200-i; m.cycles= 10+i; } ref TI getTerm(int i) { return meas[i]; } // compute the metric results for the above measured sample values in parallel //taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); TR rv1 = met_l1_miss( meas[1]); TR rv2 = met_l1_access( meas[1]); writeln("measurements:", meas[1]); writeln("rv1:", rv1); writeln("rv2:", rv2); writeln("results:", results[1]); }
Oct 01 2015
I re-submitted this as: https://issues.dlang.org/show_bug.cgi?id=15135
Oct 01 2015
This is another attempt with the metric parallel processing. This uses the results only to return an int value, which could be used later as an error return value. The metric value locations are now allocated as a part of the input measurement values tuple. The Tuple vs struct definitions seem to have a big difference in default output formatting. import std.algorithm, std.parallelism, std.range; import std.typecons; import std.meta; import std.stdio; // define some input measurement sample tuples and output metric tuples alias TR = Tuple!(long,"raw",double, "per_cycle"); //struct TR {long raw; double per_cycle;} alias TO = Tuple!(TR, "l1_miss", TR, "l1_access" ); //struct TO {TR l1_miss; TR l1_access; }; alias TI = Tuple!(long, "L1I_MISS",long, "L1D_MISS", long, "L1D_READ", long, "L1D_WRITE", long, "cycles", TO, "res"); // various metric definitions // using Tuples with defined names for each member, and use the names here in the metrics. long met_l1_miss ( ref TI m){ return m.L1I_MISS + m.L1D_MISS; } long met_l1_access ( ref TI m){ return m.L1D_READ + m.L1D_WRITE; } int met_all (ref TI m) { with (m.res){ l1_miss.raw = met_l1_miss(m); l1_access.raw = met_l1_access(m); l1_miss.per_cycle = (m.cycles == 0)? double.nan : l1_miss.raw / cast(double)m.cycles; l1_access.per_cycle = (m.cycles == 0)? 
double.nan : l1_access.raw / cast(double)m.cycles; } return 0; } // a convenience to use all the metrics above as a list alias Metrics = AliasSeq!(met_all); void main(string[] argv) { auto samples = iota(100); auto meas = new TI[samples.length]; auto results = new int[samples.length]; // Initialize some values for the measured samples foreach(i, ref m; meas){ m.L1D_MISS= 100+i; m.L1I_MISS=100-i; m.L1D_READ= 200+i; m.L1D_WRITE=200-i; m.cycles= 10+i; } ref TI getTerm(int i) { return meas[i]; } // compute the metric results for the above measured sample values in parallel taskPool.amap!(Metrics)(std.algorithm.map!getTerm(samples),results); writeln("measurements:", meas[1]); foreach(ref m; meas){ writeln(m.res); } }
Oct 01 2015