digitalmars.D - Combsort comparison
- bearophile (412/412) Jun 20 2010 I have translated another small Python/C++ program to D, this is perform...
- Lutger (36/44) Jun 20 2010 I would be interested in this too, thanks for posting this. If I underst...
- bearophile (5/8) Jun 20 2010 I don't think it's crap with a linked list, it can be slower, but not so...
- Lutger (11/48) Jun 20 2010 I get about 30% improvement if the inner loop is rewritten to:
- bearophile (5/6) Jun 20 2010 Can you please show me the whole program?
- bearophile (101/112) Jun 20 2010 This is a version that works (note the popFront at the end of the loop):
I have translated another small Python/C++ program to D, this is performs the combsort: http://en.wikipedia.org/wiki/Combsort The original code comes from Wikipedia and the rosettacode site. This is a Python version, the algorithm is very simple: def swap(alist, i, j): alist[i], alist[j] = alist[j], alist[i] def combsort(a): gap = len(a) swaps = True while gap > 1 or swaps: gap = max(1, int(gap / 1.2473)) swaps = False for i in xrange(len(a) - gap): if a[i] > a[i + gap]: swap(a, i, i + gap) swaps = True a = [88,18,31,44,4,0,8,81,14,78,20,76,84,33,73,75,82,5,62,70] combsort(a) assert a == sorted(a) A translation to D2 is easy and painless, no D/Phobos bugs found while writing it: import std.algorithm; void combsort(T)(T[] input) { int gap = input.length; bool swaps = true; while (gap > 1 || swaps) { gap = max(1, cast(int)(gap / 1.2473)); swaps = false; foreach (i; 0 .. input.length - gap) if (input[i] > input[i + gap]) { swap(input[i], input[i + gap]); swaps = true; } } } void main() { auto a = [88,18,31,44,4,0,8,81,14,78,20,76,84,33,73,75,82,5,62,70]; combsort(a); assert(a == a.dup.sort); } On Wikipedia I have seen a more general C++ version (see the C++ code at the bottom), so I have written a D version that uses ranges, able (in theory) to bottom. The translation to more generic D has taken me some time because I am not trained to use ranges, but I have found no real problem in translating the algorithm. 1) I have tried to use the combsort to sort a std.container.SList, but you can't perform array() on such list, or even a walkLength (the same is true on a static array as input), so in practice it can't be used by combsort. In my opinion array() and walkLength() must work with anything that can be iterated with a foreach. My bug reports number 4346 and 4114 express this opinion. 2) D2 contract programming doesn't support the "old" (original input data view) yet, so I can't use a postcondition to test if combsort has actually done its work correctly (the result is sorted and contains the same items of the input). To solve this I have used a static nested function that does the actual combsort work, called from an outer function that in debug mode performs the tests. This is not nice. a way to use them more efficiently, I have just started using D Ranges. In this program the left and right "cursors" keep their relative distance, and in practice the C++ program shows that two "single" iterators (instead of two pairs) can be enough for this program. If you know how to improve the ranges I have very often heard that generic C++ programs (like the C++ version of combsort able to work on both linked lists and arrays) are about as efficient as the specialized C ones, I have so far accepted this, but this time I have done some benchmarks. The C version of the code is adapted from the C code present in Wikipedia, it generates 10 million pseudo-random numbers on the heap and then sorts them. The code. Timings, seconds, best of 3: D3: 5.94 D2: 3.13 C++: 3.05 D1: 2.90 C: 2.72 gcc -O3 -Wall comb_c.c -o comb_c g++ -O3 -Wall comb_cpp.cpp -o comb_cpp dmd -O -release -inline comb_d1 GCC version 4.5.0, dmd v2.047. I have not timed the Python version because it's probably slow. probably not so important. about zero if the D code is compiled with LDC. slower (and the random number generation takes constant time, so probably the Even in the C Vs C++ case there is a bit of difference, but it's probably small enough, it can be significant only in special situations. When writing very generic code in D I will take a look at the performance compared to a more specialized version of the code (often specialized for arrays). ----------------------------- // C version #include "stdio.h" #include "stdlib.h" //#define DEBUG void combsort(int items[], int len) { int swapped = 1; int gap = len; while (gap > 1 || swapped) { if (gap > 1) gap /= 1.247330950103979; swapped = 0; int i, j; for (i = 0, j = gap; j < len; i++, j++) { if (items[i] > items[j]) { int temp = items[i]; items[i] = items[j]; items[j] = temp; swapped = 1; } } } } static inline int myrandom() { unsigned long const IM = 139968; unsigned long const IA = 3877; unsigned long const IC = 29573; static unsigned long last = 42; last = (last * IA + IC) % IM; return last; } #ifdef DEBUG int issorted(int items[], int len) { if (len < 2) return 1; int i; for (i = 0; i < (len-1); i++) if (items[i] > items[i+1]) return 0; return 1; } #endif #define N (10 * 1000 * 1000) int main() { int* v = malloc(sizeof(int) * N); int i; for (i = 0; i < N; i++) v[i] = myrandom(); #ifdef DEBUG for (i = 0; i < N; i++) printf("%d ", v[i]); printf("\n"); #endif combsort(v, N); #ifdef DEBUG for (i = 0; i < N; i++) printf("%d ", v[i]); printf("\n"); #endif printf("%d\n", v[N-1]); #ifdef DEBUG if (!issorted(v, N)) printf("NOT sorted\n"); #endif return 0; } ----------------------------- import std.c.stdlib: malloc; import std.c.stdio: printf; void combsort(int* items, int len) { int swapped = 1; int gap = len; while (gap > 1 || swapped) { if (gap > 1) gap /= 1.247330950103979; swapped = 0; int i, j; for (i = 0, j = gap; j < len; i++, j++) { if (items[i] > items[j]) { int temp = items[i]; items[i] = items[j]; items[j] = temp; swapped = 1; } } } } int myrandom() { enum uint IM = 139968; enum uint IA = 3877; enum uint IC = 29573; static uint last = 42; last = (last * IA + IC) % IM; return last; } debug { int issorted(int* items, int len) { if (len < 2) return 1; int i; for (i = 0; i < (len-1); i++) if (items[i] > items[i+1]) return 0; return 1; } } void main() { enum int N = 10_000_000; int* v = cast(int*)malloc(int.sizeof * N); int i; for (i = 0; i < N; i++) v[i] = myrandom(); debug { for (i = 0; i < N; i++) printf("%d ", v[i]); printf("\n"); } combsort(v, N); debug { for (i = 0; i < N; i++) printf("%d ", v[i]); printf("\n"); } printf("%d\n", v[N-1]); debug { if (!issorted(v, N)) printf("NOT sorted\n"); } } ----------------------------- import std.c.stdio: printf; import std.c.stdlib: malloc; import std.algorithm: swap; void combsort(T)(T[] input) { int gap = input.length; bool swaps = true; while (gap > 1 || swaps) { if (gap > 1) gap /= 1.2473; swaps = false; foreach (i; 0 .. input.length - gap) if (input[i] > input[i + gap]) { swap(input[i], input[i + gap]); swaps = true; } } } int myrandom() { enum uint IM = 139968; enum uint IA = 3877; enum uint IC = 29573; static uint last = 42; last = (last * IA + IC) % IM; return last; } debug { bool issorted(int[] items) { if (items.length < 2) return true; foreach (i, el; items[0 .. $-1]) if (el > items[i+1]) return false; return true; } } void main() { enum int N = 10_000_000; int* v = cast(int*)malloc(int.sizeof * N); int i; for (i = 0; i < N; i++) v[i] = myrandom(); debug { for (i = 0; i < N; i++) printf("%d ", v[i]); printf("\n"); } combsort(v[0 .. N]); debug { for (i = 0; i < N; i++) printf("%d ", v[i]); printf("\n"); } printf("%d\n", v[N-1]); debug { if (!issorted(v[0 .. N])) printf("NOT sorted\n"); } } ----------------------------- import std.array: empty, popFront, popFrontN, front; import std.range: walkLength, isForwardRange, hasSwappableElements; import std.algorithm: swap, binaryFun; import std.c.stdlib: malloc; import std.c.stdio: printf; debug { import std.contracts: enforce; import std.algorithm: sort, equal; import std.array: array; } void combsort(alias less="a < b", Range)(Range data) if (isForwardRange!Range && hasSwappableElements!Range) { static void combsort_impl(Range data) { enum double SHRINK_FACTOR = 1.247330950103979; int gap = walkLength(data); bool swapped = true; while (gap > 1 || swapped) { if (gap > 1) gap /= SHRINK_FACTOR; swapped = false; auto right = data; popFrontN(right, gap); for (auto left = data; !right.empty; left.popFront, right.popFront) { if (binaryFun!less(right.front, left.front)) { swap(left.front, right.front); swapped = true; } } } } // postcondition verified in debug mode only. // This is a workaround, D postconditions don't // support the "old" (original input data view) yet. debug { auto data_copy = array(data); sort!(less)(data_copy); combsort_impl(data); enforce(equal(data, data_copy)); } else { combsort_impl(data); } } // the following is C-like code to minimize experimental variables int myrandom() { enum uint IM = 139968; enum uint IA = 3877; enum uint IC = 29573; static uint last = 42; last = (last * IA + IC) % IM; return last; } debug { bool issorted(int[] items) { if (items.length < 2) return true; foreach (i, el; items[0 .. $-1]) if (el > items[i+1]) return false; return true; } } void main() { enum int N = 10_000_000; int* v = cast(int*)malloc(int.sizeof * N); int i; for (i = 0; i < N; i++) v[i] = myrandom(); debug { for (i = 0; i < N; i++) printf("%d ", v[i]); printf("\n"); } combsort(v[0 .. N]); debug { for (i = 0; i < N; i++) printf("%d ", v[i]); printf("\n"); } printf("%d\n", v[N-1]); debug { if (!issorted(v[0 .. N])) printf("NOT sorted\n"); } } ----------------------------- // C++ version #include "stdio.h" #include "stdlib.h" #include <algorithm> //#define DEBUG template<class ForwardIterator> void combsort(ForwardIterator first, ForwardIterator last) { static const double shrink_factor = 1.247330950103979; typedef typename std::iterator_traits<ForwardIterator>::difference_type difference_type; difference_type gap = std::distance(first, last); bool swaps = true; while (gap > 1 || swaps) { if (gap > 1) gap = static_cast<difference_type>(gap / shrink_factor); swaps = false; ForwardIterator itLeft(first); ForwardIterator itRight(first); std::advance(itRight, gap); for ( ; itRight != last; ++itLeft, ++itRight) if (*itRight < *itLeft){ std::iter_swap(itLeft, itRight); swaps = true; } } } static inline int myrandom() { unsigned long const IM = 139968; unsigned long const IA = 3877; unsigned long const IC = 29573; static unsigned long last = 42; last = (last * IA + IC) % IM; return last; } #ifdef DEBUG int issorted(int items[], int len) { if (len < 2) return 1; int i; for (i = 0; i < (len-1); i++) if (items[i] > items[i+1]) return 0; return 1; } #endif #define N (10 * 1000 * 1000) int main() { int* v = (int*)malloc(sizeof(int) * N); int i; for (i = 0; i < N; i++) v[i] = myrandom(); #ifdef DEBUG for (i = 0; i < N; i++) printf("%d ", v[i]); printf("\n"); #endif combsort(&(v[0]), &(v[N])); #ifdef DEBUG for (i = 0; i < N; i++) printf("%d ", v[i]); printf("\n"); #endif printf("%d\n", v[N-1]); #ifdef DEBUG if (!issorted(v, N)) printf("NOT sorted\n"); #endif return 0; } ----------------------------- Bye, bearophile
Jun 20 2010
bearophile wrote: ...a way to use them more efficiently, I have just started using D Ranges. In this program the left and right "cursors" keep their relative distance, and in practice the C++ program shows that two "single" iterators (instead of two pairs) can be enough for this program. If you know how to improve the rangesI would be interested in this too, thanks for posting this. If I understood the algorithm right, the basic idea is a sliding window over the underlying container. Can this be expressed by a single range that does not offer random access at all? Probably not since a range can only shrink. Am I right in thinking this algorithm basically requires random access? Both the C++ version and the D version will work for ranges that don't do that, but the performance difference won't matter anymore because it is likely crap anyway. I am curious where exactly the performance difference comes from. A quick profile did not help much, everything was inlined! With this translation exclusively for random access ranges performance is equal to your first D version (that uses arrays directly): void combsort(alias less="a < b", Range)(Range data) if (isRandomAccessRange!Range && hasSwappableElements!Range) { alias binaryFun!less cmp; enum double SHRINK_FACTOR = 1.247330950103979; int gap = data.length; bool swaps = true; while (gap > 1 || swaps) { if (gap > 1) gap /= SHRINK_FACTOR; swaps = false; foreach (i; 0 .. data.length - gap) { if (cmp(data[i + gap], data[i])) { swap(data[i], data[i + gap]); swaps = true; } } } }
Jun 20 2010
Lutger:Am I right in thinking this algorithm basically requires random access? Both the C++ version and the D version will work for ranges that don't do that, but the performance difference won't matter anymore because it is likely crap anyway.I don't think it's crap with a linked list, it can be slower, but not so much because the inner loop of algorithm doesn't need random access, it just just advances two cursors by one step each. There is a bit of wasted time before the inner loop, to set the right cursor at its right place, but it's a small number of steps compared to the whole O(n^2) play. I have not tried timed it with linked lists because SList isn't working yet. Bye, bearophile
Jun 20 2010
import std.array: empty, popFront, popFrontN, front; import std.range: walkLength, isForwardRange, hasSwappableElements; import std.algorithm: swap, binaryFun; import std.c.stdlib: malloc; import std.c.stdio: printf; debug { import std.contracts: enforce; import std.algorithm: sort, equal; import std.array: array; } void combsort(alias less="a < b", Range)(Range data) if (isForwardRange!Range && hasSwappableElements!Range) { static void combsort_impl(Range data) { enum double SHRINK_FACTOR = 1.247330950103979; int gap = walkLength(data); bool swapped = true; while (gap > 1 || swapped) { if (gap > 1) gap /= SHRINK_FACTOR; swapped = false; auto right = data; popFrontN(right, gap); for (auto left = data; !right.empty; left.popFront, right.popFront) { if (binaryFun!less(right.front, left.front)) { swap(left.front, right.front); swapped = true; } } } }I get about 30% improvement if the inner loop is rewritten to: foreach (i; 0 .. total - gap) // total was already computed by walkLength { left.popFront; right.popFront; if (binaryFun!less(right.front, left.front)) { swap(left.front, right.front); swapped = true; } } Quite surprising. i don't understand asm well enough to analyse this further.
Jun 20 2010
Lutger:I get about 30% improvement if the inner loop is rewritten to:Can you please show me the whole program? Don't show inner loops, everybody on the D newsgroups: let's take the habit of posting runnable code, as done on the Python newsgroups, and not broken cuts. Thank you. Bye, bearophile
Jun 20 2010
Lutger:I get about 30% improvement if the inner loop is rewritten to: foreach (i; 0 .. total - gap) // total was already computed by walkLength { left.popFront; right.popFront; if (binaryFun!less(right.front, left.front)) { swap(left.front, right.front); swapped = true; } }This is a version that works (note the popFront at the end of the loop): import std.array: empty, popFront, popFrontN, front; import std.range: walkLength, isForwardRange, hasSwappableElements; import std.algorithm: swap, binaryFun; import std.c.stdlib: malloc; import std.c.stdio: printf; debug { import std.contracts: enforce; import std.algorithm: sort, equal; import std.array: array; } void combsort(alias less="a < b", Range)(Range data) if (isForwardRange!Range && hasSwappableElements!Range) { static void combsort_impl(Range data) { enum double SHRINK_FACTOR = 1.247330950103979; size_t total = walkLength(data); size_t gap = total; bool swapped = true; while (gap > 1 || swapped) { if (gap > 1) gap /= SHRINK_FACTOR; swapped = false; auto left = data; auto right = data; popFrontN(right, gap); foreach (_; 0 .. total - gap) { if (binaryFun!less(right.front, left.front)) { swap(left.front, right.front); swapped = true; } left.popFront; right.popFront; } } } // postcondition verified in debug mode only. // This is a workaround, D postconditions don't // support the "old" (original input data view) yet. debug { auto data_copy = array(data); sort!(less)(data_copy); combsort_impl(data); enforce(equal(data, data_copy)); } else { combsort_impl(data); } } int myrandom() { enum uint IM = 139968; enum uint IA = 3877; enum uint IC = 29573; static uint last = 42; last = (last * IA + IC) % IM; return last; } debug { bool issorted(int[] items) { if (items.length < 2) return true; foreach (i, el; items[0 .. $-1]) if (el > items[i+1]) return false; return true; } } void main() { enum int N = 10_000_000; int* v = cast(int*)malloc(int.sizeof * N); int i; for (i = 0; i < N; i++) v[i] = myrandom(); debug { for (i = 0; i < N; i++) printf("%d ", v[i]); printf("\n"); } combsort(v[0 .. N]); debug { for (i = 0; i < N; i++) printf("%d ", v[i]); printf("\n"); } printf("%d\n", v[N-1]); debug { if (!issorted(v[0 .. N])) printf("NOT sorted\n"); } } In all the programs I now use a size_t to keep the result of walkLenth and gap. My timings don't show a 30% improvement: Timings, seconds, best of 3: D3: 5.94 D4: 5.86 D2: 3.13 C++: 3.05 D1: 2.90 C: 2.72 Bye, bearophile
Jun 20 2010