D - A third example use of C macros that has not been discussed
- Paul Sheer (75/75) Feb 01 2003 There are many situations where you want to reimplement
- Andy Friesen (41/126) Feb 01 2003 Maybe I'm missing something, but it seems to me that you could do that
- Mike Wynn (12/87) Feb 01 2003 IMHO:
There are many situations where you want to reimplement a piece of code several times, with a different type for each. This can happen for optimization, or for marshalling functions. This is the strongest reason for support of a macro preprocessor. This example shows an optimized memcpy function that copies in words at a time, properly accounting for possible alignment differences on processors that do not support non-aligned word stores. It is elegantly done with macros of course. -paul -------- /* this must be set to the most efficient copying type - usually unsigned long: */ typedef unsigned long cpy_t; #define word_copy(t,d,s,count) \ do { \ unsigned int c; \ char *d8 = (char *) (d); \ char *s8 = (char *) (s); \ register t a0, a1, *dst, *src; \ c = (unsigned long) (d) & (sizeof (t) - 1); \ while (count && (c & (sizeof (t) - 1))) \ (*d8++ = *s8++), count--, c++; \ dst = (t *) d8; \ src = (t *) s8; \ while (count >= (sizeof (t)) * 2) { \ a0 = src[0]; \ a1 = src[1]; \ count -= (sizeof (t)) * 2; \ dst[0] = a0; \ dst[1] = a1; \ src += 2; \ dst += 2; \ } \ while (count >= (sizeof (t))) { \ *dst++ = *src++; \ count -= sizeof (t); \ } \ d8 = (char *) dst; \ s8 = (char *) src; \ while (count--) \ *d8++ = *s8++; \ } while (0) #define byte_copy(t,d,s,count) \ do { \ char *d8 = (char *) (d); \ char *s8 = (char *) (s); \ while (count--) \ *d8++ = *s8++; \ } while (0) void *memcpy (void *_dest, const void *_src, size_t count) { unsigned int f; /* check alignment */ f = sizeof (cpy_t); while ((((unsigned long) _src) & (f - 1)) != (((unsigned long) _dest & (f - 1)))) f >>= 1; switch (f) { case 8: word_copy (u_int64_t, _dest, _src, count); break; case 4: word_copy (u_int32_t, _dest, _src, count); break; case 2: word_copy (u_int16_t, _dest, _src, count); break; case 1: byte_copy (u_int8_t, _dest, _src, count); break; } return (void *) _dest; }
Feb 01 2003
Maybe I'm missing something, but it seems to me that you could do that with templates pretty easily. In so doing, you get a bit of type safety, and you avoid evaluating arguments more than once. template CopyLoop(T) { void word_copy(T* dest, T* src, int count) { do { uint c; char *d8 = (char *) (dest); char *s8 = (char *) (src); register t a0, a1, *dst, *src; c = (ulong) (dest) & (sizeof (t) - 1); while (count && (c & (sizeof (t) - 1))) (*d8++ = *s8++), count--, c++; dst = (t *) d8; src = (t *) s8; while (count >= (sizeof (T)) * 2) { a0 = src[0]; a1 = src[1]; count -= (sizeof (T)) * 2; dst[0] = a0; dst[1] = a1; src += 2; dst += 2; } while (count >= (sizeof (T))) { *dst++ = *src++; count -= sizeof (T); } d8 = (char *) dst; s8 = (char *) src; while (count--) *d8++ = *s8++; } while (0); } } Paul Sheer wrote: There are many situations where you want to reimplement a piece of code several times, with a different type for each. This can happen for optimization, or for marshalling functions. This is the strongest reason for support of a macro preprocessor. This example shows an optimized memcpy function that copies in words at a time, properly accounting for possible alignment differences on processors that do not support non-aligned word stores. It is elegantly done with macros of course. 
-paul -------- /* this must be set to the most efficient copying type - usually unsigned long: */ typedef unsigned long cpy_t; #define word_copy(t,d,s,count) \ do { \ unsigned int c; \ char *d8 = (char *) (d); \ char *s8 = (char *) (s); \ register t a0, a1, *dst, *src; \ c = (unsigned long) (d) & (sizeof (t) - 1); \ while (count && (c & (sizeof (t) - 1))) \ (*d8++ = *s8++), count--, c++; \ dst = (t *) d8; \ src = (t *) s8; \ while (count >= (sizeof (t)) * 2) { \ a0 = src[0]; \ a1 = src[1]; \ count -= (sizeof (t)) * 2; \ dst[0] = a0; \ dst[1] = a1; \ src += 2; \ dst += 2; \ } \ while (count >= (sizeof (t))) { \ *dst++ = *src++; \ count -= sizeof (t); \ } \ d8 = (char *) dst; \ s8 = (char *) src; \ while (count--) \ *d8++ = *s8++; \ } while (0) #define byte_copy(t,d,s,count) \ do { \ char *d8 = (char *) (d); \ char *s8 = (char *) (s); \ while (count--) \ *d8++ = *s8++; \ } while (0) void *memcpy (void *_dest, const void *_src, size_t count) { unsigned int f; /* check alignment */ f = sizeof (cpy_t); while ((((unsigned long) _src) & (f - 1)) != (((unsigned long) _dest & (f - 1)))) f >>= 1; switch (f) { case 8: word_copy (u_int64_t, _dest, _src, count); break; case 4: word_copy (u_int32_t, _dest, _src, count); break; case 2: word_copy (u_int16_t, _dest, _src, count); break; case 1: byte_copy (u_int8_t, _dest, _src, count); break; } return (void *) _dest; }
Feb 01 2003
IMHO: the compiler should generate an optimised memcpy from the src memcpy( foo, bar, len ); (inlined if optimised for speed, either way the fastest for the platform and the cpu's supported instruction set). and you've not put a Duff's device in there, (they're legal in D) tight loops kill performance. and on some architectures unaligned int reads are allowed and less expensive than 4 byte reads. I'm sure less expensive than 4 byte reads and 4 branches. Mike. "Paul Sheer" <psheer icon.co.za> wrote in message news:b1gs7d$2i9c$1 digitaldaemon.com... There are many situations where you want to reimplement a piece of code several times, with a different type for each. This can happen for optimization, or for marshalling functions. This is the strongest reason for support of a macro preprocessor. This example shows an optimized memcpy function that copies in words at a time, properly accounting for possible alignment differences on processors that do not support non-aligned word stores. It is elegantly done with macros of course. 
-paul -------- /* this must be set to the most efficient copying type - usually unsigned long: */ typedef unsigned long cpy_t; #define word_copy(t,d,s,count) \ do { \ unsigned int c; \ char *d8 = (char *) (d); \ char *s8 = (char *) (s); \ register t a0, a1, *dst, *src; \ c = (unsigned long) (d) & (sizeof (t) - 1); \ while (count && (c & (sizeof (t) - 1))) \ (*d8++ = *s8++), count--, c++; \ dst = (t *) d8; \ src = (t *) s8; \ while (count >= (sizeof (t)) * 2) { \ a0 = src[0]; \ a1 = src[1]; \ count -= (sizeof (t)) * 2; \ dst[0] = a0; \ dst[1] = a1; \ src += 2; \ dst += 2; \ } \ while (count >= (sizeof (t))) { \ *dst++ = *src++; \ count -= sizeof (t); \ } \ d8 = (char *) dst; \ s8 = (char *) src; \ while (count--) \ *d8++ = *s8++; \ } while (0) #define byte_copy(t,d,s,count) \ do { \ char *d8 = (char *) (d); \ char *s8 = (char *) (s); \ while (count--) \ *d8++ = *s8++; \ } while (0) void *memcpy (void *_dest, const void *_src, size_t count) { unsigned int f; /* check alignment */ f = sizeof (cpy_t); while ((((unsigned long) _src) & (f - 1)) != (((unsigned long) _dest & (f - 1)))) f >>= 1; switch (f) { case 8: word_copy (u_int64_t, _dest, _src, count); break; case 4: word_copy (u_int32_t, _dest, _src, count); break; case 2: word_copy (u_int16_t, _dest, _src, count); break; case 1: byte_copy (u_int8_t, _dest, _src, count); break; } return (void *) _dest; }
Feb 01 2003