D - A third example use of C macros that has not been discussed
- Paul Sheer (75/75) Feb 01 2003 There are many situations where you want to reimplement
- Andy Friesen (41/126) Feb 01 2003 Maybe I'm missing something, but it seems to me that you could do that
- Mike Wynn (12/87) Feb 01 2003 IMHO:
There are many situations where you want to reimplement a piece of code several times, with a different type for each. This can happen for optimization, or for marshalling functions. This is the strongest reason for support of a macro preprocessor. This example shows an optimized memcpy function that copies in words at a time, properly accounting for possible alignment differences on processors that do not support non-aligned word stores. It is eligantly done with macros of course. -paul -------- /* this must be set to the most efficient copying type - usually unsigned long: */ typedef unsigned long cpy_t; #define word_copy(t,d,s,count) \ do { \ unsigned int c; \ char *d8 = (char *) (d); \ char *s8 = (char *) (s); \ register t a0, a1, *dst, *src; \ c = (unsigned long) (d) & (sizeof (t) - 1); \ while (count && (c & (sizeof (t) - 1))) \ (*d8++ = *s8++), count--, c++; \ dst = (t *) d8; \ src = (t *) s8; \ while (count >= (sizeof (t)) * 2) { \ a0 = src[0]; \ a1 = src[1]; \ count -= (sizeof (t)) * 2; \ dst[0] = a0; \ dst[1] = a1; \ src += 2; \ dst += 2; \ } \ while (count >= (sizeof (t))) { \ *dst++ = *src++; \ count -= sizeof (t); \ } \ d8 = (char *) dst; \ s8 = (char *) src; \ while (count--) \ *d8++ = *s8++; \ } while (0) #define byte_copy(t,d,s,count) \ do { \ char *d8 = (char *) (d); \ char *s8 = (char *) (s); \ while (count--) \ *d8++ = *s8++; \ } while (0) void *memcpy (void *_dest, const void *_src, size_t count) { unsigned int f; /* check alignment */ f = sizeof (cpy_t); while ((((unsigned long) _src) & (f - 1)) != (((unsigned long) _dest & (f - 1)))) f >>= 1; switch (f) { case 8: word_copy (u_int64_t, _dest, _src, count); break; case 4: word_copy (u_int32_t, _dest, _src, count); break; case 2: word_copy (u_int16_t, _dest, _src, count); break; case 1: byte_copy (u_int8_t, _dest, _src, count); break; } return (void *) _dest; }
Feb 01 2003
Maybe I'm missing something, but it seems to me that you could do that
with templates pretty easily. In so doing, you get a bit of type safety,
and you avoid evaluating the arguments more than once.
/**
 * Word-at-a-time copy, instantiated once per word type T
 * (e.g. CopyLoop!(uint).word_copy).
 *
 * Params:
 *   dest  = destination buffer
 *   src   = source buffer; it must have the same offset modulo T.sizeof as
 *           dest, because only the destination is aligned explicitly
 *   count = number of BYTES to copy; zero or negative copies nothing
 *
 * T.sizeof must be a power of two, since it is used as an alignment mask.
 */
template CopyLoop(T)
{
    void word_copy(T* dest, T* src, int count)
    {
        enum int word = cast(int) T.sizeof;

        ubyte* d8 = cast(ubyte*) dest;
        ubyte* s8 = cast(ubyte*) src;

        // Copy single bytes until the destination sits on a T boundary.
        size_t misalign = (cast(size_t) d8) & (word - 1);
        while (count > 0 && (misalign & (word - 1)))
        {
            *d8++ = *s8++;
            count--;
            misalign++;
        }

        T* wdst = cast(T*) d8;
        T* wsrc = cast(T*) s8;

        // Two words per pass: both loads are issued before either store.
        while (count >= word * 2)
        {
            T a0 = wsrc[0];
            T a1 = wsrc[1];
            count -= word * 2;
            wdst[0] = a0;
            wdst[1] = a1;
            wsrc += 2;
            wdst += 2;
        }

        // At most one whole word is left.
        while (count >= word)
        {
            *wdst++ = *wsrc++;
            count -= word;
        }

        // Trailing bytes.
        d8 = cast(ubyte*) wdst;
        s8 = cast(ubyte*) wsrc;
        while (count-- > 0)
            *d8++ = *s8++;
    }
}
Paul Sheer wrote:
There are many situations where you want to reimplement
a piece of code several times, with a different type
for each. This can happen for optimization, or for
marshalling functions. This is the strongest reason
for support of a macro preprocessor.
This example shows an optimized memcpy function that
copies in words at a time, properly accounting for
possible alignment differences on processors that
do not support non-aligned word stores.
It is elegantly done with macros of course.
-paul
--------
/* Widest word type the copy routine should try to use. Only its size
matters here: memcpy starts its alignment search at sizeof (cpy_t).
this must be set to the most efficient copying type - usually
unsigned long: */
typedef unsigned long cpy_t;
/*
 * word_copy(t, d, s, count) - copy `count` bytes from `s` to `d`, moving
 * most of them as whole words of type `t`.
 *
 *   t      integer type used for the bulk of the copy; sizeof (t) must be
 *          a power of two because it is used as an alignment mask
 *   d, s   destination and source pointers; each is evaluated exactly once
 *   count  modifiable integer lvalue holding the byte count. The macro
 *          consumes it: the final `while ((count)--)` leaves it decremented
 *          past zero, so do not rely on its value afterwards.
 *
 * Only the destination is aligned explicitly. The caller must make sure
 * that `d` and `s` have the same offset modulo sizeof (t), so that the
 * source becomes word-aligned at the same moment as the destination.
 *
 * Every local carries a wc_ prefix and `d` is read only once, so that
 * caller arguments named dst, src, c, ... are not captured by the
 * declarations inside the macro.
 */
#define word_copy(t,d,s,count) \
    do { \
        unsigned char *wc_d8 = (unsigned char *) (d); \
        const unsigned char *wc_s8 = (const unsigned char *) (s); \
        unsigned long wc_c = (unsigned long) wc_d8 & (sizeof (t) - 1); \
        t wc_a0, wc_a1, *wc_dst; \
        const t *wc_src; \
        /* byte-copy until the destination sits on a t boundary */ \
        while ((count) && (wc_c & (sizeof (t) - 1))) { \
            *wc_d8++ = *wc_s8++; \
            (count)--; \
            wc_c++; \
        } \
        wc_dst = (t *) wc_d8; \
        wc_src = (const t *) wc_s8; \
        /* two words per pass: both loads are issued before either store */ \
        while ((count) >= (sizeof (t)) * 2) { \
            wc_a0 = wc_src[0]; \
            wc_a1 = wc_src[1]; \
            (count) -= (sizeof (t)) * 2; \
            wc_dst[0] = wc_a0; \
            wc_dst[1] = wc_a1; \
            wc_src += 2; \
            wc_dst += 2; \
        } \
        /* at most one whole word is left */ \
        while ((count) >= (sizeof (t))) { \
            *wc_dst++ = *wc_src++; \
            (count) -= sizeof (t); \
        } \
        /* trailing bytes */ \
        wc_d8 = (unsigned char *) wc_dst; \
        wc_s8 = (const unsigned char *) wc_src; \
        while ((count)--) \
            *wc_d8++ = *wc_s8++; \
    } while (0)
/*
 * byte_copy(t, d, s, count) - copy `count` bytes from `s` to `d` one byte
 * at a time.
 *
 * `t` is accepted only so the call has the same shape as word_copy; the
 * body never uses it. `count` must be a modifiable lvalue and is consumed:
 * the post-decrement in the loop test leaves it one step below zero.
 */
#define byte_copy(t,d,s,count) \
    do { \
        char *bc_out = (char *) (d); \
        char *bc_in = (char *) (s); \
        for (; count--; ++bc_out, ++bc_in) \
            *bc_out = *bc_in; \
    } while (0)
/*
 * memcpy - copy `count` bytes from `_src` to `_dest` and return `_dest`.
 *
 * Picks the widest word size, starting at sizeof (cpy_t) and halving, at
 * which both pointers have identical low address bits, then hands the copy
 * to word_copy (or byte_copy for width 1) instantiated for that width.
 */
void *memcpy (void *_dest, const void *_src, size_t count)
{
    const unsigned long from_addr = (unsigned long) _src;
    const unsigned long to_addr = (unsigned long) _dest;
    unsigned int width;

    /* Halve the width until source and destination are misaligned by the
       same amount. Width 1 always matches because its mask is 0. */
    for (width = sizeof (cpy_t);
         (from_addr & (width - 1)) != (to_addr & (width - 1));
         width >>= 1)
        ;

    if (width == 8)
        word_copy (u_int64_t, _dest, _src, count);
    else if (width == 4)
        word_copy (u_int32_t, _dest, _src, count);
    else if (width == 2)
        word_copy (u_int16_t, _dest, _src, count);
    else if (width == 1)
        byte_copy (u_int8_t, _dest, _src, count);
    /* any other width copies nothing */

    return _dest;
}
Feb 01 2003
IMHO: the compiler should generate an optimised memcpy from the source-level call memcpy( foo, bar, len ); (inlined if optimising for speed; either way, the fastest form for the platform and the CPU's supported instruction set). Also, you've not put a Duff's device in there (they're legal in D); tight loops kill performance. And on some architectures unaligned int reads are allowed and are less expensive than four single-byte reads. I'm sure they are less expensive than four byte reads and four branches. Mike. "Paul Sheer" <psheer icon.co.za> wrote in message news:b1gs7d$2i9c$1 digitaldaemon.com... There are many situations where you want to reimplement a piece of code several times, with a different type for each. This can happen for optimization, or for marshalling functions. This is the strongest reason for support of a macro preprocessor. This example shows an optimized memcpy function that copies in words at a time, properly accounting for possible alignment differences on processors that do not support non-aligned word stores. It is elegantly done with macros of course. 
-paul -------- /* this must be set to the most efficient copying type - usually unsigned long: */ typedef unsigned long cpy_t; #define word_copy(t,d,s,count) \ do { \ unsigned int c; \ char *d8 = (char *) (d); \ char *s8 = (char *) (s); \ register t a0, a1, *dst, *src; \ c = (unsigned long) (d) & (sizeof (t) - 1); \ while (count && (c & (sizeof (t) - 1))) \ (*d8++ = *s8++), count--, c++; \ dst = (t *) d8; \ src = (t *) s8; \ while (count >= (sizeof (t)) * 2) { \ a0 = src[0]; \ a1 = src[1]; \ count -= (sizeof (t)) * 2; \ dst[0] = a0; \ dst[1] = a1; \ src += 2; \ dst += 2; \ } \ while (count >= (sizeof (t))) { \ *dst++ = *src++; \ count -= sizeof (t); \ } \ d8 = (char *) dst; \ s8 = (char *) src; \ while (count--) \ *d8++ = *s8++; \ } while (0) #define byte_copy(t,d,s,count) \ do { \ char *d8 = (char *) (d); \ char *s8 = (char *) (s); \ while (count--) \ *d8++ = *s8++; \ } while (0) void *memcpy (void *_dest, const void *_src, size_t count) { unsigned int f; /* check alignment */ f = sizeof (cpy_t); while ((((unsigned long) _src) & (f - 1)) != (((unsigned long) _dest & (f - 1)))) f >>= 1; switch (f) { case 8: word_copy (u_int64_t, _dest, _src, count); break; case 4: word_copy (u_int32_t, _dest, _src, count); break; case 2: word_copy (u_int16_t, _dest, _src, count); break; case 1: byte_copy (u_int8_t, _dest, _src, count); break; } return (void *) _dest; }
Feb 01 2003









Andy Friesen <andy ikagames.com> 