digitalmars.D.ldc - D-specific optimisation: opCmp
- John Colvin (158/158) Jan 13 2016 opCmp is a pretty roundabout way of dealing with comparisons from
opCmp is a pretty roundabout way of dealing with comparisons from a computational point of view, and optimisers seem quite bad at dealing with it.

For example, based on https://github.com/D-Programming-Language/phobos/pull/3927:

% cat comparisons.d
float opCmp(float a, float b)
{
    return a < b ? -1 : a > b ? +1 : a == b ? 0 : float.nan;
}

int gt(float a, float b) { return opCmp(a, b) > 0; }
int gte(float a, float b) { return opCmp(a, b) >= 0; }
int lt(float a, float b) { return opCmp(a, b) < 0; }
int lte(float a, float b) { return opCmp(a, b) <= 0; }

int gt_direct(float a, float b) { return a > b; }
int gte_direct(float a, float b) { return a >= b; }
int lt_direct(float a, float b) { return a < b; }
int lte_direct(float a, float b) { return a <= b; }

% ldmd2 -O -inline -release -output-s comparisons.d
% cat comparisons.s
    .section __TEXT,__text,regular,pure_instructions
    .section __TEXT,__literal4,4byte_literals
    .align 2
LCPI0_0:
    .long 3212836864
LCPI0_1:
    .long 1065353216
LCPI0_2:
    .long 2143289344
    .section __TEXT,__text,regular,pure_instructions
    .globl __D11comparisons5opCmpFffZf
    .align 4, 0x90
__D11comparisons5opCmpFffZf:
    .cfi_startproc
    ucomiss %xmm1, %xmm0
    jbe LBB0_2
    movss LCPI0_0(%rip), %xmm1
    movaps %xmm1, %xmm0
    retq
LBB0_2:
    ucomiss %xmm0, %xmm1
    jbe LBB0_5
    movss LCPI0_1(%rip), %xmm1
    movaps %xmm1, %xmm0
    retq
LBB0_5:
    cmpeqss %xmm0, %xmm1
    movss LCPI0_2(%rip), %xmm0
    andnps %xmm0, %xmm1
    movaps %xmm1, %xmm0
    retq
    .cfi_endproc

    .globl __D11comparisons2gtFffZi
    .align 4, 0x90
__D11comparisons2gtFffZi:
    .cfi_startproc
    xorl %eax, %eax
    ucomiss %xmm1, %xmm0
    ja LBB1_3
    movl $1, %eax
    ucomiss %xmm0, %xmm1
    ja LBB1_3
    xorl %eax, %eax
LBB1_3:
    retq
    .cfi_endproc

    .globl __D11comparisons3gteFffZi
    .align 4, 0x90
__D11comparisons3gteFffZi:
    .cfi_startproc
    ucomiss %xmm1, %xmm0
    jbe LBB2_2
    xorl %eax, %eax
    movzbl %al, %eax
    retq
LBB2_2:
    ucomiss %xmm0, %xmm1
    setae %al
    movzbl %al, %eax
    retq
    .cfi_endproc

    .globl __D11comparisons2ltFffZi
    .align 4, 0x90
__D11comparisons2ltFffZi:
    .cfi_startproc
    ucomiss %xmm1, %xmm0
    seta %al
    movzbl %al, %eax
    retq
    .cfi_endproc

    .globl __D11comparisons3lteFffZi
    .align 4, 0x90
__D11comparisons3lteFffZi:
    .cfi_startproc
    movb $1, %al
    ucomiss %xmm1, %xmm0
    ja LBB4_2
    cmpeqss %xmm0, %xmm1
    movd %xmm1, %eax
    andl $1, %eax
LBB4_2:
    movzbl %al, %eax
    retq
    .cfi_endproc

    .globl __D11comparisons9gt_directFffZi
    .align 4, 0x90
__D11comparisons9gt_directFffZi:
    .cfi_startproc
    ucomiss %xmm0, %xmm1
    seta %al
    movzbl %al, %eax
    retq
    .cfi_endproc

    .globl __D11comparisons10gte_directFffZi
    .align 4, 0x90
__D11comparisons10gte_directFffZi:
    .cfi_startproc
    ucomiss %xmm0, %xmm1
    setae %al
    movzbl %al, %eax
    retq
    .cfi_endproc

    .globl __D11comparisons9lt_directFffZi
    .align 4, 0x90
__D11comparisons9lt_directFffZi:
    .cfi_startproc
    ucomiss %xmm1, %xmm0
    seta %al
    movzbl %al, %eax
    retq
    .cfi_endproc

    .globl __D11comparisons10lte_directFffZi
    .align 4, 0x90
__D11comparisons10lte_directFffZi:
    .cfi_startproc
    ucomiss %xmm1, %xmm0
    setae %al
    movzbl %al, %eax
    retq
    .cfi_endproc

See how much better the code-gen is for the direct implementations? It would be great if LDC was somehow able to get this right.
Jan 13 2016