[llvm-dev] Where's the optimiser gone (part 9): bit twiddling not recognised

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

[llvm-dev] Where's the optimiser gone (part 9): bit twiddling not recognised

Shawn Webb via llvm-dev
Hi @ll,

compile with -O3 -m32 -march=i686 (see <https://godbolt.org/z/fjjUl6>)

unsigned long lreverse(unsigned long x)    // swap all bits
{
    x = ((x &  0xAAAAAAAAUL) >> 1)
      | ((x & ~0xAAAAAAAAUL) << 1);
    x = ((x &  0xCCCCCCCCUL) >> 2)
      | ((x & ~0xCCCCCCCCUL) << 2);
    x = ((x &  0xF0F0F0F0UL) >> 4)
      | ((x & ~0xF0F0F0F0UL) << 4);
    x = ((x &  0xFF00FF00UL) >> 8)
      | ((x & ~0xFF00FF00UL) << 8);
    x = ((x &  0xFFFF0000UL) >> 16)
      | ((x & ~0xFFFF0000UL) << 16);

    return x;
}

lreverse: # @lreverse
    mov   eax, dword ptr [esp + 4]
    mov   ecx, eax
    shr   ecx, 1
    and   ecx, 1431655765
    and   eax, 1431655765
    lea   eax, [ecx + 2*eax]
    mov   ecx, eax
    shr   ecx, 2
    and   ecx, 858993459
    and   eax, 858993459
    lea   eax, [ecx + 4*eax]
    mov   ecx, eax
    shr   ecx, 4
    and   ecx, 252645135
    shl   eax, 4
    and   eax, -252645136
    or    eax, ecx
#ifndef _OPTIMISER_GONE_FISHING // until here, almost perfect;
    bswap eax                   //  the next 7 instructions but can
#else                           //   be replaced by a single BSWAP
    mov   ecx, eax
    shr   ecx, 8
    and   ecx, 16711935              
    shl   eax, 8
    and   eax, -16711936
    or    eax, ecx
    rol   eax, 16
#endif
    ret


unsigned long long llreverse(unsigned long long x)
{
#if 0
    x = ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1)
      | ((x & ~0xAAAAAAAAAAAAAAAAULL) << 1);
    x = ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2)
      | ((x & ~0xCCCCCCCCCCCCCCCCULL) << 2);
    x = ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4)
      | ((x & ~0xF0F0F0F0F0F0F0F0ULL) << 4);
    x = ((x & 0xFF00FF00FF00FF00ULL) >> 8)
      | ((x & ~0xFF00FF00FF00FF00ULL) << 8);
    x = ((x & 0xFFFF0000FFFF0000ULL) >> 16)
      | ((x & ~0xFFFF0000FFFF0000ULL) << 16);
    x = ((x & 0xFFFFFFFF00000000ULL) >> 32)
      | ((x & ~0xFFFFFFFF00000000ULL) << 32);

    return x;
#else
    x = ((x >> 1) & ~0xAAAAAAAAAAAAAAAAULL)
      | ((x << 1) &  0xAAAAAAAAAAAAAAAAULL);
    x = ((x >> 2) & ~0xCCCCCCCCCCCCCCCCULL)
      | ((x << 2) &  0xCCCCCCCCCCCCCCCCULL);
    x = ((x >> 4) & ~0xF0F0F0F0F0F0F0F0ULL)
      | ((x << 4) &  0xF0F0F0F0F0F0F0F0ULL);
    x = ((x >> 8) & ~0xFF00FF00FF00FF00ULL)
      | ((x << 8) &  0xFF00FF00FF00FF00ULL);
    x = ((x >> 16) & ~0xFFFF0000FFFF0000ULL)
      | ((x << 16) &  0xFFFF0000FFFF0000ULL);

    return (x >> 32) | (x << 32);
#endif
}

llreverse: # @llreverse
    push  esi
    mov   eax, dword ptr [esp + 8]
    mov   ecx, dword ptr [esp + 12]
    mov   edx, eax
    shr   edx
    mov   esi, ecx
    shr   esi

#ifdef OPTIMISER_GONE_FISHING
    and   esi, 1431655765
    and   edx, 1431655765
    and   ecx, 1431655765
    and   eax, 1431655765
#else
    push  ebx
    mov   ebx, 1431655765
    and   esi, ebx
    and   edx, ebx
    and   ecx, ebx
    and   eax, ebx
#endif
    lea   edx, [edx + 2*eax]
    lea   eax, [esi + 2*ecx]
    mov   ecx, eax
    shr   ecx, 2
    mov   esi, edx
    shr   esi, 2
#ifdef OPTIMISER_GONE_FISHING
    and   esi, 858993459
    and   ecx, 858993459
    and   edx, 858993459
    and   eax, 858993459

#else
    mov   ebx, 858993459
    and   esi, ebx
    and   ecx, ebx
    and   edx, ebx
    and   eax, ebx
#endif
    lea   eax, [ecx + 4*eax]
    lea   edx, [esi + 4*edx]
    mov   ecx, edx
    shr   ecx, 4
    mov   esi, eax
    shr   esi, 4

#ifdef OPTIMISER_GONE_FISHING
    and   esi, 252645135
    and   ecx, 252645135

#else
    mov   ebx, 252645135
    and   esi, ebx
    and   ecx, ebx
#endif
    shl   edx, 4
    shl   eax, 4

#ifdef OPTIMISER_GONE_FISHING
    and   eax, -252645136
#else
    not   ebx
    and   eax, ebx
#endif
    or    eax, esi

#ifdef OPTIMISER_GONE_FISHING
    and   edx, -252645136
#else
    and   edx, ebx
    pop   ebx
#endif
    or    edx, ecx
#ifndef OPTIMISER_GONE_FISHING // until here, quite good;
    bswap eax                  //  the next 14 instructions but can
    bswap edx                  //   be replaced by a two BSWAPs
#else
    mov   ecx, eax
    shr   ecx, 8
    mov   esi, edx
    shr   esi, 8
    and   esi, 16711935
    and   ecx, 16711935
    shl   eax, 8
    shl   edx, 8
    and   edx, -16711936
    or    edx, esi
    and   eax, -16711936
    or    eax, ecx
    rol   edx, 16
    rol   eax, 16
#endif
    pop   esi
    ret
_______________________________________________
LLVM Developers mailing list
[hidden email]
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
Reply | Threaded
Open this post in threaded view
|

Re: [llvm-dev] Where's the optimiser gone (part 9): bit twiddling not recognised

Shawn Webb via llvm-dev
I've filed a bugzilla for this https://bugs.llvm.org/show_bug.cgi?id=40058
~Craig


On Mon, Dec 17, 2018 at 3:48 PM Stefan Kanthak via llvm-dev <[hidden email]> wrote:
Hi @ll,

compile with -O3 -m32 -march=i686 (see <https://godbolt.org/z/fjjUl6>)

unsigned long lreverse(unsigned long x)    // swap all bits
{
    x = ((x &  0xAAAAAAAAUL) >> 1)
      | ((x & ~0xAAAAAAAAUL) << 1);
    x = ((x &  0xCCCCCCCCUL) >> 2)
      | ((x & ~0xCCCCCCCCUL) << 2);
    x = ((x &  0xF0F0F0F0UL) >> 4)
      | ((x & ~0xF0F0F0F0UL) << 4);
    x = ((x &  0xFF00FF00UL) >> 8)
      | ((x & ~0xFF00FF00UL) << 8);
    x = ((x &  0xFFFF0000UL) >> 16)
      | ((x & ~0xFFFF0000UL) << 16);

    return x;
}

lreverse: # @lreverse
    mov   eax, dword ptr [esp + 4]
    mov   ecx, eax
    shr   ecx, 1
    and   ecx, 1431655765
    and   eax, 1431655765
    lea   eax, [ecx + 2*eax]
    mov   ecx, eax
    shr   ecx, 2
    and   ecx, 858993459
    and   eax, 858993459
    lea   eax, [ecx + 4*eax]
    mov   ecx, eax
    shr   ecx, 4
    and   ecx, 252645135
    shl   eax, 4
    and   eax, -252645136
    or    eax, ecx
#ifndef _OPTIMISER_GONE_FISHING // until here, almost perfect;
    bswap eax                   //  the next 7 instructions but can
#else                           //   be replaced by a single BSWAP
    mov   ecx, eax
    shr   ecx, 8
    and   ecx, 16711935               
    shl   eax, 8
    and   eax, -16711936
    or    eax, ecx
    rol   eax, 16
#endif
    ret


unsigned long long llreverse(unsigned long long x)
{
#if 0
    x = ((x & 0xAAAAAAAAAAAAAAAAULL) >> 1)
      | ((x & ~0xAAAAAAAAAAAAAAAAULL) << 1);
    x = ((x & 0xCCCCCCCCCCCCCCCCULL) >> 2)
      | ((x & ~0xCCCCCCCCCCCCCCCCULL) << 2);
    x = ((x & 0xF0F0F0F0F0F0F0F0ULL) >> 4)
      | ((x & ~0xF0F0F0F0F0F0F0F0ULL) << 4);
    x = ((x & 0xFF00FF00FF00FF00ULL) >> 8)
      | ((x & ~0xFF00FF00FF00FF00ULL) << 8);
    x = ((x & 0xFFFF0000FFFF0000ULL) >> 16)
      | ((x & ~0xFFFF0000FFFF0000ULL) << 16);
    x = ((x & 0xFFFFFFFF00000000ULL) >> 32)
      | ((x & ~0xFFFFFFFF00000000ULL) << 32);

    return x;
#else
    x = ((x >> 1) & ~0xAAAAAAAAAAAAAAAAULL)
      | ((x << 1) &  0xAAAAAAAAAAAAAAAAULL);
    x = ((x >> 2) & ~0xCCCCCCCCCCCCCCCCULL)
      | ((x << 2) &  0xCCCCCCCCCCCCCCCCULL);
    x = ((x >> 4) & ~0xF0F0F0F0F0F0F0F0ULL)
      | ((x << 4) &  0xF0F0F0F0F0F0F0F0ULL);
    x = ((x >> 8) & ~0xFF00FF00FF00FF00ULL)
      | ((x << 8) &  0xFF00FF00FF00FF00ULL);
    x = ((x >> 16) & ~0xFFFF0000FFFF0000ULL)
      | ((x << 16) &  0xFFFF0000FFFF0000ULL);

    return (x >> 32) | (x << 32);
#endif
}

llreverse: # @llreverse
    push  esi
    mov   eax, dword ptr [esp + 8]
    mov   ecx, dword ptr [esp + 12]
    mov   edx, eax
    shr   edx
    mov   esi, ecx
    shr   esi

#ifdef OPTIMISER_GONE_FISHING
    and   esi, 1431655765
    and   edx, 1431655765
    and   ecx, 1431655765
    and   eax, 1431655765
#else
    push  ebx
    mov   ebx, 1431655765
    and   esi, ebx
    and   edx, ebx
    and   ecx, ebx
    and   eax, ebx
#endif
    lea   edx, [edx + 2*eax]
    lea   eax, [esi + 2*ecx]
    mov   ecx, eax
    shr   ecx, 2
    mov   esi, edx
    shr   esi, 2
#ifdef OPTIMISER_GONE_FISHING
    and   esi, 858993459
    and   ecx, 858993459
    and   edx, 858993459
    and   eax, 858993459

#else
    mov   ebx, 858993459
    and   esi, ebx
    and   ecx, ebx
    and   edx, ebx
    and   eax, ebx
#endif
    lea   eax, [ecx + 4*eax]
    lea   edx, [esi + 4*edx]
    mov   ecx, edx
    shr   ecx, 4
    mov   esi, eax
    shr   esi, 4

#ifdef OPTIMISER_GONE_FISHING
    and   esi, 252645135
    and   ecx, 252645135

#else
    mov   ebx, 252645135
    and   esi, ebx
    and   ecx, ebx
#endif
    shl   edx, 4
    shl   eax, 4

#ifdef OPTIMISER_GONE_FISHING
    and   eax, -252645136
#else
    not   ebx
    and   eax, ebx
#endif
    or    eax, esi

#ifdef OPTIMISER_GONE_FISHING
    and   edx, -252645136
#else
    and   edx, ebx
    pop   ebx
#endif
    or    edx, ecx
#ifndef OPTIMISER_GONE_FISHING // until here, quite good;
    bswap eax                  //  the next 14 instructions but can
    bswap edx                  //   be replaced by a two BSWAPs
#else
    mov   ecx, eax
    shr   ecx, 8
    mov   esi, edx
    shr   esi, 8
    and   esi, 16711935
    and   ecx, 16711935
    shl   eax, 8
    shl   edx, 8
    and   edx, -16711936
    or    edx, esi
    and   eax, -16711936
    or    eax, ecx
    rol   edx, 16
    rol   eax, 16
#endif
    pop   esi
    ret
_______________________________________________
LLVM Developers mailing list
[hidden email]
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev

_______________________________________________
LLVM Developers mailing list
[hidden email]
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev