[llvm-dev] Where's the optimiser gone? (part 0)

Previous Topic Next Topic
 
classic Classic list List threaded Threaded
2 messages Options
Reply | Threaded
Open this post in threaded view
|

[llvm-dev] Where's the optimiser gone? (part 0)

Alberto Barbaro via llvm-dev
Hi @ll,

compiler-rt implements the Windows-specific routines
compiler-rt/lib/builtins/i386/chkstk.S and
compiler-rt/lib/builtins/x86_64/chkstk.S
See <http://msdn.microsoft.com/en-us/library/ms648426.aspx>

Their implementation is LESS THAN optimal: they can
yield up to (stacksize / pagesize) superfluous page accesses
(and thus superfluous page faults)!

As implemented, ALL calls of chkstk() touch ALL pages from
the current "top" of stack to its new "top", which might
become the new stack "limit": on access of the "guard page"
Windows handles the stack growth.
Touching pages that were already touched before, i.e. those above
the current "limit" of the stack, is NOT necessary!

Properly optimised chkstk() implementations (for ML.EXE
and ML64.EXE respectively), which touch every page only
once, are shown below!

regards
Stefan Kanthak

See <https://godbolt.org/z/1jSn6->

--- sample0.c ---

/* Reproducer: a function whose single local array is far larger than one
 * page, so the i386 listing that follows has to call __chkstk before the
 * frame can be used. Nothing is returned; the array is discarded on exit. */
void foo(int bar) {
    int array[234567];      /* 234567 ints of automatic storage */
    array[234566] = bar;    /* write the last element of the array */
}


# Compiler output for foo() on i386 (Intel syntax), reproduced as emitted.
# The frame size is handed to __chkstk in eax; the epilogue releases the
# same number of bytes with `add esp`.
_foo: # @foo
    push  ebp
    mov   ebp, esp
    mov   eax, 938272                # eax = bytes of stack frame to reserve
    call  __chkstk                   # probes the new frame; esp is lowered by the callee
    mov   eax, dword ptr [ebp + 8]   # first stack argument
    mov   ecx, dword ptr [ebp + 8]   # same argument loaded a second time
    mov   dword ptr [ebp - 4], ecx   # presumably array[234566] if the array sits at the top of the frame -- TODO confirm
    mov   dword ptr [ebp - 938272], eax # 4-byte Spill
    add   esp, 938272                # release the frame reserved above
    pop   ebp
    ret

/* Driver for the reproducer: calls foo() twice with the same argument.
 * Presumably the second call is there to show the probe running again over
 * stack pages the first call already touched -- confirm against the post. */
int main(int argc) {
    foo (argc);
    foo (argc);
}

--- chkstk.asm (for I386) ---
; Copyright (C) 2004-2018, Stefan Kanthak <[hidden email]>

    .686
    .model flat, C
    .code

; MSVC internal intrinsic _alloca() alias _chkstk():
; argument is passed in eax, result is returned in esp
; https://msdn.microsoft.com/en-us/library/wb1s57t5.aspx
; https://msdn.microsoft.com/en-us/library/ms648426.aspx

;-----------------------------------------------------------------------
; _alloca_probe / _chkstk  (i386, MASM; both names share one body)
; Purpose: lower esp by the requested size, touching only the stack pages
;          that lie below the thread's current stack limit.
; In:      eax = number of bytes to allocate
; Out:     esp = caller's stack pointer minus eax (no alignment applied)
; Keeps:   ecx (saved and restored)
; Clobb:   eax (holds the old stack pointer on return), flags
;-----------------------------------------------------------------------
_alloca_probe proc public  ; VOID *_alloca_probe(DWORD size)
_chkstk proc public        ; VOID _chkstk(DWORD size)

    push   ecx              ; decrement esp, save ecx

    ; After the push: [esp] = saved ecx, [esp+4] = return address,
    ; so esp+8 is the caller's stack pointer as it will be after we return.
    lea    ecx, [esp+8]     ; ecx = stack pointer of caller
    sub    ecx, eax         ; ecx = new (unaligned) stack pointer

    ; Check for wraparound, yield 'stack overflow' exception
    ; The borrow from the sub above is still in the carry flag here;
    ; it is turned into an all-ones / all-zero mask without branching.
    sbb    eax, eax         ; eax = -1 on carry, else 0
    not    eax              ; eax = 0 if wraparound, else -1
    and    ecx, eax         ; ecx = 0 if wraparound, else unchanged

    assume fs:flat          ; let the assembler accept the fs: override below
    mov    eax, fs:[8]      ; eax = (current) stack limit

    ; Nothing to probe when the new stack pointer is not below the limit.
    cmp    eax, ecx
    jna    short DONE       ; stack limit not above new stack pointer?

    ; Probe next stack page, yield 'guard page' exception
    ; Walks down one 4096-byte page at a time from the stack limit until
    ; the probed address is no longer above the new stack pointer.
PROBE:
    sub    eax, 4096        ; eax = next stack page
    test   eax, [eax]       ; read-only touch of the page; only flags change

    cmp    eax, ecx
    ja     short PROBE      ; stack limit above new stack pointer?

DONE:
    mov    eax, ecx         ; eax = new stack pointer
    pop    ecx              ; restore ecx
    xchg   eax, esp         ; esp = new stack pointer,
                            ; eax = old stack pointer

    ; eax now points at the return address on the old stack. Copy it to the
    ; new stack so that ret consumes it and leaves esp at the new stack pointer.
    push   [eax]
    ret


_chkstk endp
_alloca_probe endp

 end

--- chkstk.asm (for AMD64) ---
; Copyright (C) 2004-2018, Stefan Kanthak <[hidden email]>

    .code

; MSVC internal intrinsic _alloca() alias _chkstk():
; argument is passed in rax
; https://msdn.microsoft.com/en-us/library/wb1s57t5.aspx
; https://msdn.microsoft.com/en-us/library/ms648426.aspx
; https://msdn.microsoft.com/en-us/library/tawsa7cb.aspx

;-----------------------------------------------------------------------
; VOID __chkstk(QWORD size)   (AMD64, MASM)
; Purpose: touch every stack page between the current stack limit and the
;          stack pointer the caller is about to establish.
; In:      rax = number of bytes the caller is about to allocate
; Out:     nothing; rax and rsp are left as they were on entry
; Clobb:   r10, r11, flags
;-----------------------------------------------------------------------
PAGE_SIZE       equ 4096     ; distance between two probes, in bytes
STACK_LIMIT_OFS equ 16       ; gs-relative offset of the (current) stack limit

__chkstk proc public         ; VOID __chkstk(QWORD size)

    lea    r11, [rsp+8]      ; r11 = caller's rsp (skip our return address)
    xor    r10, r10          ; r10 = 0: fallback target and base for the gs load
    sub    r11, rax          ; r11 = prospective stack pointer, CF = borrow
    cmovb  r11, r10          ; on wraparound probe all the way down to 0

;;  and    r11, -16          ; r11 = new (aligned) stack pointer

    mov    r10, gs:[r10+STACK_LIMIT_OFS]  ; r10 = (current) stack limit
    cmp    r10, r11
    jbe    probed            ; limit already at or below the target: no probe

    ; Step down one page per iteration; the read is what yields the
    ; 'guard page' exception when the page has not been touched yet.
touch_page:
    sub    r10, PAGE_SIZE    ; r10 = next lower stack page
    test   r10, [r10]        ; read-only touch, only flags change

    cmp    r10, r11
    jnbe   touch_page        ; keep going while still above the target

probed:
    ret

__chkstk endp

    end
--- EOF ---
_______________________________________________
LLVM Developers mailing list
[hidden email]
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
Reply | Threaded
Open this post in threaded view
|

Re: [llvm-dev] Where's the optimiser gone? (part 0)

Alberto Barbaro via llvm-dev
On Wed, Nov 28, 2018 at 4:24 PM Stefan Kanthak via llvm-dev
<[hidden email]> wrote:
>
> Hi @ll,
I'm not sure this continuous stream of emails is the most productive form.
I would think these all should be either bugs on https://bugs.llvm.org,
or patches on http://reviews.llvm.org
And in any case, maybe they should be worded slightly differently.

> compiler-rt implements the Windows-specific routines
> compiler-rt/lib/builtins/i386/chkstk.S and
> compiler-rt/lib/builtins/x86_64/chkstk.S
> See <http://msdn.microsoft.com/en-us/library/ms648426.aspx>
>
> Their implementation is but LESS THAN optimal: they can
> yield upto (stacksize / pagesize) superfluous page accesses
> (and thus superfluous page faults)!
>
> As implemented, ALL calls of chkstk() touch ALL pages from
> the current "top" of stack to its new "top", which might
> become the new stack "limit": on access of the "guard page"
> Windows handles the stack growth.
> Touching of pages already touched before, ie. above the
> current "limit" of the stack, is but NOT necessary!
>
> Properly optimised chkstk() implementations (for ML.EXE
> and ML64.EXE respectively), which touch every page only
> once, are shown below!
>
> regards
> Stefan Kanthak
Roman.

> See <https://godbolt.org/z/1jSn6->
>
> --- sample0.c ---
>
> void foo(int bar) {
>     int array[234567];
>     array[234566] = bar;
> }
>
>
> _foo: # @foo
>     push  ebp
>     mov   ebp, esp
>     mov   eax, 938272
>     call  __chkstk
>     mov   eax, dword ptr [ebp + 8]
>     mov   ecx, dword ptr [ebp + 8]
>     mov   dword ptr [ebp - 4], ecx
>     mov   dword ptr [ebp - 938272], eax # 4-byte Spill
>     add   esp, 938272
>     pop   ebp
>     ret
>
> int main(int argc) {
>     foo (argc);
>     foo (argc);
> }
>
> --- chkstk.asm (for I386) ---
> ; Copyright (C) 2004-2018, Stefan Kanthak <[hidden email]>
>
>     .686
>     .model flat, C
>     .code
>
> ; MSVC internal intrinsic _alloca() alias _chkstk():
> ; argument is passed in eax, result is returned in esp
> ; https://msdn.microsoft.com/en-us/library/wb1s57t5.aspx
> ; https://msdn.microsoft.com/en-us/library/ms648426.aspx
>
> _alloca_probe proc public  ; VOID *_alloca_probe(DWORD size)
> _chkstk proc public        ; VOID _chkstk(DWORD size)
>
>     push   ecx              ; decrement esp, save ecx
>
>     lea    ecx, [esp+8]     ; ecx = stack pointer of caller
>     sub    ecx, eax         ; ecx = new (unaligned) stack pointer
>
>     ; Check for wraparound, yield 'stack overflow' exception
>     sbb    eax, eax         ; eax = -1 on carry, else 0
>     not    eax              ; eax = 0 if wraparound, else -1
>     and    ecx, eax         ; ecx = 0 if wraparound, else unchanged
>
>     assume fs:flat
>     mov    eax, fs:[8]      ; eax = (current) stack limit
>
>     cmp    eax, ecx
>     jna    short DONE       ; stack limit not above new stack pointer?
>
>     ; Probe next stack page, yield 'guard page' exception
> PROBE:
>     sub    eax, 4096        ; eax = next stack page
>     test   eax, [eax]
>
>     cmp    eax, ecx
>     ja     short PROBE      ; stack limit above new stack pointer?
>
> DONE:
>     mov    eax, ecx         ; eax = new stack pointer
>     pop    ecx              ; restore ecx
>     xchg   eax, esp         ; esp = new stack pointer,
>                             ; eax = old stack pointer
>
>     push   [eax]
>     ret
>
>
> _chkstk endp
> _alloca_probe endp
>
>  end
>
> --- chkstk.asm (for AMD64) ---
> ; Copyright (C) 2004-2018, Stefan Kanthak <[hidden email]>
>
>     .code
>
> ; MSVC internal intrinsic _alloca() alias _chkstk():
> ; argument is passed in rax
> ; https://msdn.microsoft.com/en-us/library/wb1s57t5.aspx
> ; https://msdn.microsoft.com/en-us/library/ms648426.aspx
> ; https://msdn.microsoft.com/en-us/library/tawsa7cb.aspx
>
> __chkstk proc public         ; VOID _chkstk(QWORD size)
>
>     xor    r10, r10          ; r10 = 0
>     lea    r11, [rsp+8]      ; r11 = stack pointer of caller
>     sub    r11, rax          ; r11 = new stack pointer
>     cmovb  r11, r10          ; r11 = r10 = 0 if wraparound, else unchanged
>
> ;;  and    r11, -16          ; r11 = new (aligned) stack pointer
>
>     mov    r10, gs:[r10+16]  ; r10 = (current) stack limit
>     cmp    r10, r11
>     jna    RETURN            ; stack limit not above new stack pointer?
>
>     ; Probe next stack page, yield 'guard page' exception
> PROBE:
>     sub    r10, 4096         ; r10 = next stack page
>     test   r10, [r10]
>
>     cmp    r10, r11
>     ja     PROBE             ; stack limit above new stack pointer?
>
> RETURN:
>     ret
>
> __chkstk endp
>
>     end
> --- EOF ---
> _______________________________________________
> LLVM Developers mailing list
> [hidden email]
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev
_______________________________________________
LLVM Developers mailing list
[hidden email]
http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-dev