Quelle mpi_x86.s

Sprache: x86-Assembler (GNU as, AT&T-Syntax)

#
# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.

.data
.p2align 2
#
# is_sse: cached answer to "may the SSE2 code paths be used?", read by
# every dispatching entry point in this file.
#   negative (initially -1): not known yet; the entry point calls
#       s_mpi_is_sse2 and stores its return value here
#   zero: use the plain x86 integer code paths
#   positive: use the SSE2 code paths
#
.type is_sse,@object
is_sse:
    .long -1
.size is_sse,.-is_sse

#
# Accessors for this file's static data.
#
#   GET var,reg    load the 32-bit variable var into reg
#   PUT reg,var    store reg into the 32-bit variable var
#
# sigh, two flavours are needed: -fPIC and not PIC.  PIC is the default,
# since this file seems to be exclusively linux right now (solaris uses
# mpi_i86pc.s and windows uses mpi_x86_asm.c); define NO_PIC to get the
# absolute-address forms instead.
#
# The PIC forms address var as var@GOTOFF(%ebx), so they are only correct
# while %ebx holds the GOT base address.
# NOTE(review): nothing in this file loads %ebx before GET/PUT are used;
# presumably every caller enters with %ebx already pointing at the GOT
# (e.g. a PIC caller going through the PLT) - confirm.
#
.ifdef NO_PIC
.macro GET   var,reg
    movl   \var,\reg
.endm
.macro PUT   reg,var
    movl   \reg,\var
.endm
.else
.macro GET   var,reg
    movl   \var@GOTOFF(%ebx),\reg
.endm
.macro PUT   reg,var
    movl   \reg,\var@GOTOFF(%ebx)
.endm
.endif

.text

#
# s_mpv_mul_d(a, a_len, b, c)
#
# Multiply the a_len-word vector a by the single 32-bit word b:
#     c[i]     = low 32 bits of (a[i] * b + carry),  i = 0 .. a_len-1
#     c[a_len] = final carry word
# so a_len + 1 words are written to c.  When a_len is zero only
# c[0] = 0 is stored.
#
# The entry point dispatches on is_sse: zero selects the integer path,
# a positive value selects the SSE2 path, and a negative value makes it
# call s_mpi_is_sse2 first and cache the result in is_sse.
#
# Stack arguments (offsets valid once %ebp has been set up):
#  ebp + 0: caller's ebp
#  ebp + 4: return address
#  ebp + 8: a argument
#  ebp + 12: a_len argument
#  ebp + 16: b argument
#  ebp + 20: c argument
#
# Frame of the integer path (s_mpv_mul_d_x86):
#  ebp - 28 .. ebp - 1: reserved by "sub $28,%esp", never referenced
#  ebp - 32: caller's edi
#  ebp - 36: caller's esi
#  ebp - 40: caller's ebx
#
# Frame of the SSE2 path (s_mpv_mul_d_sse2):
#  ebp - 4: caller's edi
#  ebp - 8: caller's esi
#
# Register use, integer path:
#  eax: current word of a, then low half of the product
#  ebx: carry
#  ecx: remaining word count (a_len)
#  edx: b, then high half of the product
#  esi: a ptr
#  edi: c ptr
# Register use, SSE2 path:
#  mm0: a[i], then the 64-bit product
#  mm1: b
#  mm2: running 64-bit sum; after the shift, the carry
#  ecx, esi, edi: as above
#
.globl s_mpv_mul_d
.type s_mpv_mul_d,@function
s_mpv_mul_d:
    GET    is_sse,%eax
    cmp    $0,%eax
    je     s_mpv_mul_d_x86
    jg     s_mpv_mul_d_sse2
    # is_sse is negative: probe the CPU.  On entry (%esp + 4) is a
    # multiple of sixteen under the i386 System V ABI (assuming our caller
    # honoured it), so twelve bytes of padding make %esp itself aligned
    # at the call, as that ABI requires for the callee.
    sub    $12,%esp
    call   s_mpi_is_sse2
    add    $12,%esp
    PUT    %eax,is_sse          # cache the probe result
    cmp    $0,%eax
    jg     s_mpv_mul_d_sse2
s_mpv_mul_d_x86:
    push   %ebp
    mov    %esp,%ebp
    sub    $28,%esp
    push   %edi
    push   %esi
    push   %ebx
    movl   $0,%ebx  # carry = 0
    mov    12(%ebp),%ecx # ecx = a_len
    mov    20(%ebp),%edi # edi = c
    cmp    $0,%ecx
    je     2f   # jmp if a_len == 0
    mov    8(%ebp),%esi  # esi = a
    cld             # string ops below must count upwards
1:
    lodsl   # eax = [ds:esi]; esi += 4
    mov    16(%ebp),%edx # edx = b (reloaded: mull overwrites edx)
    mull   %edx   # edx:eax = Phi:Plo = a_i * b

    add    %ebx,%eax  # add carry (%ebx) to edx:eax
    adc    $0,%edx
    mov    %edx,%ebx  # high half of product becomes next carry

    stosl   # [es:edi] = eax; edi += 4;
    dec    %ecx   # --a_len
    jnz    1b   # jmp if a_len != 0
2:
    mov    %ebx,0(%edi)  # *c = carry
    pop    %ebx
    pop    %esi
    pop    %edi
    leave           # drops the unused 28-byte area and restores ebp
    ret
    nop
s_mpv_mul_d_sse2:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    psubq  %mm2,%mm2  # carry = 0
    mov    12(%ebp),%ecx # ecx = a_len
    movd   16(%ebp),%mm1 # mm1 = b
    mov    20(%ebp),%edi # edi = c
    cmp    $0,%ecx
    je     6f   # jmp if a_len == 0
    mov    8(%ebp),%esi  # esi = a
    cld
5:
    movd   0(%esi),%mm0         # mm0 = *a++
    add    $4,%esi
    pmuludq %mm1,%mm0           # mm0 = b * *a++ (full 64-bit product)
    paddq  %mm0,%mm2            # add the carry
    movd   %mm2,0(%edi)         # store the 32bit result
    add    $4,%edi
    psrlq  $32, %mm2  # save the carry
    dec    %ecx   # --a_len
    jnz    5b   # jmp if a_len != 0
6:
    movd   %mm2,0(%edi)  # *c = carry
    emms            # leave MMX state so x87 code can run afterwards
    pop    %esi
    pop    %edi
    leave
    ret
    nop

#
# s_mpv_mul_d_add(a, a_len, b, c)
#
# Multiply-accumulate the a_len-word vector a by the single word b into c:
#     c[i]     = low 32 bits of (a[i] * b + c[i] + carry),  i = 0 .. a_len-1
#     c[a_len] = final carry word (stored, not added)
# When a_len is zero only c[0] = 0 is stored.
#
# The entry point dispatches on is_sse: zero selects the integer path,
# a positive value selects the SSE2 path, and a negative value makes it
# call s_mpi_is_sse2 first and cache the result in is_sse.
#
# Stack arguments (offsets valid once %ebp has been set up):
#  ebp + 0: caller's ebp
#  ebp + 4: return address
#  ebp + 8: a argument
#  ebp + 12: a_len argument
#  ebp + 16: b argument
#  ebp + 20: c argument
#
# Frame of the integer path (s_mpv_mul_d_add_x86):
#  ebp - 28 .. ebp - 1: reserved by "sub $28,%esp", never referenced
#  ebp - 32: caller's edi
#  ebp - 36: caller's esi
#  ebp - 40: caller's ebx
#
# Frame of the SSE2 path (s_mpv_mul_d_add_sse2):
#  ebp - 4: caller's edi
#  ebp - 8: caller's esi
#
# Register use, integer path:
#  eax: current word of a, then low half of the running sum
#  ebx: carry (also scratch for the current word of c)
#  ecx: remaining word count (a_len)
#  edx: b, then high half of the running sum
#  esi: a ptr
#  edi: c ptr
# Register use, SSE2 path:
#  mm0: a[i], then the product, then c[i]
#  mm1: b
#  mm2: running 64-bit sum; after the shift, the carry
#  ecx, esi, edi: as above
#
.globl s_mpv_mul_d_add
.type s_mpv_mul_d_add,@function
s_mpv_mul_d_add:
    GET    is_sse,%eax
    cmp    $0,%eax
    je     s_mpv_mul_d_add_x86
    jg     s_mpv_mul_d_add_sse2
    # is_sse is negative: probe the CPU.  On entry (%esp + 4) is a
    # multiple of sixteen under the i386 System V ABI (assuming our caller
    # honoured it), so twelve bytes of padding make %esp itself aligned
    # at the call, as that ABI requires for the callee.
    sub    $12,%esp
    call   s_mpi_is_sse2
    add    $12,%esp
    PUT    %eax,is_sse          # cache the probe result
    cmp    $0,%eax
    jg     s_mpv_mul_d_add_sse2
s_mpv_mul_d_add_x86:
    push   %ebp
    mov    %esp,%ebp
    sub    $28,%esp
    push   %edi
    push   %esi
    push   %ebx
    movl   $0,%ebx  # carry = 0
    mov    12(%ebp),%ecx # ecx = a_len
    mov    20(%ebp),%edi # edi = c
    cmp    $0,%ecx
    je     11f   # jmp if a_len == 0
    mov    8(%ebp),%esi  # esi = a
    cld             # string ops below must count upwards
10:
    lodsl   # eax = [ds:esi]; esi += 4
    mov    16(%ebp),%edx # edx = b (reloaded: mull overwrites edx)
    mull   %edx   # edx:eax = Phi:Plo = a_i * b

    add    %ebx,%eax  # add carry (%ebx) to edx:eax
    adc    $0,%edx
    mov    0(%edi),%ebx  # add in current word from *c
    add    %ebx,%eax
    adc    $0,%edx
    mov    %edx,%ebx  # high half of the sum becomes next carry

    stosl   # [es:edi] = eax; edi += 4;
    dec    %ecx   # --a_len
    jnz    10b   # jmp if a_len != 0
11:
    mov    %ebx,0(%edi)  # *c = carry
    pop    %ebx
    pop    %esi
    pop    %edi
    leave           # drops the unused 28-byte area and restores ebp
    ret
    nop
s_mpv_mul_d_add_sse2:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    psubq  %mm2,%mm2  # carry = 0
    mov    12(%ebp),%ecx # ecx = a_len
    movd   16(%ebp),%mm1 # mm1 = b
    mov    20(%ebp),%edi # edi = c
    cmp    $0,%ecx
    je     16f   # jmp if a_len == 0
    mov    8(%ebp),%esi  # esi = a
    cld
15:
    movd   0(%esi),%mm0         # mm0 = *a++
    add    $4,%esi
    pmuludq %mm1,%mm0           # mm0 = b * *a++ (full 64-bit product)
    paddq  %mm0,%mm2            # add the carry
    movd   0(%edi),%mm0         # mm0 = current word from *c
    paddq  %mm0,%mm2            # add *c
    movd   %mm2,0(%edi)         # store the 32bit result
    add    $4,%edi
    psrlq  $32, %mm2  # save the carry
    dec    %ecx   # --a_len
    jnz    15b   # jmp if a_len != 0
16:
    movd   %mm2,0(%edi)  # *c = carry
    emms            # leave MMX state so x87 code can run afterwards
    pop    %esi
    pop    %edi
    leave
    ret
    nop

#
# s_mpv_mul_d_add_prop(a, a_len, b, c)
#
# Multiply-accumulate the a_len-word vector a by the single word b into c
# and propagate the final carry:
#     c[i] = low 32 bits of (a[i] * b + c[i] + carry),  i = 0 .. a_len-1
# then, if the carry is non-zero, it is added to c[a_len] and any carry
# out of that addition keeps rippling into the following words of c until
# an addition produces no carry.
# NOTE(review): the ripple loop has no length bound of its own; it
# presumably relies on the caller sizing c so the carry dies out - confirm.
#
# The entry point dispatches on is_sse: zero selects the integer path,
# a positive value selects the SSE2 path, and a negative value makes it
# call s_mpi_is_sse2 first and cache the result in is_sse.
#
# Stack arguments (offsets valid once %ebp has been set up):
#  ebp + 0: caller's ebp
#  ebp + 4: return address
#  ebp + 8: a argument
#  ebp + 12: a_len argument
#  ebp + 16: b argument
#  ebp + 20: c argument
#
# Frame of the integer path (s_mpv_mul_d_add_prop_x86):
#  ebp - 28 .. ebp - 1: reserved by "sub $28,%esp", never referenced
#  ebp - 32: caller's edi
#  ebp - 36: caller's esi
#  ebp - 40: caller's ebx
#
# Frame of the SSE2 path (s_mpv_mul_d_add_prop_sse2):
#  ebp - 4: caller's edi
#  ebp - 8: caller's esi
#  ebp - 12: caller's ebx
#
# Register use, integer path:
#  eax: current word of a, then low half of the running sum
#  ebx: carry (also scratch for the current word of c)
#  ecx: remaining word count (a_len)
#  edx: b, then high half of the running sum
#  esi: a ptr
#  edi: c ptr
# Register use, SSE2 path:
#  mm0: a[i], then the product
#  mm1: b
#  mm2: running 64-bit sum; after the shift, the carry
#  mm3: c[i]
#  ebx: final carry, copied out of mm2 for the ripple loop
#  ecx, esi, edi: as above
#
.globl s_mpv_mul_d_add_prop
.type s_mpv_mul_d_add_prop,@function
s_mpv_mul_d_add_prop:
    GET    is_sse,%eax
    cmp    $0,%eax
    je     s_mpv_mul_d_add_prop_x86
    jg     s_mpv_mul_d_add_prop_sse2
    # is_sse is negative: probe the CPU.  On entry (%esp + 4) is a
    # multiple of sixteen under the i386 System V ABI (assuming our caller
    # honoured it), so twelve bytes of padding make %esp itself aligned
    # at the call, as that ABI requires for the callee.
    sub    $12,%esp
    call   s_mpi_is_sse2
    add    $12,%esp
    PUT    %eax,is_sse          # cache the probe result
    cmp    $0,%eax
    jg     s_mpv_mul_d_add_prop_sse2
s_mpv_mul_d_add_prop_x86:
    push   %ebp
    mov    %esp,%ebp
    sub    $28,%esp
    push   %edi
    push   %esi
    push   %ebx
    movl   $0,%ebx  # carry = 0
    mov    12(%ebp),%ecx # ecx = a_len
    mov    20(%ebp),%edi # edi = c
    cmp    $0,%ecx
    je     21f   # jmp if a_len == 0
    cld             # string ops below must count upwards
    mov    8(%ebp),%esi  # esi = a
20:
    lodsl   # eax = [ds:esi]; esi += 4
    mov    16(%ebp),%edx # edx = b (reloaded: mull overwrites edx)
    mull   %edx   # edx:eax = Phi:Plo = a_i * b

    add    %ebx,%eax  # add carry (%ebx) to edx:eax
    adc    $0,%edx
    mov    0(%edi),%ebx  # add in current word from *c
    add    %ebx,%eax
    adc    $0,%edx
    mov    %edx,%ebx  # high half of the sum becomes next carry

    stosl   # [es:edi] = eax; edi += 4;
    dec    %ecx   # --a_len
    jnz    20b   # jmp if a_len != 0
21:
    cmp    $0,%ebx  # is carry zero?
    jz     23f
    mov    0(%edi),%eax  # add in current word from *c
    add    %ebx,%eax
    stosl   # [es:edi] = eax; edi += 4; (flags untouched)
    jnc    23f
22:
    # CF is still set here: neither mov nor stosl modifies the flags.
    mov    0(%edi),%eax  # add in current word from *c
    adc    $0,%eax
    stosl   # [es:edi] = eax; edi += 4;
    jc     22b
23:
    pop    %ebx
    pop    %esi
    pop    %edi
    leave           # drops the unused 28-byte area and restores ebp
    ret
    nop
s_mpv_mul_d_add_prop_sse2:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    push   %ebx
    psubq  %mm2,%mm2  # carry = 0
    mov    12(%ebp),%ecx # ecx = a_len
    movd   16(%ebp),%mm1 # mm1 = b
    mov    20(%ebp),%edi # edi = c
    cmp    $0,%ecx
    je     26f   # jmp if a_len == 0
    mov    8(%ebp),%esi  # esi = a
    cld
25:
    movd   0(%esi),%mm0         # mm0 = *a++
    movd   0(%edi),%mm3  # fetch the sum
    add    $4,%esi
    pmuludq %mm1,%mm0           # mm0 = b * *a++ (full 64-bit product)
    paddq  %mm0,%mm2            # add the carry
    paddq  %mm3,%mm2            # add *c++
    movd   %mm2,0(%edi)         # store the 32bit result
    add    $4,%edi
    psrlq  $32, %mm2  # save the carry
    dec    %ecx   # --a_len
    jnz    25b   # jmp if a_len != 0
26:
    movd   %mm2,%ebx        # ebx = final carry
    cmp    $0,%ebx  # is carry zero?
    jz     28f
    mov    0(%edi),%eax  # add in current word from *c
    add    %ebx, %eax
    stosl   # [es:edi] = eax; edi += 4; (flags untouched)
    jnc    28f
27:
    # CF is still set here: neither mov nor stosl modifies the flags.
    mov    0(%edi),%eax  # add in current word from *c
    adc    $0,%eax
    stosl   # [es:edi] = eax; edi += 4;
    jc     27b
28:
    emms            # leave MMX state so x87 code can run afterwards
    pop    %ebx
    pop    %esi
    pop    %edi
    leave
    ret
    nop

#
# s_mpv_sqr_add_prop(pa, a_len, ps)
#
# For each of the a_len words of pa, add its 64-bit square into the
# matching pair of words in ps and carry on:
#     ps[2i+1]:ps[2i] += pa[i] * pa[i] + carry,   i = 0 .. a_len-1
# then, if the carry is non-zero, it is added to ps[2 * a_len] and any
# carry out of that addition keeps rippling into the following words of
# ps until an addition produces no carry.
# NOTE(review): the ripple loop has no length bound of its own; it
# presumably relies on the caller sizing ps so the carry dies out - confirm.
#
# The entry point dispatches on is_sse: zero selects the integer path,
# a positive value selects the SSE2 path, and a negative value makes it
# call s_mpi_is_sse2 first and cache the result in is_sse.
#
# Stack arguments (offsets valid once %ebp has been set up):
#  ebp + 0: caller's ebp
#  ebp + 4: return address
#  ebp + 8: pa argument
#  ebp + 12: a_len argument
#  ebp + 16: ps argument
#
# Frame of the integer path (s_mpv_sqr_add_prop_x86):
#  ebp - 12 .. ebp - 1: reserved by "sub $12,%esp", never referenced
#  ebp - 16: caller's edi
#  ebp - 20: caller's esi
#  ebp - 24: caller's ebx
#
# Frame of the SSE2 path (s_mpv_sqr_add_prop_sse2):
#  ebp - 4: caller's edi
#  ebp - 8: caller's esi
#  ebp - 12: caller's ebx
#
# Register use, integer path:
#  eax: current word of pa, then the word being stored
#  ebx: carry (also scratch for the current words of ps)
#  ecx: remaining word count (a_len)
#  edx: high half of the square / running sum
#  esi: pa ptr
#  edi: ps ptr
# Register use, SSE2 path:
#  mm0: pa[i], then its square
#  mm2: running 64-bit sum; after the shifts, the carry
#  mm3: current word of ps
#  ebx: final carry, copied out of mm2 for the ripple loop
#  ecx, esi, edi: as above
#

.globl s_mpv_sqr_add_prop
.type s_mpv_sqr_add_prop,@function
s_mpv_sqr_add_prop:
    GET    is_sse,%eax
    cmp    $0,%eax
    je     s_mpv_sqr_add_prop_x86
    jg     s_mpv_sqr_add_prop_sse2
    # is_sse is negative: probe the CPU.  On entry (%esp + 4) is a
    # multiple of sixteen under the i386 System V ABI (assuming our caller
    # honoured it), so twelve bytes of padding make %esp itself aligned
    # at the call, as that ABI requires for the callee.
    sub    $12,%esp
    call   s_mpi_is_sse2
    add    $12,%esp
    PUT    %eax,is_sse          # cache the probe result
    cmp    $0,%eax
    jg     s_mpv_sqr_add_prop_sse2
s_mpv_sqr_add_prop_x86:
    push   %ebp
    mov    %esp,%ebp
    sub    $12,%esp
    push   %edi
    push   %esi
    push   %ebx
    movl   $0,%ebx  # carry = 0
    mov    12(%ebp),%ecx # a_len
    mov    16(%ebp),%edi # edi = ps
    cmp    $0,%ecx
    je     31f   # jump if a_len == 0
    cld             # string ops below must count upwards
    mov    8(%ebp),%esi # esi = pa
30:
    lodsl   # %eax = [ds:esi]; esi += 4;
    mull   %eax     # edx:eax = pa_i * pa_i

    add    %ebx,%eax  # add "carry"
    adc    $0,%edx
    mov    0(%edi),%ebx
    add    %ebx,%eax  # add low word from result
    mov    4(%edi),%ebx     # fetch high word (mov keeps CF)
    stosl   # [es:edi] = %eax; edi += 4; (stosl keeps CF)
    adc    %ebx,%edx  # add high word from result, plus CF from the low add
    movl   $0,%ebx  # mov, not xor: CF must survive for the adc below
    mov    %edx,%eax
    adc    $0,%ebx  # carry out of the high word becomes next carry
    stosl   # [es:edi] = %eax; edi += 4;
    dec    %ecx  # --a_len
    jnz    30b   # jmp if a_len != 0
31:
    cmp    $0,%ebx  # is carry zero?
    jz     34f
    mov    0(%edi),%eax  # add in current word from *ps
    add    %ebx,%eax
    stosl   # [es:edi] = eax; edi += 4; (flags untouched)
    jnc    34f
32:
    # CF is still set here: neither mov nor stosl modifies the flags.
    mov    0(%edi),%eax  # add in current word from *ps
    adc    $0,%eax
    stosl   # [es:edi] = eax; edi += 4;
    jc     32b
34:
    pop    %ebx
    pop    %esi
    pop    %edi
    leave           # drops the unused 12-byte area and restores ebp
    ret
    nop
s_mpv_sqr_add_prop_sse2:
    push   %ebp
    mov    %esp,%ebp
    push   %edi
    push   %esi
    push   %ebx
    psubq  %mm2,%mm2  # carry = 0
    mov    12(%ebp),%ecx # ecx = a_len
    mov    16(%ebp),%edi # edi = ps
    cmp    $0,%ecx
    je     36f   # jmp if a_len == 0
    mov    8(%ebp),%esi  # esi = pa
    cld
35:
    movd   0(%esi),%mm0        # mm0 = *pa
    movd   0(%edi),%mm3        # fetch the sum
    add    $4,%esi
    pmuludq %mm0,%mm0          # mm0 = sqr(pa_i)
    paddq  %mm0,%mm2           # add the carry
    paddq  %mm3,%mm2           # add the low word
    movd   4(%edi),%mm3        # fetch the high word
    movd   %mm2,0(%edi)        # store the 32bit result
    psrlq  $32, %mm2
    paddq  %mm3,%mm2           # add the high word
    movd   %mm2,4(%edi)        # store the 32bit result
    psrlq  $32, %mm2        # save the carry.
    add    $8,%edi
    dec    %ecx   # --a_len
    jnz    35b   # jmp if a_len != 0
36:
    movd   %mm2,%ebx        # ebx = final carry
    cmp    $0,%ebx  # is carry zero?
    jz     38f
    mov    0(%edi),%eax  # add in current word from *ps
    add    %ebx, %eax
    stosl   # [es:edi] = eax; edi += 4; (flags untouched)
    jnc    38f
37:
    # CF is still set here: neither mov nor stosl modifies the flags.
    mov    0(%edi),%eax  # add in current word from *ps
    adc    $0,%eax
    stosl   # [es:edi] = eax; edi += 4;
    jc     37b
38:
    emms            # leave MMX state so x87 code can run afterwards
    pop    %ebx
    pop    %esi
    pop    %edi
    leave
    ret
    nop

#
# Divide 64-bit (Nhi,Nlo) by 32-bit divisor, which must be normalized
# so its high bit is 1.   This code is from NSPR.
#
# mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
#             mp_digit *qp, mp_digit *rp)
#
# Stores the 32-bit quotient through qp and the remainder through rp,
# then returns zero.  The hardware divide raises a divide error when
# the quotient does not fit in 32 bits (Nhi >= divisor) or when the
# divisor is zero; nothing here guards against either.
#
# Stack layout after the push below:
#  esp +  0: caller's ebx
#  esp +  4: return address
#  esp +  8: Nhi argument
#  esp + 12: Nlo argument
#  esp + 16: divisor argument
#  esp + 20: qp argument
#  esp + 24: rp argument
# Register use:
#  eax: Nlo, then the quotient, finally the return value
#  edx: Nhi, then the remainder
#  ebx: destination pointer (qp, then rp)
#

.globl s_mpv_div_2dx1d
.type s_mpv_div_2dx1d,@function
s_mpv_div_2dx1d:
       push   %ebx
       mov    12(%esp),%eax        # eax = Nlo
       mov    8(%esp),%edx         # edx = Nhi
       divl   16(%esp)             # edx:eax / divisor -> eax quot, edx rem
       mov    20(%esp),%ebx
       mov    %eax,(%ebx)          # *qp = quotient
       mov    24(%esp),%ebx
       mov    %edx,(%ebx)          # *rp = remainder
       xor    %eax,%eax            # return zero
       pop    %ebx
       ret
       nop

# Magic indicating no need for an executable stack: emit an empty
# .note.GNU-stack section, then switch back to the section that was
# current before it.
.section .note.GNU-stack, "", @progbits
.previous

Messung V0.5 in Prozent

¤ Dauer der Verarbeitung: 0.4 Sekunden ¤

Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit noch Richtigkeit noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.