# This Source Code Form is subject to the terms of the Mozilla Public
# License, v. 2.0. If a copy of the MPL was not distributed with this
# file, You can obtain one at http://mozilla.org/MPL/2.0/.
# Platform glue, resolved by the C preprocessor before assembly.
# On Darwin every exported routine name is remapped to its
# underscore-prefixed form (presumably to match the Mach-O C symbol
# naming), and TYPE_FUNCTION expands to nothing.
#ifdef DARWIN
#define s_mpv_mul_d _s_mpv_mul_d
#define s_mpv_mul_d_add _s_mpv_mul_d_add
#define s_mpv_mul_d_add_prop _s_mpv_mul_d_add_prop
#define s_mpv_sqr_add_prop _s_mpv_sqr_add_prop
#define s_mpv_div_2dx1d _s_mpv_div_2dx1d
#define TYPE_FUNCTION(x)
#else
# On other targets TYPE_FUNCTION(x) tags symbol x as a function.
#define TYPE_FUNCTION(x) .type x, @function
#endif
# Everything that follows is assembled into the code section.
.text
# s_mpv_mul_d: c[0 .. a_len] = a[0 .. a_len-1] * b
#
# Multiplies the a_len 32-bit words at a by the 32-bit word b.  The a_len
# low result words are stored at c and the final carry word is stored
# right after them.  The previous contents of c are not read.
#
# Stack frame once the prologue has run:
#   ebp -  8: saved esi
#   ebp -  4: saved edi
#   ebp +  0: saved ebp
#   ebp +  4: return address
#   ebp +  8: a argument
#   ebp + 12: a_len argument
#   ebp + 16: b argument
#   ebp + 20: c argument
#
# Register use:
#   ecx: words of a still to process
#   esi: read cursor into a
#   edi: write cursor into c
#   mm0: current word of a, then its 64-bit product with b
#   mm1: b
#   mm2: 64-bit accumulator; holds the carry between iterations
.globl s_mpv_mul_d
.private_extern s_mpv_mul_d
TYPE_FUNCTION(s_mpv_mul_d)
s_mpv_mul_d:
    pushl   %ebp
    movl    %esp, %ebp
    pushl   %edi
    pushl   %esi
    pxor    %mm2, %mm2              # carry = 0
    movl    12(%ebp), %ecx          # ecx = a_len
    movd    16(%ebp), %mm1          # mm1 = b
    movl    20(%ebp), %edi          # edi = c
    testl   %ecx, %ecx
    jz      2f                      # a_len == 0: store only the zero carry
    movl    8(%ebp), %esi           # esi = a
    cld
1:
    movd    (%esi), %mm0            # mm0 = *a
    addl    $4, %esi                # a++
    pmuludq %mm1, %mm0              # mm0 = b * *a, full 64-bit product
    paddq   %mm0, %mm2              # accumulator = product + carry
    movd    %mm2, (%edi)            # *c = low word of the accumulator
    addl    $4, %edi                # c++
    psrlq   $32, %mm2               # high word becomes the next carry
    decl    %ecx                    # --a_len
    jnz     1b                      # loop until every word is consumed
2:
    movd    %mm2, (%edi)            # *c = final carry
    emms                            # reset MMX state before returning
    popl    %esi
    popl    %edi
    leave
    ret
    nop
# s_mpv_mul_d_add: c[0 .. a_len-1] += a[0 .. a_len-1] * b
#
# Multiplies the a_len 32-bit words at a by the 32-bit word b and adds
# the result into the a_len words at c.  The carry out of the last word
# is stored (not added) into the word that follows them.
#
# Stack frame once the prologue has run:
#   ebp -  8: saved esi
#   ebp -  4: saved edi
#   ebp +  0: saved ebp
#   ebp +  4: return address
#   ebp +  8: a argument
#   ebp + 12: a_len argument
#   ebp + 16: b argument
#   ebp + 20: c argument
#
# Register use:
#   ecx: words of a still to process
#   esi: read cursor into a
#   edi: read/write cursor into c
#   mm0: current word of a, its product with b, then the current word of c
#   mm1: b
#   mm2: 64-bit accumulator; holds the carry between iterations
.globl s_mpv_mul_d_add
.private_extern s_mpv_mul_d_add
TYPE_FUNCTION(s_mpv_mul_d_add)
s_mpv_mul_d_add:
    pushl   %ebp
    movl    %esp, %ebp
    pushl   %edi
    pushl   %esi
    pxor    %mm2, %mm2              # carry = 0
    movl    12(%ebp), %ecx          # ecx = a_len
    movd    16(%ebp), %mm1          # mm1 = b
    movl    20(%ebp), %edi          # edi = c
    testl   %ecx, %ecx
    jz      2f                      # a_len == 0: store only the zero carry
    movl    8(%ebp), %esi           # esi = a
    cld
1:
    movd    (%esi), %mm0            # mm0 = *a
    addl    $4, %esi                # a++
    pmuludq %mm1, %mm0              # mm0 = b * *a, full 64-bit product
    paddq   %mm0, %mm2              # accumulator = product + carry
    movd    (%edi), %mm0            # mm0 = current word of c
    paddq   %mm0, %mm2              # accumulator += *c
    movd    %mm2, (%edi)            # *c = low word of the accumulator
    addl    $4, %edi                # c++
    psrlq   $32, %mm2               # high word becomes the next carry
    decl    %ecx                    # --a_len
    jnz     1b                      # loop until every word is consumed
2:
    movd    %mm2, (%edi)            # *c = final carry (overwrites)
    emms                            # reset MMX state before returning
    popl    %esi
    popl    %edi
    leave
    ret
    nop
# s_mpv_mul_d_add_prop: c += a[0 .. a_len-1] * b, rippling the carry
#
# Multiplies the a_len 32-bit words at a by the 32-bit word b and adds
# the result into the words at c.  A non-zero carry out of the last
# product word is added into the following words of c, one word at a
# time, until an addition no longer overflows.
#
# Stack frame once the prologue has run:
#   ebp - 12: saved ebx
#   ebp -  8: saved esi
#   ebp -  4: saved edi
#   ebp +  0: saved ebp
#   ebp +  4: return address
#   ebp +  8: a argument
#   ebp + 12: a_len argument
#   ebp + 16: b argument
#   ebp + 20: c argument
#
# Register use:
#   eax: word of c being updated during carry propagation
#   ebx: carry left over from the multiply loop
#   ecx: words of a still to process
#   esi: read cursor into a
#   edi: read/write cursor into c
#   mm0: current word of a, then its 64-bit product with b
#   mm1: b
#   mm2: 64-bit accumulator; holds the carry between iterations
#   mm3: current word of c
.globl s_mpv_mul_d_add_prop
.private_extern s_mpv_mul_d_add_prop
TYPE_FUNCTION(s_mpv_mul_d_add_prop)
s_mpv_mul_d_add_prop:
    pushl   %ebp
    movl    %esp, %ebp
    pushl   %edi
    pushl   %esi
    pushl   %ebx
    pxor    %mm2, %mm2              # carry = 0
    movl    12(%ebp), %ecx          # ecx = a_len
    movd    16(%ebp), %mm1          # mm1 = b
    movl    20(%ebp), %edi          # edi = c
    testl   %ecx, %ecx
    jz      2f                      # a_len == 0: carry stays zero
    movl    8(%ebp), %esi           # esi = a
    cld                             # stosl below must step edi upwards
1:
    movd    (%esi), %mm0            # mm0 = *a
    movd    (%edi), %mm3            # mm3 = *c
    addl    $4, %esi                # a++
    pmuludq %mm1, %mm0              # mm0 = b * *a, full 64-bit product
    paddq   %mm0, %mm2              # accumulator = product + carry
    paddq   %mm3, %mm2              # accumulator += *c
    movd    %mm2, (%edi)            # *c = low word of the accumulator
    addl    $4, %edi                # c++
    psrlq   $32, %mm2               # high word becomes the next carry
    decl    %ecx                    # --a_len
    jnz     1b                      # loop until every word is consumed
2:
    movd    %mm2, %ebx              # ebx = carry out of the multiply loop
    testl   %ebx, %ebx
    jz      4f                      # nothing left to propagate
    movl    (%edi), %eax
    addl    %ebx, %eax              # eax = *c + carry, CF = overflow
    stosl                           # *c++ = eax; CF is left untouched
    jnc     4f                      # carry fully absorbed
3:
    movl    (%edi), %eax            # eax = next word of c
    adcl    $0, %eax                # add the pending carry bit
    stosl                           # *c++ = eax; CF is left untouched
    jc      3b                      # keep rippling while words overflow
4:
    emms                            # reset MMX state before returning
    popl    %ebx
    popl    %esi
    popl    %edi
    leave
    ret
    nop
# s_mpv_sqr_add_prop: ps[2i], ps[2i+1] += pa[i] * pa[i], rippling the carry
#
# For each of the a_len 32-bit words at pa, squares the word and adds the
# 64-bit square into the next two words at ps, chaining the carry from
# one pair to the next.  A non-zero carry out of the last pair is added
# into the following words of ps until an addition no longer overflows.
#
# Stack frame once the prologue has run:
#   ebp - 12: saved ebx
#   ebp -  8: saved esi
#   ebp -  4: saved edi
#   ebp +  0: saved ebp
#   ebp +  4: return address
#   ebp +  8: pa argument
#   ebp + 12: a_len argument
#   ebp + 16: ps argument
#
# Register use:
#   eax: word of ps being updated during carry propagation
#   ebx: carry left over from the squaring loop
#   ecx: words of pa still to process
#   esi: read cursor into pa
#   edi: read/write cursor into ps
#   mm0: current word of pa, then its 64-bit square
#   mm2: 64-bit accumulator; holds the carry between steps
#   mm3: current word of ps
.globl s_mpv_sqr_add_prop
.private_extern s_mpv_sqr_add_prop
TYPE_FUNCTION(s_mpv_sqr_add_prop)
s_mpv_sqr_add_prop:
    pushl   %ebp
    movl    %esp, %ebp
    pushl   %edi
    pushl   %esi
    pushl   %ebx
    pxor    %mm2, %mm2              # carry = 0
    movl    12(%ebp), %ecx          # ecx = a_len
    movl    16(%ebp), %edi          # edi = ps
    testl   %ecx, %ecx
    jz      2f                      # a_len == 0: carry stays zero
    movl    8(%ebp), %esi           # esi = pa
    cld                             # stosl below must step edi upwards
1:
    movd    (%esi), %mm0            # mm0 = *pa
    movd    (%edi), %mm3            # mm3 = ps[0], low word of the pair
    addl    $4, %esi                # pa++
    pmuludq %mm0, %mm0              # mm0 = (*pa)^2, full 64-bit square
    paddq   %mm0, %mm2              # accumulator = square + carry
    paddq   %mm3, %mm2              # accumulator += ps[0]
    movd    4(%edi), %mm3           # mm3 = ps[1], high word of the pair
    movd    %mm2, (%edi)            # ps[0] = low word of the accumulator
    psrlq   $32, %mm2               # move on to the high word
    paddq   %mm3, %mm2              # accumulator += ps[1]
    movd    %mm2, 4(%edi)           # ps[1] = low word of the accumulator
    psrlq   $32, %mm2               # what remains is the next carry
    addl    $8, %edi                # ps += 2
    decl    %ecx                    # --a_len
    jnz     1b                      # loop until every word is consumed
2:
    movd    %mm2, %ebx              # ebx = carry out of the squaring loop
    testl   %ebx, %ebx
    jz      4f                      # nothing left to propagate
    movl    (%edi), %eax
    addl    %ebx, %eax              # eax = *ps + carry, CF = overflow
    stosl                           # *ps++ = eax; CF is left untouched
    jnc     4f                      # carry fully absorbed
3:
    movl    (%edi), %eax            # eax = next word of ps
    adcl    $0, %eax                # add the pending carry bit
    stosl                           # *ps++ = eax; CF is left untouched
    jc      3b                      # keep rippling while words overflow
4:
    emms                            # reset MMX state before returning
    popl    %ebx
    popl    %esi
    popl    %edi
    leave
    ret
    nop
#
# s_mpv_div_2dx1d: divide the 64-bit value Nhi:Nlo by a 32-bit divisor,
# which must be normalized so that its high bit is 1.
# This code is from NSPR.
#
# mp_err s_mpv_div_2dx1d(mp_digit Nhi, mp_digit Nlo, mp_digit divisor,
#                        mp_digit *qp, mp_digit *rp)
#
# Stores the quotient through qp and the remainder through rp, then
# returns zero.  The quotient must fit in 32 bits (Nhi < divisor);
# otherwise the div instruction raises a divide error.
#
# Stack layout after ebx has been pushed:
#   esp +  0: saved ebx
#   esp +  4: return address
#   esp +  8: Nhi argument
#   esp + 12: Nlo argument
#   esp + 16: divisor argument
#   esp + 20: qp argument
#   esp + 24: rp argument
#
# Register use:
#   eax: Nlo, then the quotient, then the zero return value
#   ebx: qp, then rp
#   edx: Nhi, then the remainder
#
.globl s_mpv_div_2dx1d
.private_extern s_mpv_div_2dx1d
TYPE_FUNCTION(s_mpv_div_2dx1d)
s_mpv_div_2dx1d:
    pushl   %ebx
    movl    8(%esp), %edx           # edx = Nhi
    movl    12(%esp), %eax          # eax = Nlo
    divl    16(%esp)                # eax = edx:eax / divisor, edx = remainder
    movl    20(%esp), %ebx
    movl    %eax, (%ebx)            # *qp = quotient
    movl    24(%esp), %ebx
    movl    %edx, (%ebx)            # *rp = remainder
    xorl    %eax, %eax              # return zero
    popl    %ebx
    ret
    nop
#ifndef DARWIN
# Emit an empty .note.GNU-stack section: this object does not need an
# executable stack.
.section .note.GNU-stack, "", @progbits
.previous
#endif