dnl  AMD K7 mpn_copyi -- copy limb vector, incrementing.
dnl
dnl  Copyright 1999, 2000, 2002, 2003 Free Software Foundation, Inc.
dnl
dnl  This file is part of the GNU MP Library.
dnl
dnl  The GNU MP Library is free software; you can redistribute it and/or modify
dnl  it under the terms of either:
dnl
dnl    * the GNU Lesser General Public License as published by the Free
dnl      Software Foundation; either version 3 of the License, or (at your
dnl      option) any later version.
dnl
dnl  or
dnl
dnl    * the GNU General Public License as published by the Free Software
dnl      Foundation; either version 2 of the License, or (at your option) any
dnl      later version.
dnl
dnl  or both in parallel, as here.
dnl
dnl  The GNU MP Library is distributed in the hope that it will be useful, but
dnl  WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
dnl  or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
dnl  for more details.
dnl
dnl  You should have received copies of the GNU General Public License and the
dnl  GNU Lesser General Public License along with the GNU MP Library.  If not,
dnl  see https://www.gnu.org/licenses/.
dnl  The quoted path and the closing parenthesis must stay on one line;
dnl  otherwise the newline becomes part of the file name passed to include.
include(`../config.m4')
C    alignment dst/src, A=0mod8 N=4mod8
C       A/A   A/N   N/A   N/N
C K7    0.75  1.0   1.0   0.75
C void mpn_copyi (mp_ptr dst, mp_srcptr src, mp_size_t size);
C
C Copy src,size to dst,size.
C
C This code at 0.75 or 1.0 c/l is always faster than a plain rep movsl at
C 1.33 c/l.
C
C The K7 can do a 64-bit load and 64-bit store in one cycle (optimization
C guide 22007 appendix B), so 0.5 c/l should be possible, however nothing
C under 0.7 c/l is known.  Apparently only two 32-bit stores can be done in
C one cycle, so perhaps some scheduling is needed to ensure it's a
C load+store in each cycle, not store+store.
C
C If both source and destination are unaligned then one limb is processed at
C the start to make them aligned and so get 0.75 c/l, whereas if they'd been
C used unaligned it would be 1.5 c/l.
dnl  Offsets of the three arguments, highest first.  Each defframe call
dnl  must be complete on a single line.
defframe(PARAM_SIZE,12)
defframe(PARAM_SRC, 8)
defframe(PARAM_DST, 4)

dnl  parameter space reused: once size has been loaded into a register its
dnl  slot is used to hold the caller's ebx
define(SAVE_EBX,`PARAM_SIZE')

dnl  minimum 5 since the unrolled code can't handle less than 5
deflit(UNROLL_THRESHOLD, 5)
C mpn_copyi entry.
C
C Sizes below UNROLL_THRESHOLD are copied one limb per iteration at
C L(simple); size zero returns immediately.  Larger sizes go to L(unroll),
C which copies four limbs per iteration through mm0/mm1 and then finishes
C the remaining 0 to 3 limbs.
C
C ebx is stored to SAVE_EBX before it is used as scratch and is reloaded on
C every exit path.  The MMX path executes emms before returning.

	TEXT
	ALIGN(32)
PROLOGUE(mpn_copyi)
deflit(`FRAME',0)

	movl	PARAM_SIZE, %ecx
	movl	%ebx, SAVE_EBX		C size is in ecx now, its slot is free

	movl	PARAM_SRC, %eax
	movl	PARAM_DST, %edx

	cmpl	$UNROLL_THRESHOLD, %ecx
	jae	L(unroll)

	orl	%ecx, %ecx		C set ZF when size is zero
	jz	L(simple_done)

L(simple):
	C eax	src, incrementing
	C ebx	scratch
	C ecx	counter
	C edx	dst, incrementing
	C
	C this loop is 2 cycles/limb

	movl	(%eax), %ebx
	movl	%ebx, (%edx)
	decl	%ecx
	leal	4(%eax), %eax		C leal leaves the flags from decl intact
	leal	4(%edx), %edx
	jnz	L(simple)

L(simple_done):
	movl	SAVE_EBX, %ebx
	ret


L(unroll):
	movl	%eax, %ebx
	leal	-12(%eax,%ecx,4), %eax	C src end - 12
	subl	$3, %ecx		C size-3

	andl	%edx, %ebx		C bit 2 set only if src and dst both 4mod8
	leal	(%edx,%ecx,4), %edx	C dst end - 12
	negl	%ecx

	testl	$4, %ebx   C testl to pad code closer to 16 bytes for L(top)
	jz	L(aligned)

	C both src and dst unaligned, process one limb to align them
	movl	(%eax,%ecx,4), %ebx
	movl	%ebx, (%edx,%ecx,4)
	incl	%ecx
L(aligned):


	ALIGN(16)
L(top):
	C eax	src end - 12
	C ebx
	C ecx	counter, negative, limbs
	C edx	dst end - 12

	movq	(%eax,%ecx,4), %mm0
	movq	8(%eax,%ecx,4), %mm1
	addl	$4, %ecx
	movq	%mm0, -16(%edx,%ecx,4)	C -16 compensates for the addl above
	movq	%mm1, -16+8(%edx,%ecx,4)
	ja	L(top)			C jump no carry and not zero


	C now %ecx is 0 to 3 representing respectively 3 to 0 limbs remaining

	testb	$2, %cl
	jnz	L(finish_not_two)

	C ecx is 0 or 1 here, so at least two limbs remain
	movq	(%eax,%ecx,4), %mm0
	movq	%mm0, (%edx,%ecx,4)
L(finish_not_two):

	testb	$1, %cl
	jnz	L(done)

	C ecx is even, so the very last limb is still to be copied
	movl	8(%eax), %ebx
	movl	%ebx, 8(%edx)

L(done):
	movl	SAVE_EBX, %ebx
	emms
	ret

EPILOGUE()