/*
* Copyright © 2018, VideoLAN and dav1d authors
* Copyright © 2020, Martin Storsjo
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice, this
* list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright notice,
* this list of conditions and the following disclaimer in the documentation
* and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
* ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
* DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
* ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
* (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
* LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
* ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
* SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "src/arm/asm.S"
#include "util.S"
#include "cdef_tmpl.S"
// pad_top_bot_16: copy two border rows (top or bottom) into the padded tmp
// buffer, writing two extra pixels on the left and on the right of each row.
//
// Macro arguments:
//   s1, s2  core registers pointing at the first and second source row
//   w       row width in 16-bit pixels (byte offsets are computed as 2*w)
//   stride  tmp row stride in 16-bit elements (r0 advances by 2*stride bytes)
//   r1      NEON register receiving the first row  (d0/q0)
//   r2      NEON register receiving the second row (d2/q1)
//   align   alignment hint, in bits, for the vld1/vst1 row accesses
//   ret     nonzero: leave the enclosing function with pop {r4-r8,pc} once
//           both rows are stored; zero: step r0 past the second row and
//           continue at the local label 3 that closes the macro
//
// Register expectations on entry:
//   r0   destination pointer, addressing the first pixel of the first row
//        (the left border pixels are stored at r0 - 4)
//   r7   edge flags; bit 0 = CDEF_HAVE_LEFT, bit 1 = CDEF_HAVE_RIGHT
//   s12  32-bit fill pattern that is stored wherever an edge is missing
//
// Clobbers: s8-s11, the two NEON row registers, r0 and the condition flags.
// Uses the numeric local labels 1, 2 and 3.
.macro pad_top_bot_16 s1, s2, w, stride, r1, r2, align, ret
        tst             r7,  #1 // CDEF_HAVE_LEFT
        beq             2f
        // CDEF_HAVE_LEFT
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f
        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        // Load everything first: left pair, row and right pair of both rows.
        vldr            s8,  [\s1, #-4]
        vld1.16         {\r1}, [\s1, :\align]
        vldr            s9,  [\s1, #2*\w]
        vldr            s10, [\s2, #-4]
        vld1.16         {\r2}, [\s2, :\align]
        vldr            s11, [\s2, #2*\w]
        vstr            s8,  [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s9,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s10, [r0, #-4]
        vst1.16         {\r2}, [r0, :\align]
        vstr            s11, [r0, #2*\w]
.if \ret
        pop             {r4-r8,pc}
.else
        add             r0,  r0,  #2*\stride
        b               3f
.endif

1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        // Right border comes from the fill pattern in s12.
        vldr            s8,  [\s1, #-4]
        vld1.16         {\r1}, [\s1, :\align]
        vldr            s9,  [\s2, #-4]
        vld1.16         {\r2}, [\s2, :\align]
        vstr            s8,  [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s9,  [r0, #-4]
        vst1.16         {\r2}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
.if \ret
        pop             {r4-r8,pc}
.else
        add             r0,  r0,  #2*\stride
        b               3f
.endif

2:
        // !CDEF_HAVE_LEFT
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f
        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
        // Left border comes from the fill pattern in s12.
        vld1.16         {\r1}, [\s1, :\align]
        vldr            s8,  [\s1, #2*\w]
        vld1.16         {\r2}, [\s2, :\align]
        vldr            s9,  [\s2, #2*\w]
        vstr            s12, [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s8,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s12, [r0, #-4]
        vst1.16         {\r2}, [r0, :\align]
        vstr            s9,  [r0, #2*\w]
.if \ret
        pop             {r4-r8,pc}
.else
        add             r0,  r0,  #2*\stride
        b               3f
.endif

1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        // Both borders come from the fill pattern in s12.
        vld1.16         {\r1}, [\s1, :\align]
        vld1.16         {\r2}, [\s2, :\align]
        vstr            s12, [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        vstr            s12, [r0, #-4]
        vst1.16         {\r2}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
.if \ret
        pop             {r4-r8,pc}
.else
        add             r0,  r0,  #2*\stride
.endif
3:
.endm
// void dav1d_cdef_paddingX_16bpc_neon(uint16_t *tmp, const pixel *src,
//                                     ptrdiff_t src_stride,
//                                     const pixel (*left)[2],
//                                     const pixel *const top,
//                                     const pixel *const bottom, int h,
//                                     enum CdefEdgeFlags edges);
//
// padding_func_16: emit one exported padding function per block width.
// The function copies h rows of w pixels from src into tmp and surrounds
// them with two rows above, two rows below and two pixels on either side.
// Border positions whose edge flag is clear are filled with 0x8000.
//
// Macro arguments:
//   w       block width in 16-bit pixels (the .if blocks special-case 8)
//   stride  tmp row stride in 16-bit elements
//   r1      NEON register holding one row of pixels (d0/q0)
//   r2      second NEON row register (d2/q1), used for top/bottom rows
//   align   alignment hint, in bits, for the vld1/vst1 row accesses
//
// Register use inside the function:
//   r0  = tmp            r1 = src          r2 = src_stride (added to
//   r3  = left                              pointers as a byte offset)
//   r4  = top            r5 = bottom       (loaded from the stack)
//   r6  = h              r7 = edges        (loaded from the stack)
//   r8  = second top/bottom row            r12 = scratch store pointer
//   q3  = 0x8000 in every 16-bit lane; s12 is its low word and serves as
//         the two-pixel fill value for missing left/right borders
//
// Edge flag bits tested: 1 = LEFT, 2 = RIGHT, 4 = TOP, 8 = BOTTOM.
// The row loops test the counter after the first iteration, so at least
// one row is always copied.
.macro padding_func_16 w, stride, r1, r2, align
function cdef_padding\w\()_16bpc_neon, export=1
        push            {r4-r8,lr}
        // Six registers were pushed, so the stack arguments start at sp+24.
        ldrd            r4,  r5,  [sp, #24]
        ldrd            r6,  r7,  [sp, #32]
        vmov.i16        q3,  #0x8000
        tst             r7,  #4 // CDEF_HAVE_TOP
        bne             1f
        // !CDEF_HAVE_TOP
        // Start two rows above and two pixels left of tmp, then fill both
        // rows: 2*stride elements, i.e. one 32-byte store for stride 8
        // and two for stride 16.
        sub             r12, r0,  #2*(2*\stride+2)
        vmov.i16        q2,  #0x8000
        vst1.16         {q2,q3}, [r12]!
.if \w == 8
        vst1.16         {q2,q3}, [r12]!
.endif
        b               3f
1:
        // CDEF_HAVE_TOP
        add             r8,  r4,  r2
        // Rewind r0 by two rows; the macro below advances it back again.
        sub             r0,  r0,  #2*(2*\stride)
        pad_top_bot_16  r4,  r8,  \w, \stride, \r1, \r2, \align, 0

        // Middle section
3:
        tst             r7,  #1 // CDEF_HAVE_LEFT
        beq             2f
        // CDEF_HAVE_LEFT
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f
        // CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
        // d2 = s4:s5; the left pair lands in both halves, then s5 is
        // overwritten with the pair to the right of the row.
        vld1.32         {d2[]}, [r3, :32]!
        vldr            s5,  [r1, #2*\w]
        vld1.16         {\r1}, [r1, :\align], r2
        subs            r6,  r6,  #1
        vstr            s4,  [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s5,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        bgt             0b
        b               3f
1:
        // CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        vld1.32         {d2[]}, [r3, :32]!
        vld1.16         {\r1}, [r1, :\align], r2
        subs            r6,  r6,  #1
        vstr            s4,  [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        bgt             1b
        b               3f
2:
        tst             r7,  #2 // CDEF_HAVE_RIGHT
        beq             1f
        // !CDEF_HAVE_LEFT+CDEF_HAVE_RIGHT
0:
        vldr            s4,  [r1, #2*\w]
        vld1.16         {\r1}, [r1, :\align], r2
        subs            r6,  r6,  #1
        vstr            s12, [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s4,  [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        bgt             0b
        b               3f
1:
        // !CDEF_HAVE_LEFT+!CDEF_HAVE_RIGHT
        vld1.16         {\r1}, [r1, :\align], r2
        subs            r6,  r6,  #1
        vstr            s12, [r0, #-4]
        vst1.16         {\r1}, [r0, :\align]
        vstr            s12, [r0, #2*\w]
        add             r0,  r0,  #2*\stride
        bgt             1b

3:
        tst             r7,  #8 // CDEF_HAVE_BOTTOM
        bne             1f
        // !CDEF_HAVE_BOTTOM
        // r0 now addresses the row after the last copied one; begin the
        // fill two pixels to its left. q2 is set again here because
        // pad_top_bot_16 uses s8-s11 (= q2) as scratch.
        sub             r12, r0,  #4
        vmov.i16        q2,  #0x8000
        vst1.16         {q2,q3}, [r12]!
.if \w == 8
        vst1.16         {q2,q3}, [r12]!
.endif
        pop             {r4-r8,pc}
1:
        // CDEF_HAVE_BOTTOM
        add             r8,  r5,  r2
        // ret=1: the macro returns from the function on every path.
        pad_top_bot_16  r5,  r8,  \w, \stride, \r1, \r2, \align, 1
endfunc
.endm
// Instantiate the padding functions.
// Arguments: width, tmp stride, row register 1, row register 2, alignment.
// Width 8 works on q registers, width 4 on d registers.
padding_func_16 8, 16, q0, q1, 128
padding_func_16 4, 8,  d0, d2, 64

// NOTE(review): tables, filter and find_dir are not defined in this file;
// presumably they are macros from cdef_tmpl.S - confirm.
tables

filter 8, 16
filter 4, 16

find_dir 16
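// NOTE(review): the remaining lines are not assembler source. They appear to
// be footer text appended by the tool that exported this file.
//   Measurement V0.5 in percent: C=87 H=73 G=80
//   Processing time: 0.11 seconds (preprocessed on 2026-06-07)
//   (c) Formatika GbR, Germany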