;****************************************************************************** ;* Core video DSP functions ;* Copyright (c) 2012 Ronald S. Bultje <rsbultje@gmail.com> ;* ;* This file is part of FFmpeg. ;* ;* FFmpeg is free software; you can redistribute it and/or ;* modify it under the terms of the GNU Lesser General Public ;* License as published by the Free Software Foundation; either ;* version 2.1 of the License, or (at your option) any later version. ;* ;* FFmpeg is distributed in the hope that it will be useful, ;* but WITHOUT ANY WARRANTY; without even the implied warranty of ;* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU ;* Lesser General Public License for more details. ;* ;* You should have received a copy of the GNU Lesser General Public ;* License along with FFmpeg; if not, write to the Free Software ;* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA ;******************************************************************************
%include"libavutil/x86/x86util.asm"
SECTION .text
; slow vertical extension loop function. Works with variable-width, and
; does per-line reading/writing of source data
;-----------------------------------------------------------------------------
; V_COPY_ROW type, h
;   %1 = section being emitted (top / body / bottom). It is also pasted into
;        the local loop labels, so the macro can be expanded more than once
;        inside the same function without label clashes.
;   %2 = register holding the number of rows to write; counted down to zero.
;
; Copies rows from srcq to dstq, mmsize bytes per inner iteration, using
; unaligned loads/stores (movu).
;
; Expects: r7mp holds the negated row width and srcq/dstq point at the END of
;          the current row (the caller in this file adds w to both pointers
;          and then negates r7mp), so wq runs from -w upwards.
; Only the "body" expansion advances srcq per row; "top" and "bottom" keep
; re-reading the same source row while dstq moves on.
; The final mmsize-byte copy ending exactly at the row end handles the tail.
; NOTE(review): the inner loop always runs once and the tail copy reaches
; back mmsize bytes, so this presumably requires w >= mmsize -- confirm
; against the caller.
; Clobbers: m0, wq, %2, flags. Advances dstq (and srcq for "body").
;-----------------------------------------------------------------------------
%macro V_COPY_ROW 2 ; type (top/body/bottom), h
.%1_y_loop:                                     ; do {
    mov              wq, r7mp                   ;   w = -width (r7mp = wmp)
.%1_x_loop:                                     ;   do {
    movu             m0, [srcq+wq]              ;     m0 = read($mmsize)
    movu      [dstq+wq], m0                     ;     write(m0, $mmsize)
    add              wq, mmsize                 ;     w += $mmsize
    cmp              wq, -mmsize                ;   } while (w < -$mmsize);
    jl .%1_x_loop
    movu             m0, [srcq-mmsize]          ;   m0 = read($mmsize)  (row tail)
    movu  [dstq-mmsize], m0                     ;   write(m0, $mmsize)
%ifidn %1, body                                 ;   if ($type == body) {
    add            srcq, src_strideq            ;     src += src_stride
%endif                                          ;   }
    add            dstq, dst_strideq            ;   dst += dst_stride
    dec              %2                         ; } while (--$h);
    jnz .%1_y_loop
%endmacro
;                  .----. <- zero
;                  |    |    <- top is copied from first line in body of source
;                  |----| <- start_y
;                  |    |    <- body is copied verbatim (line-by-line) from source
;                  |----| <- end_y
;                  |    |    <- bottom is copied from last line in body of source
;                  '----' <- bh
;-----------------------------------------------------------------------------
; emu_edge_vvar(dst, dst_stride, src, src_stride, start_y, end_y, bh, w)
;
; Vertical edge extension for a variable row width, built from three
; expansions of V_COPY_ROW:
;   top    : start_y rows, all copied from the first source row
;   body   : (end_y - start_y) rows, copied row by row from the source
;   bottom : (bh - end_y) rows, all copied from the last body source row
;
; x86-64: seven arguments are loaded into registers by cglobal; w is read
;         from its argument slot (r7mp) inside V_COPY_ROW.
; x86-32: only dst is loaded by cglobal; src/start_y/end_y/bh are loaded
;         by hand and both strides stay memory operands via %define.
;
; The w argument slot (r7mp) is negated in place, and srcq/dstq are moved to
; the end of the row, which is the layout V_COPY_ROW indexes from.
; NOTE(review): the body section is emitted without a zero-row check, so this
; presumably requires end_y > start_y -- confirm against the caller.
;-----------------------------------------------------------------------------
INIT_XMM sse
%if ARCH_X86_64
cglobal emu_edge_vvar, 7, 8, 1, dst, dst_stride, src, src_stride, \
                                start_y, end_y, bh, w
%else ; x86-32
cglobal emu_edge_vvar, 1, 6, 1, dst, src, start_y, end_y, bh, w
%define src_strideq r3mp
%define dst_strideq r1mp
    mov            srcq, r2mp
    mov        start_yq, r4mp
    mov          end_yq, r5mp
    mov             bhq, r6mp
%endif
    sub             bhq, end_yq                 ; bh    -= end_y   (bottom rows)
    sub          end_yq, start_yq               ; end_y -= start_y (body rows)
    add            srcq, r7mp                   ; src += w  (r7mp = wmp)
    add            dstq, r7mp                   ; dst += w  (r7mp = wmp)
    neg            r7mp                         ; w = -w    (r7mp = wmp)
    test       start_yq, start_yq               ; if (start_y) {
    jz .body
    V_COPY_ROW      top, start_yq               ;   v_copy_row(top, start_y)
.body:                                          ; }
    V_COPY_ROW     body, end_yq                 ; v_copy_row(body, end_y)
    test            bhq, bhq                    ; if (bh) {
    jz .end
    sub            srcq, src_strideq            ;   src -= src_stride (back to last body row)
    V_COPY_ROW   bottom, bhq                    ;   v_copy_row(bottom, bh)
.end:                                           ; }
    RET
;-----------------------------------------------------------------------------
; hvar_fn
;
; Emits emu_edge_hvar(dst, dst_stride, start_x, n_words, h) for the
; instruction set selected by the INIT_* in effect where the macro is
; expanded. w is a scratch register, not an argument.
;
; For each of h rows: reads the single byte at dst[start_x], replicates it
; into every byte of m0 and stores it over the 2*n_words bytes starting at
; dst, mmsize bytes per store.
;
; Setup: dstq is moved to the end of the fill region (dst + 2*n_words),
; n_words is negated, and start_x is rebased by the same amount so that
; [dstq+start_xq] still addresses the original dst + start_x.
; The store after the inner loop covers the last mmsize bytes of the region.
; NOTE(review): the inner loop always stores once and the tail store reaches
; back mmsize bytes, so this presumably requires 2*n_words >= mmsize --
; confirm against the caller.
;-----------------------------------------------------------------------------
%macro hvar_fn 0
cglobal emu_edge_hvar, 5, 6, 1, dst, dst_stride, start_x, n_words, h, w
    lea            dstq, [dstq+n_wordsq*2]      ; dst += 2 * n_words (end of fill)
    neg        n_wordsq                         ; n_words = -n_words
    lea        start_xq, [start_xq+n_wordsq*2]  ; keep dst+start_x on the same byte
.y_loop:                                        ; do {
%if cpuflag(avx2)
    vpbroadcastb     m0, [dstq+start_xq]        ;   m0 = splat(read(1))
    mov              wq, n_wordsq               ;   initialize w
%else
    movzx            wd, byte [dstq+start_xq]   ;   w = read(1)
    imul             wd, 0x01010101             ;   w *= 0x01010101 (byte -> dword)
    movd             m0, wd
    mov              wq, n_wordsq               ;   initialize w
    pshufd           m0, m0, q0000              ;   splat
%endif ; avx2
.x_loop:                                        ;   do {
    movu    [dstq+wq*2], m0                     ;     write($reg, $mmsize)
    add              wq, mmsize/2               ;     w += $mmsize/2
    cmp              wq, -(mmsize/2)            ;   } while (w < -$mmsize/2)
    jl .x_loop
    movu  [dstq-mmsize], m0                     ;   write($reg, $mmsize)  (row tail)
    add            dstq, dst_strideq            ;   dst += dst_stride
    dec              hq                         ; } while (--h)
    jnz .y_loop
    RET
%endmacro
; macro to read/write a horizontal number of pixels (%2) to/from registers
; on sse, - fills xmm0-15 for consecutive sets of 16 pixels
;         - if (%2 & 8) fills 8 bytes into xmm$next
;         - if (%2 & 4) fills 4 bytes into xmm$next
;         - if (%2 & 3) fills 1, 2 or 4 bytes in eax
; writing data out is in the same way
;
; NOTE(review): only the macro header and its two %assign counters are
; present here; no macro body and no matching %endmacro follow -- TODO
; restore the missing part before assembling.
%macro READ_NUM_BYTES 2
%assign %%off 0; offset in source buffer
%assign %%xmm_idx 0; xmm register index
; left/right (horizontal) fast extend functions
; these are essentially identical to the vertical extend ones above,
; just left/right separated because number of pixels to extend is
; obviously not the same on both sides.
;-----------------------------------------------------------------------------
; prefetch(buf, stride, h)
;
; Issues one prefetcht0 per row: at buf, buf + stride, buf + 2*stride, ...
; The loop body always runs once, then repeats while the 32-bit counter h is
; still greater than zero after the decrement (signed compare via jg).
; Uses no vector registers; only bufq, hd and the flags are modified.
;-----------------------------------------------------------------------------
INIT_MMX mmxext
cglobal prefetch, 3, 3, 0, buf, stride, h
.loop:                                          ; do {
    prefetcht0 [bufq]                           ;   prefetch(buf)
    add      bufq, strideq                      ;   buf += stride
    dec        hd                               ; } while (--h > 0)
    jg .loop
    RET
; Messung V0.5 in Prozent
; ¤ Die Informationen auf dieser Webseite wurden
; nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
; noch Qualität der bereit gestellten Informationen zugesichert. 0.14 Bemerkung:
; (vorverarbeitet am 2026-06-05)
; ¤
; Die Informationen auf dieser Webseite wurden
; nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
; noch Qualität der bereit gestellten Informationen zugesichert.
; Bemerkung:
; Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.