;
; jcphuff-sse2.asm - prepare data for progressive Huffman encoding
; (64-bit SSE2)
;
; Copyright (C) 2016, 2018, Matthieu Darbois
; Copyright (C) 2023, Aliaksiej Kandracienka.
; Copyright (C) 2024, D. R. Commander.
;
; Based on the x86 SIMD extension for IJG JPEG library
; Copyright (C) 1999-2006, MIYASAKA Masaru.
; For conditions of distribution and use, see copyright notice in jsimdext.inc
;
; This file should be assembled with NASM (Netwide Assembler) or Yasm.
;
; This file contains an SSE2 implementation of data preparation for progressive
; Huffman encoding.  See jcphuff.c for more details.
; --------------------------------------------------------------------------
; Macros to load data for jsimd_encode_mcu_AC_first_prepare_sse2() and
; jsimd_encode_mcu_AC_refine_prepare_sse2()
; LOAD16 - gather 16 words from BLOCK, in the order given by LUT, into two
; vector registers.
;
; In:    LUT    address of a table of INT-sized indices (entries 0..15 read)
;        BLOCK  base address of the 16-bit words being gathered
; Out:   X0     words BLOCK[LUT[0]] .. BLOCK[LUT[7]]  in lanes 0..7
;        X1     words BLOCK[LUT[8]] .. BLOCK[LUT[15]] in lanes 0..7
;        N0/N1  cleared to zero
; Clobb: T0, T1
;
; X0 and X1 need no clearing beforehand: all eight word lanes of each register
; are overwritten by the pinsrw sequence.  Each index is loaded through
; T0d/T1d and then used through T0/T1 as the address index (presumably the
; 32-bit and full-width views of the same registers -- the aliases are defined
; outside this macro).
%macro LOAD16 0
    pxor        N0, N0
    pxor        N1, N1

    mov         T0d, INT [LUT +  0*SIZEOF_INT]
    mov         T1d, INT [LUT +  8*SIZEOF_INT]
    pinsrw      X0, word [BLOCK + T0 * 2], 0
    pinsrw      X1, word [BLOCK + T1 * 2], 0

    mov         T0d, INT [LUT +  1*SIZEOF_INT]
    mov         T1d, INT [LUT +  9*SIZEOF_INT]
    pinsrw      X0, word [BLOCK + T0 * 2], 1
    pinsrw      X1, word [BLOCK + T1 * 2], 1

    mov         T0d, INT [LUT +  2*SIZEOF_INT]
    mov         T1d, INT [LUT + 10*SIZEOF_INT]
    pinsrw      X0, word [BLOCK + T0 * 2], 2
    pinsrw      X1, word [BLOCK + T1 * 2], 2

    mov         T0d, INT [LUT +  3*SIZEOF_INT]
    mov         T1d, INT [LUT + 11*SIZEOF_INT]
    pinsrw      X0, word [BLOCK + T0 * 2], 3
    pinsrw      X1, word [BLOCK + T1 * 2], 3

    mov         T0d, INT [LUT +  4*SIZEOF_INT]
    mov         T1d, INT [LUT + 12*SIZEOF_INT]
    pinsrw      X0, word [BLOCK + T0 * 2], 4
    pinsrw      X1, word [BLOCK + T1 * 2], 4

    mov         T0d, INT [LUT +  5*SIZEOF_INT]
    mov         T1d, INT [LUT + 13*SIZEOF_INT]
    pinsrw      X0, word [BLOCK + T0 * 2], 5
    pinsrw      X1, word [BLOCK + T1 * 2], 5

    mov         T0d, INT [LUT +  6*SIZEOF_INT]
    mov         T1d, INT [LUT + 14*SIZEOF_INT]
    pinsrw      X0, word [BLOCK + T0 * 2], 6
    pinsrw      X1, word [BLOCK + T1 * 2], 6

    mov         T0d, INT [LUT +  7*SIZEOF_INT]
    mov         T1d, INT [LUT + 15*SIZEOF_INT]
    pinsrw      X0, word [BLOCK + T0 * 2], 7
    pinsrw      X1, word [BLOCK + T1 * 2], 7
%endmacro
pcmpeqw xmm0, ZERO
pcmpeqw xmm1, ZERO
pcmpeqw xmm2, ZERO
pcmpeqw xmm3, ZERO
pcmpeqw xmm4, ZERO
pcmpeqw xmm5, ZERO
pcmpeqw xmm6, ZERO
pcmpeqw xmm7, ZERO
pcmpgtw N0, X0
paddw X0, N0
pxor X0, N0
psrlw X0, AL
movdqa XMMWORD [VALUES + (0) * 2], X0
pcmpeqw X0, ONE
packsswb N0, ZERO
packsswb X0, ZERO
pmovmskb T0d, N0 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
pmovmskb T1d, X0 ; idx = _mm_movemask_epi8(x1); shrSIGN, 8; make room for sizebits
shl T0, 56 orSIGN, T0
bsr T1d, T1d ; idx = 16 - (__builtin_clz(idx)>>1); jz .CONTINUER8 ; if (idx) { mov EOB, KK add EOB, T1d ; EOB = k + idx;
.CONTINUER8: add VALUES, 8*2 jmp .PADDINGR
; .TRYR7 - process one more group of eight words loaded by LOAD7, zero-pad the
; rest of the VALUES buffer, store the accumulated sign bits and return EOB.
;
; Every instruction below was already present in this region; several had been
; fused onto one line, had lost the space after the mnemonic, or had been
; absorbed into a preceding comment.  They are restored one per line, in their
; original order.  (LOAD7, REDUCE0 and UNCOLLECT_ARGS are defined outside this
; region.)
.TRYR7:
    LOAD7
    pcmpgtw     N0, X0                  ; N0 = per-word mask (N0 > X0); a sign
                                        ; mask if LOAD7 leaves N0 zeroed, as
                                        ; LOAD16 does -- TODO confirm
    paddw       X0, N0
    pxor        X0, N0                  ; with N0 as sign mask: X0 = abs(X0)
    psrlw       X0, AL
    movdqa      XMMWORD [VALUES + (0) * 2], X0
    pcmpeqw     X0, ONE                 ; mask of words equal to ONE
    packsswb    N0, ZERO                ; narrow both masks to one byte per word
    packsswb    X0, ZERO
    pmovmskb    T0d, N0                 ; lsignbits.val16u[k>>4] = _mm_movemask_epi8(neg);
    pmovmskb    T1d, X0                 ; idx = _mm_movemask_epi8(x1);
    shr         SIGN, 8                 ; make room for sizebits
    shl         T0, 56                  ; move the new mask byte to the top ...
    or          SIGN, T0                ; ... and merge it into SIGN
    bsr         T1d, T1d                ; idx = 16 - (__builtin_clz(idx)>>1);
    jz          .CONTINUER7             ; if (idx) {   (bsr sets ZF on zero input)
    mov         EOB, KK
    add         EOB, T1d                ; EOB = k + idx;
.CONTINUER7:
    add         VALUES, 8*2             ; advance past the eight words just stored

.PADDINGR:
    ; K = ceil(LEN / 8) - DCTSIZE2/8: the count of 8-word groups still to be
    ; zero-filled, counted upwards towards zero by the loop below.
    mov         K, LEN
    add         K, 7
    and         K, -8
    shr         K, 3
    sub         K, DCTSIZE2/8
    jz          .EPADDINGR              ; nothing left to pad
    align       16
.ZEROLOOPR:
    movdqa      XMMWORD [VALUES + 0], ZERO
    shr         SIGN, 8                 ; keep SIGN in step with the padded group
    add         VALUES, 8*2
    inc         K
    jnz         .ZEROLOOPR
.EPADDINGR:
    not         SIGN
    sub         VALUES, DCTSIZE2*2      ; rewind VALUES to the start of the buffer
    mov         MMWORD [r15+SIZEOF_MMWORD], SIGN

    REDUCE0

    mov         eax, EOB                ; return value
    UNCOLLECT_ARGS 6
    ; NOTE(review): no conditional assembly is visible around the next two
    ; instructions here; confirm against the function prologue (not shown)
    ; that they apply to every target ABI.
    movdqa      ZERO, XMMWORD [rsp]
    mov         rsp, rbp
    pop         rbp
    ret
; Remove the single-line macro aliases (register and argument names) so that
; they do not leak into anything assembled after this point.
%undef ZERO
%undef ONE
%undef X0
%undef X1
%undef N0
%undef N1
%undef AL
%undef K
%undef KK
%undef EOB
%undef SIGN
%undef LUT
%undef T0
%undef T0d
%undef T1
%undef T1d
%undef BLOCK
%undef VALUES
%undef LEN
%undef LENEND
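; For some reason, the OS X linker does not honor the request to align the
; segment unless we do this.
; NOTE(review): the trailing "align32" on this line looks like an `align 32`
; directive that was fused into the comment; confirm and restore it as a
; standalone line.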
Messung V0.5 in Prozent
¤ Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
0.18
Bemerkung:
(vorverarbeitet am 2026-06-05)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.