// Copyright (c) 2010 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include "yuv_row.h"
extern "C" {
// x64 compiler doesn't support MMX and inline assembler. Use SSE2 intrinsics.
// Start addresses of the U and V coefficient tables, expressed as byte offsets
// from kCoefficientsRgbY. Every lookup in this file reads an 8-byte entry
// selected by a uint8_t sample, so one table spans 256 * 8 = 2048 bytes.
// NOTE(review): this assumes kCoefficientsRgbY (declared outside this file) is
// laid out as three consecutive 2048-byte tables in Y, U, V order - confirm
// against its definition.
#define kCoefficientsRgbU (reinterpret_cast <const uint8_t*>(kCoefficientsRgbY) + 2048 )
#define kCoefficientsRgbV (reinterpret_cast <const uint8_t*>(kCoefficientsRgbY) + 4096 )
#include <emmintrin.h>
// Converts one row of Y, U and V samples to 32-bit pixels using SSE2
// intrinsics, without horizontal scaling.
//
// Every sample selects an 8-byte entry (four signed 16-bit lanes) from the
// kCoefficientsRgbY/U/V lookup tables. The three entries are added with
// signed saturation, shifted right arithmetically by 6 and packed to
// unsigned bytes with saturation, which yields the 4 output bytes of a pixel.
//
// One U/V pair is shared by two horizontally adjacent pixels.
//
//   y_buf   - luma row; |width| bytes are read.
//   u_buf   - U row; (width + 1) / 2 bytes are read.
//   v_buf   - V row; (width + 1) / 2 bytes are read.
//   rgb_buf - destination; 4 bytes are written per pixel. Pixel pairs are
//             written with an unaligned-safe 8-byte store, a trailing odd
//             pixel with a plain 32-bit store.
//   width   - number of pixels to produce; nothing is written when <= 0.
static void FastConvertYUVToRGB32Row_SSE2(const uint8_t* y_buf,
                                          const uint8_t* u_buf,
                                          const uint8_t* v_buf,
                                          uint8_t* rgb_buf,
                                          int width) {
  const uint8_t* const y_table =
      reinterpret_cast<const uint8_t*>(kCoefficientsRgbY);

  // Main loop: two pixels per iteration, both using the same chroma term.
  while (width >= 2) {
    const __m128i chroma = _mm_adds_epi16(
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(
            kCoefficientsRgbU + 8 * *u_buf++)),
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(
            kCoefficientsRgbV + 8 * *v_buf++)));

    __m128i pixel0 = _mm_loadl_epi64(
        reinterpret_cast<const __m128i*>(y_table + 8 * *y_buf++));
    pixel0 = _mm_adds_epi16(pixel0, chroma);

    __m128i pixel1 = _mm_loadl_epi64(
        reinterpret_cast<const __m128i*>(y_table + 8 * *y_buf++));
    pixel1 = _mm_adds_epi16(pixel1, chroma);

    // Selector 0x44 takes the low 64 bits of each operand: pixel0 lands in
    // the low half of the result and pixel1 in the high half.
    const __m128 pair = _mm_shuffle_ps(_mm_castsi128_ps(pixel0),
                                       _mm_castsi128_ps(pixel1),
                                       0x44);

    // Scale the 16-bit sums down (the tables are presumably stored with 6
    // fractional bits) and clamp each lane to 0..255.
    __m128i packed = _mm_srai_epi16(_mm_castps_si128(pair), 6);
    packed = _mm_packus_epi16(packed, packed);

    // The low 8 bytes now hold both pixels.
    _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), packed);
    rgb_buf += 8;
    width -= 2;
  }

  // Trailing pixel of an odd-width row. Testing "> 0" rather than "!= 0"
  // keeps a negative width from producing a stray write.
  if (width > 0) {
    const __m128i chroma = _mm_adds_epi16(
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(
            kCoefficientsRgbU + 8 * *u_buf)),
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(
            kCoefficientsRgbV + 8 * *v_buf)));

    __m128i pixel = _mm_loadl_epi64(
        reinterpret_cast<const __m128i*>(y_table + 8 * *y_buf));
    pixel = _mm_adds_epi16(pixel, chroma);
    pixel = _mm_srai_epi16(pixel, 6);
    pixel = _mm_packus_epi16(pixel, pixel);

    // Only the low 32 bits (one pixel) are stored.
    *reinterpret_cast<uint32_t*>(rgb_buf) = _mm_cvtsi128_si32(pixel);
  }
}
// Converts one row of Y, U and V samples to 32-bit pixels using SSE2
// intrinsics while resampling horizontally by point sampling (no
// interpolation).
//
// The source position |x| is kept in 16.16 fixed point and starts at 0.
// It advances by |source_dx| for every output pixel. Luma is read at
// index x >> 16 and chroma at index x >> 17, i.e. at half the luma
// resolution. Within a pair of output pixels the chroma is sampled once, at
// the position of the first pixel, and reused for the second.
//
// Every sample selects an 8-byte entry (four signed 16-bit lanes) from the
// kCoefficientsRgbY/U/V lookup tables. The three entries are added with
// signed saturation, shifted right arithmetically by 6 and packed to
// unsigned bytes with saturation.
//
//   y_buf     - luma row, indexed by x >> 16.
//   u_buf     - U row, indexed by x >> 17.
//   v_buf     - V row, indexed by x >> 17.
//   rgb_buf   - destination; 4 bytes are written per output pixel.
//   width     - number of output pixels; nothing is written when <= 0.
//   source_dx - source step per output pixel, 16.16 fixed point.
static void ScaleYUVToRGB32Row_SSE2(const uint8_t* y_buf,
                                    const uint8_t* u_buf,
                                    const uint8_t* v_buf,
                                    uint8_t* rgb_buf,
                                    int width,
                                    int source_dx) {
  const uint8_t* const y_table =
      reinterpret_cast<const uint8_t*>(kCoefficientsRgbY);
  int x = 0;

  // Main loop: two output pixels per iteration.
  while (width >= 2) {
    // Chroma and first luma sample are all taken at the same position.
    const uint8_t u = u_buf[x >> 17];
    const uint8_t v = v_buf[x >> 17];
    uint8_t y = y_buf[x >> 16];
    x += source_dx;

    const __m128i chroma = _mm_adds_epi16(
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(
            kCoefficientsRgbU + 8 * u)),
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(
            kCoefficientsRgbV + 8 * v)));

    __m128i pixel0 = _mm_loadl_epi64(
        reinterpret_cast<const __m128i*>(y_table + 8 * y));
    pixel0 = _mm_adds_epi16(pixel0, chroma);

    // Second pixel: new luma sample, same chroma term.
    y = y_buf[x >> 16];
    x += source_dx;

    __m128i pixel1 = _mm_loadl_epi64(
        reinterpret_cast<const __m128i*>(y_table + 8 * y));
    pixel1 = _mm_adds_epi16(pixel1, chroma);

    // Selector 0x44 takes the low 64 bits of each operand: pixel0 lands in
    // the low half of the result and pixel1 in the high half.
    const __m128 pair = _mm_shuffle_ps(_mm_castsi128_ps(pixel0),
                                       _mm_castsi128_ps(pixel1),
                                       0x44);

    // Scale the 16-bit sums down (the tables are presumably stored with 6
    // fractional bits) and clamp each lane to 0..255.
    __m128i packed = _mm_srai_epi16(_mm_castps_si128(pair), 6);
    packed = _mm_packus_epi16(packed, packed);

    // The low 8 bytes now hold both pixels.
    _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), packed);
    rgb_buf += 8;
    width -= 2;
  }

  // Trailing pixel of an odd-width row. Testing "> 0" rather than "!= 0"
  // keeps a negative width from producing a stray write.
  if (width > 0) {
    const uint8_t u = u_buf[x >> 17];
    const uint8_t v = v_buf[x >> 17];
    const uint8_t y = y_buf[x >> 16];

    const __m128i chroma = _mm_adds_epi16(
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(
            kCoefficientsRgbU + 8 * u)),
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(
            kCoefficientsRgbV + 8 * v)));

    __m128i pixel = _mm_loadl_epi64(
        reinterpret_cast<const __m128i*>(y_table + 8 * y));
    pixel = _mm_adds_epi16(pixel, chroma);
    pixel = _mm_srai_epi16(pixel, 6);
    pixel = _mm_packus_epi16(pixel, pixel);

    // Only the low 32 bits (one pixel) are stored.
    *reinterpret_cast<uint32_t*>(rgb_buf) = _mm_cvtsi128_si32(pixel);
  }
}
// Converts one row of Y, U and V samples to 32-bit pixels using SSE2
// intrinsics while resampling horizontally with linear interpolation between
// neighbouring source samples.
//
// The source position |x| is kept in 16.16 fixed point and advances by
// |source_dx| for every output pixel. It starts at 0, or at 32768 (half a
// luma sample) when |source_dx| is 0x20000 (2.0) or larger. Luma is
// interpolated between y_buf[x >> 16] and the next sample using the low 16
// bits of |x| as weight. Chroma is interpolated between index x >> 17 and
// the next sample using bits 1..16 of |x| as weight. Within a pair of output
// pixels the chroma is computed once, at the position of the first pixel.
//
// NOTE: the main loop reads index + 1 in all three planes, so the element
// following the last sampled one must be readable by the caller's buffers.
// The trailing pixel of an odd-width row is point sampled, not interpolated,
// and does not perform that look-ahead read.
//
// Every resulting sample selects an 8-byte entry (four signed 16-bit lanes)
// from the kCoefficientsRgbY/U/V lookup tables. The three entries are added
// with signed saturation, shifted right arithmetically by 6 and packed to
// unsigned bytes with saturation.
//
//   y_buf     - luma row.
//   u_buf     - U row (half the horizontal resolution of |y_buf|).
//   v_buf     - V row (half the horizontal resolution of |y_buf|).
//   rgb_buf   - destination; 4 bytes are written per output pixel.
//   width     - number of output pixels; nothing is written when <= 0.
//   source_dx - source step per output pixel, 16.16 fixed point.
static void LinearScaleYUVToRGB32Row_SSE2(const uint8_t* y_buf,
                                          const uint8_t* u_buf,
                                          const uint8_t* v_buf,
                                          uint8_t* rgb_buf,
                                          int width,
                                          int source_dx) {
  const uint8_t* const y_table =
      reinterpret_cast<const uint8_t*>(kCoefficientsRgbY);

  int x = 0;
  if (source_dx >= 0x20000) {
    x = 32768;
  }

  // Main loop: two output pixels per iteration.
  while (width >= 2) {
    const uint8_t u0 = u_buf[x >> 17];
    const uint8_t u1 = u_buf[(x >> 17) + 1];
    const uint8_t v0 = v_buf[x >> 17];
    const uint8_t v1 = v_buf[(x >> 17) + 1];
    uint8_t y0 = y_buf[x >> 16];
    uint8_t y1 = y_buf[(x >> 16) + 1];

    // Fractional position between the two neighbours. XOR-ing a weight with
    // its own mask gives the complementary weight (mask - weight).
    const uint32_t uv_frac = (x & 0x1fffe);
    uint32_t y_frac = (x & 0xffff);
    const uint32_t u = (uv_frac * u1 + (uv_frac ^ 0x1fffe) * u0) >> 17;
    const uint32_t v = (uv_frac * v1 + (uv_frac ^ 0x1fffe) * v0) >> 17;
    uint32_t y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
    x += source_dx;

    const __m128i chroma = _mm_adds_epi16(
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(
            kCoefficientsRgbU + 8 * u)),
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(
            kCoefficientsRgbV + 8 * v)));

    __m128i pixel0 = _mm_loadl_epi64(
        reinterpret_cast<const __m128i*>(y_table + 8 * y));
    pixel0 = _mm_adds_epi16(pixel0, chroma);

    // Second pixel: newly interpolated luma, same chroma term.
    y0 = y_buf[x >> 16];
    y1 = y_buf[(x >> 16) + 1];
    y_frac = (x & 0xffff);
    y = (y_frac * y1 + (y_frac ^ 0xffff) * y0) >> 16;
    x += source_dx;

    __m128i pixel1 = _mm_loadl_epi64(
        reinterpret_cast<const __m128i*>(y_table + 8 * y));
    pixel1 = _mm_adds_epi16(pixel1, chroma);

    // Selector 0x44 takes the low 64 bits of each operand: pixel0 lands in
    // the low half of the result and pixel1 in the high half.
    const __m128 pair = _mm_shuffle_ps(_mm_castsi128_ps(pixel0),
                                       _mm_castsi128_ps(pixel1),
                                       0x44);

    // Scale the 16-bit sums down (the tables are presumably stored with 6
    // fractional bits) and clamp each lane to 0..255.
    __m128i packed = _mm_srai_epi16(_mm_castps_si128(pair), 6);
    packed = _mm_packus_epi16(packed, packed);

    // The low 8 bytes now hold both pixels.
    _mm_storel_epi64(reinterpret_cast<__m128i*>(rgb_buf), packed);
    rgb_buf += 8;
    width -= 2;
  }

  // Trailing pixel of an odd-width row, point sampled. Testing "> 0" rather
  // than "!= 0" keeps a negative width from producing a stray write.
  if (width > 0) {
    const uint32_t u = u_buf[x >> 17];
    const uint32_t v = v_buf[x >> 17];
    const uint32_t y = y_buf[x >> 16];

    const __m128i chroma = _mm_adds_epi16(
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(
            kCoefficientsRgbU + 8 * u)),
        _mm_loadl_epi64(reinterpret_cast<const __m128i*>(
            kCoefficientsRgbV + 8 * v)));

    __m128i pixel = _mm_loadl_epi64(
        reinterpret_cast<const __m128i*>(y_table + 8 * y));
    pixel = _mm_adds_epi16(pixel, chroma);
    pixel = _mm_srai_epi16(pixel, 6);
    pixel = _mm_packus_epi16(pixel, pixel);

    // Only the low 32 bits (one pixel) are stored.
    *reinterpret_cast<uint32_t*>(rgb_buf) = _mm_cvtsi128_si32(pixel);
  }
}
// Externally visible (C linkage) entry point for unscaled row conversion.
// Forwards every argument unchanged to FastConvertYUVToRGB32Row_SSE2; this
// file provides no other implementation to choose from.
void FastConvertYUVToRGB32Row(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* rgb_buf,
int width) {
FastConvertYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width);
}
// Externally visible (C linkage) entry point for row conversion with
// horizontal scaling. Forwards every argument, including the per-pixel source
// step |source_dx|, unchanged to ScaleYUVToRGB32Row_SSE2.
void ScaleYUVToRGB32Row(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* rgb_buf,
int width,
int source_dx) {
ScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width, source_dx);
}
// Externally visible (C linkage) entry point for row conversion with
// linearly interpolated horizontal scaling. Forwards every argument unchanged
// to LinearScaleYUVToRGB32Row_SSE2.
void LinearScaleYUVToRGB32Row(const uint8_t* y_buf,
const uint8_t* u_buf,
const uint8_t* v_buf,
uint8_t* rgb_buf,
int width,
int source_dx) {
LinearScaleYUVToRGB32Row_SSE2(y_buf, u_buf, v_buf, rgb_buf, width,
source_dx);
}
} // extern "C"
// Messung V0.5 in Prozent C=98 H=95 G=96
// ¤ Dauer der Verarbeitung: 0.14 Sekunden
// (vorverarbeitet am 2026-06-09)
// ¤
// *© Formatika GbR, Deutschland