/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved.
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include "test/hiprec_convolve_test_util.h"

#include <memory>
#include <new>

#include "av1/common/restoration.h"
using std::make_tuple;
using std::tuple;
namespace libaom_test {
// Generate a random pair of filter kernels, using the ranges
// of possible values from the loop-restoration experiment
static void generate_kernels(ACMRandom *rnd, InterpKernel hkernel,
InterpKernel vkernel, int kernel_type = 2 ) {
if (kernel_type == 0 ) {
// Low possible values for filter coefficients, 7-tap kernel
hkernel[0 ] = hkernel[6 ] = vkernel[0 ] = vkernel[6 ] = WIENER_FILT_TAP0_MINV;
hkernel[1 ] = hkernel[5 ] = vkernel[1 ] = vkernel[5 ] = WIENER_FILT_TAP1_MINV;
hkernel[2 ] = hkernel[4 ] = vkernel[2 ] = vkernel[4 ] = WIENER_FILT_TAP2_MINV;
hkernel[3 ] = vkernel[3 ] = -2 * (hkernel[0 ] + hkernel[1 ] + hkernel[2 ]);
hkernel[7 ] = vkernel[7 ] = 0 ;
} else if (kernel_type == 1 ) {
// Max possible values for filter coefficients, 7-tap kernel
hkernel[0 ] = hkernel[6 ] = vkernel[0 ] = vkernel[6 ] = WIENER_FILT_TAP0_MAXV;
hkernel[1 ] = hkernel[5 ] = vkernel[1 ] = vkernel[5 ] = WIENER_FILT_TAP1_MAXV;
hkernel[2 ] = hkernel[4 ] = vkernel[2 ] = vkernel[4 ] = WIENER_FILT_TAP2_MAXV;
hkernel[3 ] = vkernel[3 ] = -2 * (hkernel[0 ] + hkernel[1 ] + hkernel[2 ]);
hkernel[7 ] = vkernel[7 ] = 0 ;
} else if (kernel_type == 2 ) {
// Randomly generated values for filter coefficients, 7-tap kernel
hkernel[0 ] = hkernel[6 ] =
WIENER_FILT_TAP0_MINV +
rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 1 - WIENER_FILT_TAP0_MINV);
hkernel[1 ] = hkernel[5 ] =
WIENER_FILT_TAP1_MINV +
rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
hkernel[2 ] = hkernel[4 ] =
WIENER_FILT_TAP2_MINV +
rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
hkernel[3 ] = -2 * (hkernel[0 ] + hkernel[1 ] + hkernel[2 ]);
hkernel[7 ] = 0 ;
vkernel[0 ] = vkernel[6 ] =
WIENER_FILT_TAP0_MINV +
rnd->PseudoUniform(WIENER_FILT_TAP0_MAXV + 2 - WIENER_FILT_TAP0_MINV);
vkernel[1 ] = vkernel[5 ] =
WIENER_FILT_TAP1_MINV +
rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
vkernel[2 ] = vkernel[4 ] =
WIENER_FILT_TAP2_MINV +
rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
vkernel[3 ] = -2 * (vkernel[0 ] + vkernel[1 ] + vkernel[2 ]);
vkernel[7 ] = 0 ;
} else if (kernel_type == 3 ) {
// Low possible values for filter coefficients, 5-tap kernel
hkernel[0 ] = hkernel[6 ] = vkernel[0 ] = vkernel[6 ] = 0 ;
hkernel[1 ] = hkernel[5 ] = vkernel[1 ] = vkernel[5 ] = WIENER_FILT_TAP1_MINV;
hkernel[2 ] = hkernel[4 ] = vkernel[2 ] = vkernel[4 ] = WIENER_FILT_TAP2_MINV;
hkernel[3 ] = vkernel[3 ] = -2 * (hkernel[0 ] + hkernel[1 ] + hkernel[2 ]);
hkernel[7 ] = vkernel[7 ] = 0 ;
} else if (kernel_type == 4 ) {
// Max possible values for filter coefficients, 5-tap kernel
hkernel[0 ] = hkernel[6 ] = vkernel[0 ] = vkernel[6 ] = 0 ;
hkernel[1 ] = hkernel[5 ] = vkernel[1 ] = vkernel[5 ] = WIENER_FILT_TAP1_MAXV;
hkernel[2 ] = hkernel[4 ] = vkernel[2 ] = vkernel[4 ] = WIENER_FILT_TAP2_MAXV;
hkernel[3 ] = vkernel[3 ] = -2 * (hkernel[0 ] + hkernel[1 ] + hkernel[2 ]);
hkernel[7 ] = vkernel[7 ] = 0 ;
} else {
// Randomly generated values for filter coefficients, 5-tap kernel
hkernel[0 ] = hkernel[6 ] = 0 ;
hkernel[1 ] = hkernel[5 ] =
WIENER_FILT_TAP1_MINV +
rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 1 - WIENER_FILT_TAP1_MINV);
hkernel[2 ] = hkernel[4 ] =
WIENER_FILT_TAP2_MINV +
rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 1 - WIENER_FILT_TAP2_MINV);
hkernel[3 ] = -2 * (hkernel[0 ] + hkernel[1 ] + hkernel[2 ]);
hkernel[7 ] = 0 ;
vkernel[0 ] = vkernel[6 ] = 0 ;
vkernel[1 ] = vkernel[5 ] =
WIENER_FILT_TAP1_MINV +
rnd->PseudoUniform(WIENER_FILT_TAP1_MAXV + 2 - WIENER_FILT_TAP1_MINV);
vkernel[2 ] = vkernel[4 ] =
WIENER_FILT_TAP2_MINV +
rnd->PseudoUniform(WIENER_FILT_TAP2_MAXV + 2 - WIENER_FILT_TAP2_MINV);
vkernel[3 ] = -2 * (vkernel[0 ] + vkernel[1 ] + vkernel[2 ]);
vkernel[7 ] = 0 ;
}
}
namespace AV1HiprecConvolve {
// Builds the parameter list for the low-bitdepth test: each entry is
// (output width, output height, iteration count, function under test).
// The order of the entries is kept stable because it determines the index of
// each generated parameter.
::testing::internal::ParamGenerator<HiprecConvolveParam> BuildParams(
    hiprec_convolve_func filter) {
  // Attaches the function under test to a (width, height, iterations) triple.
  const auto entry = [filter](int out_w, int out_h, int num_iters) {
    return make_tuple(out_w, out_h, num_iters, filter);
  };
  const HiprecConvolveParam params[] = {
    entry(8, 8, 50000),
    entry(8, 4, 50000),
    entry(64, 24, 1000),
    entry(64, 64, 1000),
    entry(64, 56, 1000),
    entry(32, 8, 10000),
    entry(32, 28, 10000),
    entry(32, 32, 10000),
    entry(16, 34, 10000),
    entry(32, 34, 10000),
    entry(64, 34, 1000),
    entry(8, 17, 10000),
    entry(16, 17, 10000),
    entry(32, 17, 10000)
  };
  return ::testing::ValuesIn(params);
}
// Out-of-line, compiler-generated destructor for the test fixture.
AV1HiprecConvolveTest::~AV1HiprecConvolveTest() = default ;
// Re-seeds the fixture's random generator with the fixed deterministic seed
// before each test.
void AV1HiprecConvolveTest::SetUp() {
  const auto seed = ACMRandom::DeterministicSeed();
  rnd_.Reset(seed);
}
// Checks that test_impl produces exactly the same output as
// av1_wiener_convolve_add_src_c. For each of the six kernel types, the
// 128x128 source is refilled with random bytes num_iters times, a random
// block position is chosen, both implementations are run on it, and every
// output pixel is compared.
void AV1HiprecConvolveTest::RunCheckOutput(hiprec_convolve_func test_impl) {
  const int w = 128;
  const int h = 128;
  const int out_w = GET_PARAM(0);
  const int out_h = GET_PARAM(1);
  const int num_iters = GET_PARAM(2);
  const WienerConvolveParams conv_params = get_conv_params_wiener(8);

  std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
  ASSERT_NE(input_, nullptr);
  uint8_t *const input = input_.get();

  // The AVX2 convolve functions always write rows with widths that are
  // multiples of 16. So to avoid a buffer overflow, we may need to pad
  // rows to a multiple of 16.
  const int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
  std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
  ASSERT_NE(output, nullptr);
  std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
  ASSERT_NE(output2, nullptr);

  // Storage for the filter kernels, regenerated for every kernel type.
  DECLARE_ALIGNED(16, InterpKernel, hkernel);
  DECLARE_ALIGNED(16, InterpKernel, vkernel);

  for (int kernel_type = 0; kernel_type < 6; ++kernel_type) {
    generate_kernels(&rnd_, hkernel, vkernel, kernel_type);

    for (int iter = 0; iter < num_iters; ++iter) {
      // Fresh random source pixels, filled in raster order.
      for (int px = 0; px < h * w; ++px) input[px] = rnd_.Rand8();

      // Choose random locations within the source block (row first, then
      // column, so the random sequence is consumed in a fixed order).
      const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
      const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
      uint8_t *const src = input + offset_r * w + offset_c;

      // Reference result, then the implementation under test.
      av1_wiener_convolve_add_src_c(src, w, output.get(), out_w, hkernel, 16,
                                    vkernel, 16, out_w, out_h, &conv_params);
      test_impl(src, w, output2.get(), out_w, hkernel, 16, vkernel, 16, out_w,
                out_h, &conv_params);

      // Both outputs use out_w as their stride; stop at the first mismatch.
      for (int row = 0; row < out_h; ++row) {
        for (int col = 0; col < out_w; ++col) {
          const int j = row * out_w + col;
          ASSERT_EQ(output[j], output2[j])
              << "Pixel mismatch at index " << j << " = (" << col << ", "
              << row << ") on iteration " << iter;
        }
      }
    }
  }
}
// Times av1_wiener_convolve_add_src_c against test_impl. Both are run over
// the same set of block positions in one random 128x128 source, using one
// default (random 7-tap) kernel pair, and the test expects test_impl to take
// less time than the C reference.
void AV1HiprecConvolveTest::RunSpeedTest(hiprec_convolve_func test_impl) {
  const int w = 128;
  const int h = 128;
  const int out_w = GET_PARAM(0);
  const int out_h = GET_PARAM(1);
  const int num_iters = GET_PARAM(2) / 500;
  const WienerConvolveParams conv_params = get_conv_params_wiener(8);

  std::unique_ptr<uint8_t[]> input_(new (std::nothrow) uint8_t[h * w]);
  ASSERT_NE(input_, nullptr);
  uint8_t *const input = input_.get();

  // The AVX2 convolve functions always write rows with widths that are
  // multiples of 16. So to avoid a buffer overflow, we may need to pad
  // rows to a multiple of 16.
  const int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
  std::unique_ptr<uint8_t[]> output(new (std::nothrow) uint8_t[output_n]);
  ASSERT_NE(output, nullptr);
  std::unique_ptr<uint8_t[]> output2(new (std::nothrow) uint8_t[output_n]);
  ASSERT_NE(output2, nullptr);

  // Generate random filter kernels
  DECLARE_ALIGNED(16, InterpKernel, hkernel);
  DECLARE_ALIGNED(16, InterpKernel, vkernel);
  generate_kernels(&rnd_, hkernel, vkernel);

  // Random source pixels, filled in raster order.
  for (int px = 0; px < h * w; ++px) input[px] = rnd_.Rand8();

  // Exclusive upper bounds for the top-left corner of the filtered block;
  // the same positions are visited by both timed loops.
  const int row_end = h - out_h - 4;
  const int col_end = w - out_w - 4;

  // Time the C reference implementation.
  aom_usec_timer ref_timer;
  aom_usec_timer_start(&ref_timer);
  for (int iter = 0; iter < num_iters; ++iter) {
    for (int row = 3; row < row_end; ++row) {
      for (int col = 3; col < col_end; ++col) {
        av1_wiener_convolve_add_src_c(input + row * w + col, w, output.get(),
                                      out_w, hkernel, 16, vkernel, 16, out_w,
                                      out_h, &conv_params);
      }
    }
  }
  aom_usec_timer_mark(&ref_timer);
  const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);

  // Time the implementation under test.
  aom_usec_timer tst_timer;
  aom_usec_timer_start(&tst_timer);
  for (int iter = 0; iter < num_iters; ++iter) {
    for (int row = 3; row < row_end; ++row) {
      for (int col = 3; col < col_end; ++col) {
        test_impl(input + row * w + col, w, output2.get(), out_w, hkernel, 16,
                  vkernel, 16, out_w, out_h, &conv_params);
      }
    }
  }
  aom_usec_timer_mark(&tst_timer);
  const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);

  // Elapsed times are kept in microseconds and printed in milliseconds.
  std::cout << "[ ] C time = " << ref_time / 1000
            << " ms, SIMD time = " << tst_time / 1000 << " ms\n";

  EXPECT_GT(ref_time, tst_time)
      << "Error: AV1HiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
      << "C time: " << ref_time << " us\n"
      << "SIMD time: " << tst_time << " us\n";
}
} // namespace AV1HiprecConvolve
#if CONFIG_AV1_HIGHBITDEPTH
namespace AV1HighbdHiprecConvolve {
// Builds the parameter list for the high-bitdepth test: each entry is
// (output width, output height, iteration count, bit depth, function under
// test). The same three block sizes are listed for bit depths 8, 10 and 12,
// and the order is kept stable because it determines the index of each
// generated parameter.
::testing::internal::ParamGenerator<HighbdHiprecConvolveParam> BuildParams(
    highbd_hiprec_convolve_func filter) {
  // Attaches the function under test to a (width, height, iterations, bit
  // depth) quadruple.
  const auto entry = [filter](int out_w, int out_h, int num_iters, int bd) {
    return make_tuple(out_w, out_h, num_iters, bd, filter);
  };
  const HighbdHiprecConvolveParam params[] = {
    // 8-bit
    entry(8, 8, 50000, 8),
    entry(64, 64, 1000, 8),
    entry(32, 8, 10000, 8),
    // 10-bit
    entry(8, 8, 50000, 10),
    entry(64, 64, 1000, 10),
    entry(32, 8, 10000, 10),
    // 12-bit
    entry(8, 8, 50000, 12),
    entry(64, 64, 1000, 12),
    entry(32, 8, 10000, 12),
  };
  return ::testing::ValuesIn(params);
}
// Out-of-line, compiler-generated destructor for the high-bitdepth fixture.
AV1HighbdHiprecConvolveTest::~AV1HighbdHiprecConvolveTest() = default ;
// Re-seeds the fixture's random generator with the fixed deterministic seed
// before each test.
void AV1HighbdHiprecConvolveTest::SetUp() {
  const auto seed = ACMRandom::DeterministicSeed();
  rnd_.Reset(seed);
}
// Checks that test_impl produces exactly the same output as
// av1_highbd_wiener_convolve_add_src_c at the parameterised bit depth. The
// 128x128 source is filled once with random samples masked to bd bits; then,
// for each of the six kernel types, num_iters random block positions are
// filtered by both implementations and every output sample is compared.
void AV1HighbdHiprecConvolveTest::RunCheckOutput(
    highbd_hiprec_convolve_func test_impl) {
  const int w = 128;
  const int h = 128;
  const int out_w = GET_PARAM(0);
  const int out_h = GET_PARAM(1);
  const int num_iters = GET_PARAM(2);
  const int bd = GET_PARAM(3);
  const WienerConvolveParams conv_params = get_conv_params_wiener(bd);

  std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
  ASSERT_NE(input, nullptr);

  // The AVX2 convolve functions always write rows with widths that are
  // multiples of 16. So to avoid a buffer overflow, we may need to pad
  // rows to a multiple of 16.
  const int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
  std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
  ASSERT_NE(output, nullptr);
  std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
  ASSERT_NE(output2, nullptr);

  // Storage for the filter kernels, regenerated for every kernel type.
  DECLARE_ALIGNED(16, InterpKernel, hkernel);
  DECLARE_ALIGNED(16, InterpKernel, vkernel);

  // Random source samples in raster order, keeping only the low bd bits.
  const int sample_mask = (1 << bd) - 1;
  for (int px = 0; px < h * w; ++px) input[px] = rnd_.Rand16() & sample_mask;

  // The high-bitdepth functions are handed the 16-bit buffers through
  // CONVERT_TO_BYTEPTR pointers.
  uint8_t *const input_ptr = CONVERT_TO_BYTEPTR(input.get());
  uint8_t *const output_ptr = CONVERT_TO_BYTEPTR(output.get());
  uint8_t *const output2_ptr = CONVERT_TO_BYTEPTR(output2.get());

  for (int kernel_type = 0; kernel_type < 6; ++kernel_type) {
    generate_kernels(&rnd_, hkernel, vkernel, kernel_type);

    for (int iter = 0; iter < num_iters; ++iter) {
      // Choose random locations within the source block (row first, then
      // column, so the random sequence is consumed in a fixed order).
      const int offset_r = 3 + rnd_.PseudoUniform(h - out_h - 7);
      const int offset_c = 3 + rnd_.PseudoUniform(w - out_w - 7);
      uint8_t *const src = input_ptr + offset_r * w + offset_c;

      // Reference result, then the implementation under test.
      av1_highbd_wiener_convolve_add_src_c(src, w, output_ptr, out_w, hkernel,
                                           16, vkernel, 16, out_w, out_h,
                                           &conv_params, bd);
      test_impl(src, w, output2_ptr, out_w, hkernel, 16, vkernel, 16, out_w,
                out_h, &conv_params, bd);

      // Both outputs use out_w as their stride; stop at the first mismatch.
      for (int row = 0; row < out_h; ++row) {
        for (int col = 0; col < out_w; ++col) {
          const int j = row * out_w + col;
          ASSERT_EQ(output[j], output2[j])
              << "Pixel mismatch at index " << j << " = (" << col << ", "
              << row << ") on iteration " << iter;
        }
      }
    }
  }
}
// Times av1_highbd_wiener_convolve_add_src_c against test_impl at the
// parameterised bit depth. Both are run over the same set of block positions
// in one random 128x128 source, using one default (random 7-tap) kernel pair,
// and the test expects test_impl to take less time than the C reference.
void AV1HighbdHiprecConvolveTest::RunSpeedTest(
    highbd_hiprec_convolve_func test_impl) {
  const int w = 128;
  const int h = 128;
  const int out_w = GET_PARAM(0);
  const int out_h = GET_PARAM(1);
  const int num_iters = GET_PARAM(2) / 500;
  const int bd = GET_PARAM(3);
  const WienerConvolveParams conv_params = get_conv_params_wiener(bd);

  std::unique_ptr<uint16_t[]> input(new (std::nothrow) uint16_t[h * w]);
  ASSERT_NE(input, nullptr);

  // The AVX2 convolve functions always write rows with widths that are
  // multiples of 16. So to avoid a buffer overflow, we may need to pad
  // rows to a multiple of 16.
  const int output_n = ALIGN_POWER_OF_TWO(out_w, 4) * out_h;
  std::unique_ptr<uint16_t[]> output(new (std::nothrow) uint16_t[output_n]);
  ASSERT_NE(output, nullptr);
  std::unique_ptr<uint16_t[]> output2(new (std::nothrow) uint16_t[output_n]);
  ASSERT_NE(output2, nullptr);

  // Generate random filter kernels
  DECLARE_ALIGNED(16, InterpKernel, hkernel);
  DECLARE_ALIGNED(16, InterpKernel, vkernel);
  generate_kernels(&rnd_, hkernel, vkernel);

  // Random source samples in raster order, keeping only the low bd bits.
  const int sample_mask = (1 << bd) - 1;
  for (int px = 0; px < h * w; ++px) input[px] = rnd_.Rand16() & sample_mask;

  // The high-bitdepth functions are handed the 16-bit buffers through
  // CONVERT_TO_BYTEPTR pointers.
  uint8_t *const input_ptr = CONVERT_TO_BYTEPTR(input.get());
  uint8_t *const output_ptr = CONVERT_TO_BYTEPTR(output.get());
  uint8_t *const output2_ptr = CONVERT_TO_BYTEPTR(output2.get());

  // Exclusive upper bounds for the top-left corner of the filtered block;
  // the same positions are visited by both timed loops.
  const int row_end = h - out_h - 4;
  const int col_end = w - out_w - 4;

  // Time the C reference implementation.
  aom_usec_timer ref_timer;
  aom_usec_timer_start(&ref_timer);
  for (int iter = 0; iter < num_iters; ++iter) {
    for (int row = 3; row < row_end; ++row) {
      for (int col = 3; col < col_end; ++col) {
        av1_highbd_wiener_convolve_add_src_c(
            input_ptr + row * w + col, w, output_ptr, out_w, hkernel, 16,
            vkernel, 16, out_w, out_h, &conv_params, bd);
      }
    }
  }
  aom_usec_timer_mark(&ref_timer);
  const int64_t ref_time = aom_usec_timer_elapsed(&ref_timer);

  // Time the implementation under test.
  aom_usec_timer tst_timer;
  aom_usec_timer_start(&tst_timer);
  for (int iter = 0; iter < num_iters; ++iter) {
    for (int row = 3; row < row_end; ++row) {
      for (int col = 3; col < col_end; ++col) {
        test_impl(input_ptr + row * w + col, w, output2_ptr, out_w, hkernel,
                  16, vkernel, 16, out_w, out_h, &conv_params, bd);
      }
    }
  }
  aom_usec_timer_mark(&tst_timer);
  const int64_t tst_time = aom_usec_timer_elapsed(&tst_timer);

  // Elapsed times are kept in microseconds and printed in milliseconds.
  std::cout << "[ ] C time = " << ref_time / 1000
            << " ms, SIMD time = " << tst_time / 1000 << " ms\n";

  EXPECT_GT(ref_time, tst_time)
      << "Error: AV1HighbdHiprecConvolveTest.SpeedTest, SIMD slower than C.\n"
      << "C time: " << ref_time << " us\n"
      << "SIMD time: " << tst_time << " us\n";
}
} // namespace AV1HighbdHiprecConvolve
#endif // CONFIG_AV1_HIGHBITDEPTH
} // namespace libaom_test
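// NOTE: The remaining lines are not part of the original source. They are a
// footer appended by the tool that extracted this file, kept as a comment so
// that the file still compiles.
// Measurement V0.5 in percent: C=95 H=88 G=91
// Processing time: 0.12 seconds (preprocessed on 2026-06-06)
// © Formatika GbR, Germany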