/*
* Copyright (c) 2016, Alliance for Open Media. All rights reserved.
*
* This source code is subject to the terms of the BSD 2 Clause License and
* the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License
* was not distributed with this source code in the LICENSE file, you can
* obtain it at www.aomedia.org/license/software. If the Alliance for Open
* Media Patent License 1.0 was not distributed with this source code in the
* PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
#include <stddef.h>
#include <stdint.h>
#include "config/aom_config.h"
#include "config/aom_dsp_rtcd.h"
#include "aom_dsp/aom_dsp_common.h"
// Prototypes for the sub-pixel variance helpers that take the number of rows
// as an explicit `height` argument. These functions are defined in
// subpel_variance.asm. The wrappers generated later in this file add up the
// int return values and the values written through `sse` across helper calls.
// The two trailing pointer parameters are unused placeholders for PIC-enabled
// builds.
#define DECLARE_SUBPEL_VARIANCE_XH(w, opt)                                  \
  int aom_sub_pixel_variance##w##xh_##opt(                                  \
      const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
      const uint8_t *dst, ptrdiff_t dst_stride, int height,                 \
      unsigned int *sse, void *unused0, void *unused)
DECLARE_SUBPEL_VARIANCE_XH(4, ssse3);
DECLARE_SUBPEL_VARIANCE_XH(8, ssse3);
DECLARE_SUBPEL_VARIANCE_XH(16, ssse3);
#undef DECLARE_SUBPEL_VARIANCE_XH
// Generates aom_sub_pixel_variance<w>x<h>_<opt>().
//
// The w x h block is covered by (w / wf) columns, each wf pixels wide. Every
// column is processed in strips of at most 64 rows by the
// aom_sub_pixel_variance<wf>xh_<opt> helper; x_offset and y_offset are
// forwarded to the helper unchanged. The helper's return values and the
// values it writes through its sse argument are summed over all calls.
//
// The summed sse is stored in *sse_ptr, and the function returns
//   sse - ((sum * sum) >> (wlog2 + hlog2)).
// `cast` is applied to the first operand of sum * sum and `cast_prod` to the
// product, so each instantiation picks the integer types for that step.
#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast)                      \
  unsigned int aom_sub_pixel_variance##w##x##h##_##opt(                       \
      const uint8_t *src, int src_stride, int x_offset, int y_offset,         \
      const uint8_t *dst, int dst_stride, unsigned int *sse_ptr) {            \
    /* Rows per helper call, capped at 64 to avoid overflow in the helper. */ \
    const int strip_h = AOMMIN(h, 64);                                        \
    const int num_cols = (w) / (wf);                                          \
    const int num_strips = (h) / strip_h;                                     \
    unsigned int sse_total = 0;                                               \
    int sum_total = 0;                                                        \
    for (int col = 0; col < num_cols; ++col, src += (wf), dst += (wf)) {      \
      const uint8_t *src_strip = src;                                         \
      const uint8_t *dst_strip = dst;                                         \
      for (int strip = 0; strip < num_strips; ++strip) {                      \
        unsigned int sse_strip;                                               \
        sum_total += aom_sub_pixel_variance##wf##xh_##opt(                    \
            src_strip, src_stride, x_offset, y_offset, dst_strip, dst_stride, \
            strip_h, &sse_strip, NULL, NULL);                                 \
        sse_total += sse_strip;                                               \
        src_strip += strip_h * src_stride;                                    \
        dst_strip += strip_h * dst_stride;                                    \
      }                                                                       \
    }                                                                         \
    *sse_ptr = sse_total;                                                     \
    return sse_total -                                                        \
           (unsigned int)(cast_prod(cast sum_total * sum_total) >>            \
                          ((wlog2) + (hlog2)));                               \
  }
// Instantiation table for FN(). Arguments per row, in order: block width,
// block height, helper width, log2(block width), log2(block height), ISA
// suffix, cast applied to the squared sum, cast applied to the first operand
// of that product.
//
// Block sizes generated in every configuration.
#define FNS_COMMON(opt)                             \
  FN(128, 128, 16, 7, 7, opt, (int64_t), (int64_t)) \
  FN(128, 64, 16, 7, 6, opt, (int64_t), (int64_t))  \
  FN(64, 128, 16, 6, 7, opt, (int64_t), (int64_t))  \
  FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t))   \
  FN(64, 32, 16, 6, 5, opt, (int64_t), (int64_t))   \
  FN(32, 64, 16, 5, 6, opt, (int64_t), (int64_t))   \
  FN(32, 32, 16, 5, 5, opt, (int64_t), (int64_t))   \
  FN(32, 16, 16, 5, 4, opt, (int64_t), (int64_t))   \
  FN(16, 32, 16, 4, 5, opt, (int64_t), (int64_t))   \
  FN(16, 16, 16, 4, 4, opt, (uint32_t), (int64_t))  \
  FN(16, 8, 16, 4, 3, opt, (int32_t), (int32_t))    \
  FN(8, 16, 8, 3, 4, opt, (int32_t), (int32_t))     \
  FN(8, 8, 8, 3, 3, opt, (int32_t), (int32_t))      \
  FN(8, 4, 8, 3, 2, opt, (int32_t), (int32_t))      \
  FN(4, 8, 4, 2, 3, opt, (int32_t), (int32_t))      \
  FN(4, 4, 4, 2, 2, opt, (int32_t), (int32_t))
// Additional block sizes, generated only when CONFIG_REALTIME_ONLY is 0.
#if !CONFIG_REALTIME_ONLY
#define FNS_EXTRA(opt)                             \
  FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t))    \
  FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t))   \
  FN(8, 32, 8, 3, 5, opt, (uint32_t), (int64_t))   \
  FN(32, 8, 16, 5, 3, opt, (uint32_t), (int64_t))  \
  FN(16, 64, 16, 4, 6, opt, (int64_t), (int64_t))  \
  FN(64, 16, 16, 6, 4, opt, (int64_t), (int64_t))
#else
#define FNS_EXTRA(opt)
#endif
#define FNS(opt) FNS_COMMON(opt) FNS_EXTRA(opt)
FNS(ssse3)
#undef FNS
#undef FNS_EXTRA
#undef FNS_COMMON
#undef FN
// Prototypes for the sub-pixel averaging variance helpers that take the
// number of rows as an explicit `height` argument. Compared with the plain
// variance helpers they take an extra `sec` buffer and its stride. The
// wrappers generated later in this file add up the int return values and the
// values written through `sse` across helper calls.
// The two trailing pointer parameters are unused placeholders for PIC-enabled
// builds.
#define DECLARE_SUBPEL_AVG_VARIANCE_XH(w, opt)                              \
  int aom_sub_pixel_avg_variance##w##xh_##opt(                              \
      const uint8_t *src, ptrdiff_t src_stride, int x_offset, int y_offset, \
      const uint8_t *dst, ptrdiff_t dst_stride, const uint8_t *sec,         \
      ptrdiff_t sec_stride, int height, unsigned int *sse, void *unused0,   \
      void *unused)
DECLARE_SUBPEL_AVG_VARIANCE_XH(4, ssse3);
DECLARE_SUBPEL_AVG_VARIANCE_XH(8, ssse3);
DECLARE_SUBPEL_AVG_VARIANCE_XH(16, ssse3);
#undef DECLARE_SUBPEL_AVG_VARIANCE_XH
// Generates aom_sub_pixel_avg_variance<w>x<h>_<opt>().
//
// The w x h block is covered by (w / wf) columns, each wf pixels wide. Every
// column is processed in strips of at most 64 rows by the
// aom_sub_pixel_avg_variance<wf>xh_<opt> helper; x_offset and y_offset are
// forwarded to the helper unchanged. The `sec` buffer is walked with a row
// stride of w, which is also the value passed as the helper's sec_stride.
// The helper's return values and the values it writes through its sse
// argument are summed over all calls.
//
// The summed sse is stored in *sse_ptr, and the function returns
//   sse - ((sum * sum) >> (wlog2 + hlog2)).
// `cast` is applied to the first operand of sum * sum and `cast_prod` to the
// product, so each instantiation picks the integer types for that step.
#define FN(w, h, wf, wlog2, hlog2, opt, cast_prod, cast)                      \
  unsigned int aom_sub_pixel_avg_variance##w##x##h##_##opt(                   \
      const uint8_t *src, int src_stride, int x_offset, int y_offset,         \
      const uint8_t *dst, int dst_stride, unsigned int *sse_ptr,              \
      const uint8_t *sec) {                                                   \
    /* Rows per helper call, capped at 64 to avoid overflow in the helper. */ \
    const int strip_h = AOMMIN(h, 64);                                        \
    const int num_cols = (w) / (wf);                                          \
    const int num_strips = (h) / strip_h;                                     \
    unsigned int sse_total = 0;                                               \
    int sum_total = 0;                                                        \
    for (int col = 0; col < num_cols;                                         \
         ++col, src += (wf), dst += (wf), sec += (wf)) {                      \
      const uint8_t *src_strip = src;                                         \
      const uint8_t *dst_strip = dst;                                         \
      const uint8_t *sec_strip = sec;                                         \
      for (int strip = 0; strip < num_strips; ++strip) {                      \
        unsigned int sse_strip;                                               \
        sum_total += aom_sub_pixel_avg_variance##wf##xh_##opt(                \
            src_strip, src_stride, x_offset, y_offset, dst_strip, dst_stride, \
            sec_strip, w, strip_h, &sse_strip, NULL, NULL);                   \
        sse_total += sse_strip;                                               \
        src_strip += strip_h * src_stride;                                    \
        dst_strip += strip_h * dst_stride;                                    \
        sec_strip += strip_h * (w);                                           \
      }                                                                       \
    }                                                                         \
    *sse_ptr = sse_total;                                                     \
    return sse_total -                                                        \
           (unsigned int)(cast_prod(cast sum_total * sum_total) >>            \
                          ((wlog2) + (hlog2)));                               \
  }
// Instantiation table for FN(). Arguments per row, in order: block width,
// block height, helper width, log2(block width), log2(block height), ISA
// suffix, cast applied to the squared sum, cast applied to the first operand
// of that product.
//
// Block sizes generated in every configuration.
#define FNS_COMMON(opt)                             \
  FN(128, 128, 16, 7, 7, opt, (int64_t), (int64_t)) \
  FN(128, 64, 16, 7, 6, opt, (int64_t), (int64_t))  \
  FN(64, 128, 16, 6, 7, opt, (int64_t), (int64_t))  \
  FN(64, 64, 16, 6, 6, opt, (int64_t), (int64_t))   \
  FN(64, 32, 16, 6, 5, opt, (int64_t), (int64_t))   \
  FN(32, 64, 16, 5, 6, opt, (int64_t), (int64_t))   \
  FN(32, 32, 16, 5, 5, opt, (int64_t), (int64_t))   \
  FN(32, 16, 16, 5, 4, opt, (int64_t), (int64_t))   \
  FN(16, 32, 16, 4, 5, opt, (int64_t), (int64_t))   \
  FN(16, 16, 16, 4, 4, opt, (uint32_t), (int64_t))  \
  FN(16, 8, 16, 4, 3, opt, (uint32_t), (int32_t))   \
  FN(8, 16, 8, 3, 4, opt, (uint32_t), (int32_t))    \
  FN(8, 8, 8, 3, 3, opt, (uint32_t), (int32_t))     \
  FN(8, 4, 8, 3, 2, opt, (uint32_t), (int32_t))     \
  FN(4, 8, 4, 2, 3, opt, (uint32_t), (int32_t))     \
  FN(4, 4, 4, 2, 2, opt, (uint32_t), (int32_t))
// Additional block sizes, generated only when CONFIG_REALTIME_ONLY is 0.
#if !CONFIG_REALTIME_ONLY
#define FNS_EXTRA(opt)                             \
  FN(4, 16, 4, 2, 4, opt, (int32_t), (int32_t))    \
  FN(16, 4, 16, 4, 2, opt, (int32_t), (int32_t))   \
  FN(8, 32, 8, 3, 5, opt, (uint32_t), (int64_t))   \
  FN(32, 8, 16, 5, 3, opt, (uint32_t), (int64_t))  \
  FN(16, 64, 16, 4, 6, opt, (int64_t), (int64_t))  \
  FN(64, 16, 16, 6, 4, opt, (int64_t), (int64_t))
#else
#define FNS_EXTRA(opt)
#endif
#define FNS(opt) FNS_COMMON(opt) FNS_EXTRA(opt)
FNS(ssse3)
#undef FNS
#undef FNS_EXTRA
#undef FNS_COMMON
#undef FN
// Measurement V0.5 in percent: C=99 H=100 G=99