/* * Copyright (c) 2016, Alliance for Open Media. All rights reserved. * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
staticinlinevoid init_mv_cost_params(MV_COST_PARAMS *mv_cost_params, const MvCosts *mv_costs, const MV *ref_mv, int errorperbit, int sadperbit) {
mv_cost_params->ref_mv = ref_mv;
mv_cost_params->full_ref_mv = get_fullmv_from_mv(ref_mv);
mv_cost_params->mv_cost_type = MV_COST_ENTROPY;
mv_cost_params->error_per_bit = errorperbit;
mv_cost_params->sad_per_bit = sadperbit; // For allintra encoding mode, 'mv_costs' is not allocated. Hence, the // population of mvjcost and mvcost are avoided. In case of IntraBC, these // values are populated from 'dv_costs' in av1_set_ms_to_intra_mode(). if (mv_costs != NULL) {
mv_cost_params->mvjcost = mv_costs->nmv_joint_cost;
mv_cost_params->mvcost[0] = mv_costs->mv_cost_stack[0];
mv_cost_params->mvcost[1] = mv_costs->mv_cost_stack[1];
}
}
// If the absolute SAD difference computed between the pred-to-src of even // and odd rows is small, skip every other row in sad computation. constint odd_to_even_diff_sad =
abs((int)start_mv_sad_even_rows - (int)start_mv_sad_odd_rows); constint mult_thresh = 4; if (odd_to_even_diff_sad * mult_thresh < (int)start_mv_sad_even_rows) {
ms_params->sdf = ms_params->vfp->sdsf;
assert(ms_params->vfp->sdsx4df != NULL);
ms_params->sdx4df = ms_params->vfp->sdsx4df;
ms_params->sdx3df = ms_params->vfp->sdsx4df;
}
}
}
void av1_set_mv_search_range(FullMvLimits *mv_limits, const MV *mv) { // Calculate the outermost full-pixel MVs which are inside the limits set by // av1_set_subpel_mv_search_range(). // // The subpel limits are simply mv->col +/- 8*MAX_FULL_PEL_VAL, and similar // for mv->row. We can then divide by 8 to find the fullpel MV limits. But // we have to be careful about the rounding. We want these bounds to be // at least as tight as the subpel limits, which means that we must round // the minimum values up and the maximum values down when dividing. int col_min = ((mv->col + 7) >> 3) - MAX_FULL_PEL_VAL; int row_min = ((mv->row + 7) >> 3) - MAX_FULL_PEL_VAL; int col_max = (mv->col >> 3) + MAX_FULL_PEL_VAL; int row_max = (mv->row >> 3) + MAX_FULL_PEL_VAL;
// Get intersection of UMV window and valid MV window to reduce # of checks // in diamond search. if (mv_limits->col_min < col_min) mv_limits->col_min = col_min; if (mv_limits->col_max > col_max) mv_limits->col_max = col_max; if (mv_limits->row_min < row_min) mv_limits->row_min = row_min; if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
int av1_init_search_range(int size) { int sr = 0; // Minimum search size no matter what the passed in value.
size = AOMMAX(16, size);
while ((size << sr) < MAX_FULL_PEL_VAL) sr++;
sr = AOMMIN(sr, MAX_MVSEARCH_STEPS - 2); return sr;
}
// ============================================================================ // Cost of motion vectors // ============================================================================ // TODO(any): Adaptively adjust the regularization strength based on image size // and motion activity instead of using hard-coded values. It seems like we // roughly half the lambda for each increase in resolution // These are multiplier used to perform regularization in motion compensation // when x->mv_cost_type is set to MV_COST_L1. // LOWRES #define SSE_LAMBDA_LOWRES 2 // Used by mv_cost_err_fn #define SAD_LAMBDA_LOWRES 32 // Used by mvsad_err_cost during full pixel search // MIDRES #define SSE_LAMBDA_MIDRES 0 // Used by mv_cost_err_fn #define SAD_LAMBDA_MIDRES 15 // Used by mvsad_err_cost during full pixel search // HDRES #define SSE_LAMBDA_HDRES 1 // Used by mv_cost_err_fn #define SAD_LAMBDA_HDRES 8 // Used by mvsad_err_cost during full pixel search
// Returns the rate of encoding the current motion vector based on the // joint_cost and comp_cost. joint_costs covers the cost of transmitting // JOINT_MV, and comp_cost covers the cost of transmitting the actual motion // vector. staticinlineint mv_cost(const MV *mv, constint *joint_cost, constint *const comp_cost[2]) { return joint_cost[av1_get_mv_joint(mv)] + comp_cost[0][mv->row] +
comp_cost[1][mv->col];
}
#define CONVERT_TO_CONST_MVCOST(ptr) ((constint *const *)(ptr)) // Returns the cost of encoding the motion vector diff := *mv - *ref. The cost // is defined as the rate required to encode diff * weight, rounded to the // nearest 2 ** 7. // This is NOT used during motion compensation. int av1_mv_bit_cost(const MV *mv, const MV *ref_mv, constint *mvjcost, int *const mvcost[2], int weight) { const MV diff = { mv->row - ref_mv->row, mv->col - ref_mv->col }; return ROUND_POWER_OF_TWO(
mv_cost(&diff, mvjcost, CONVERT_TO_CONST_MVCOST(mvcost)) * weight, 7);
}
// Returns the cost of using the current mv during the motion search. This is // used when var is used as the error metric. #define PIXEL_TRANSFORM_ERROR_SCALE 4 staticinlineint mv_err_cost(const MV *mv, const MV *ref_mv, constint *mvjcost, constint *const mvcost[2], int error_per_bit, MV_COST_TYPE mv_cost_type) { const MV diff = { mv->row - ref_mv->row, mv->col - ref_mv->col }; const MV abs_diff = { abs(diff.row), abs(diff.col) };
// Returns the cost of using the current mv during the motion search. This is // only used during full pixel motion search when sad is used as the error // metric staticinlineint mvsad_err_cost(const FULLPEL_MV *mv, const FULLPEL_MV *ref_mv, constint *mvjcost, constint *const mvcost[2], int sad_per_bit, MV_COST_TYPE mv_cost_type) { const MV diff = { GET_MV_SUBPEL(mv->row - ref_mv->row),
GET_MV_SUBPEL(mv->col - ref_mv->col) };
// ============================================================================= // Fullpixel Motion Search: Translational // ============================================================================= #define MAX_PATTERN_SCALES 11 #define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
// Search site initialization for DIAMOND / CLAMPED_DIAMOND search methods. // level = 0: DIAMOND, level = 1: CLAMPED_DIAMOND. staticvoid init_dsmotion_compensation(search_site_config *cfg, int stride, int level) { int num_search_steps = 0; int stage_index = MAX_MVSEARCH_STEPS - 1;
// Calculates and returns a sad+mvcost list around an integer best pel during // fullpixel motion search. The resulting list can be used to speed up subpel // motion search later. #define USE_SAD_COSTLIST 1
// calc_int_cost_list uses var to populate the costlist, which is more accurate // than sad but slightly slower. static AOM_FORCE_INLINE void calc_int_cost_list( const FULLPEL_MV best_mv, const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, int *cost_list) { staticconst FULLPEL_MV neighbors[4] = {
{ 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 }
}; constint br = best_mv.row; constint bc = best_mv.col;
// Refresh the costlist it does not contain valid sad if (!costlist_has_sad) {
cost_list[0] = get_mvpred_sad(
ms_params, src, get_buf_from_fullmv(ref, &best_mv), ref_stride);
if (check_bounds(&ms_params->mv_limits, br, bc, 1)) { for (int i = 0; i < 4; i++) { const FULLPEL_MV this_mv = { br + neighbors[i].row,
bc + neighbors[i].col };
cost_list[i + 1] = get_mvpred_sad(
ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
}
} else { for (int i = 0; i < 4; i++) { const FULLPEL_MV this_mv = { br + neighbors[i].row,
bc + neighbors[i].col }; if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv)) {
cost_list[i + 1] = INT_MAX;
} else {
cost_list[i + 1] = get_mvpred_sad(
ms_params, src, get_buf_from_fullmv(ref, &this_mv), ref_stride);
}
}
}
}
// Computes motion vector cost and adds to the sad cost. // Then updates the best sad and motion vectors. // Inputs: // this_sad: the sad to be evaluated. // mv: the current motion vector. // mv_cost_params: a structure containing information to compute mv cost. // best_sad: the current best sad. // raw_best_sad (optional): the current best sad without calculating mv cost. // best_mv: the current best motion vector. // second_best_mv (optional): the second best motion vector up to now. // Modifies: // best_sad, raw_best_sad, best_mv, second_best_mv // If the current sad is lower than the current best sad. // Returns: // Whether the input sad (mv) is better than the current best. staticinlineint update_mvs_and_sad(constunsignedint this_sad, const FULLPEL_MV *mv, const MV_COST_PARAMS *mv_cost_params, unsignedint *best_sad, unsignedint *raw_best_sad,
FULLPEL_MV *best_mv,
FULLPEL_MV *second_best_mv) { if (this_sad >= *best_sad) return 0;
// Add the motion vector cost. constunsignedint sad = this_sad + mvsad_err_cost_(mv, mv_cost_params); if (sad < *best_sad) { if (raw_best_sad) *raw_best_sad = this_sad;
*best_sad = sad; if (second_best_mv) *second_best_mv = *best_mv;
*best_mv = *mv; return 1;
} return 0;
}
// Calculate sad4 and update the bestmv information // in FAST_DIAMOND search method. staticinlinevoid calc_sad4_update_bestmv( const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const MV_COST_PARAMS *mv_cost_params, FULLPEL_MV *best_mv, const FULLPEL_MV center_mv, const uint8_t *center_address, unsignedint *bestsad, unsignedint *raw_bestsad, int search_step, int *best_site, int cand_start, int *cost_list) { conststruct buf_2d *const src = ms_params->ms_buffers.src; conststruct buf_2d *const ref = ms_params->ms_buffers.ref; const search_site *site = ms_params->search_sites->site[search_step];
// Calculate sad and update the bestmv information // in FAST_DIAMOND search method. staticinlinevoid calc_sad_update_bestmv( const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const MV_COST_PARAMS *mv_cost_params, FULLPEL_MV *best_mv, const FULLPEL_MV center_mv, const uint8_t *center_address, unsignedint *bestsad, unsignedint *raw_bestsad, int search_step, int *best_site, constint num_candidates, int cand_start, int *cost_list) { conststruct buf_2d *const src = ms_params->ms_buffers.src; conststruct buf_2d *const ref = ms_params->ms_buffers.ref; const search_site *site = ms_params->search_sites->site[search_step]; // Loop over number of candidates. for (int i = cand_start; i < num_candidates; i++) { const FULLPEL_MV this_mv = { center_mv.row + site[i].mv.row,
center_mv.col + site[i].mv.col }; if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv)) continue; int thissad = get_mvpred_sad(ms_params, src,
center_address + site[i].offset, ref->stride); if (cost_list) {
cost_list[i + 1] = thissad;
} constint found_better_mv = update_mvs_and_sad(
thissad, &this_mv, mv_cost_params, bestsad, raw_bestsad, best_mv, /*second_best_mv=*/NULL); if (found_better_mv) *best_site = i;
}
}
staticinlinevoid calc_sad_update_bestmv_with_indices( const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const MV_COST_PARAMS *mv_cost_params, FULLPEL_MV *best_mv, const FULLPEL_MV center_mv, const uint8_t *center_address, unsignedint *bestsad, unsignedint *raw_bestsad, int search_step, int *best_site, constint num_candidates, constint *chkpts_indices, int *cost_list) { conststruct buf_2d *const src = ms_params->ms_buffers.src; conststruct buf_2d *const ref = ms_params->ms_buffers.ref; const search_site *site = ms_params->search_sites->site[search_step]; // Loop over number of candidates. for (int i = 0; i < num_candidates; i++) { int index = chkpts_indices[i]; const FULLPEL_MV this_mv = { center_mv.row + site[index].mv.row,
center_mv.col + site[index].mv.col }; if (!av1_is_fullmv_in_range(&ms_params->mv_limits, this_mv)) { if (cost_list) {
cost_list[index + 1] = INT_MAX;
} continue;
} constint thissad = get_mvpred_sad(
ms_params, src, center_address + site[index].offset, ref->stride); if (cost_list) {
cost_list[index + 1] = thissad;
} constint found_better_mv = update_mvs_and_sad(
thissad, &this_mv, mv_cost_params, bestsad, raw_bestsad, best_mv, /*second_best_mv=*/NULL); if (found_better_mv) *best_site = i;
}
}
// Generic pattern search function that searches over multiple scales. // Each scale can have a different number of candidates and shape of // candidates as indicated in the num_candidates and candidates arrays // passed into this function staticint pattern_search(FULLPEL_MV start_mv, const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, int search_step, constint do_init_search, int *cost_list, FULLPEL_MV *best_mv,
FULLPEL_MV_STATS *best_mv_stats) { staticconstint search_steps[MAX_MVSEARCH_STEPS] = {
10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
}; int i, s, t;
conststruct buf_2d *const src = ms_params->ms_buffers.src; conststruct buf_2d *const ref = ms_params->ms_buffers.ref; const search_site_config *search_sites = ms_params->search_sites; constint *num_candidates = search_sites->searches_per_step; constint ref_stride = ref->stride; constint last_is_4 = num_candidates[0] == 4; int br, bc; unsignedint bestsad = UINT_MAX, raw_bestsad = UINT_MAX; int k = -1; const MV_COST_PARAMS *mv_cost_params = &ms_params->mv_cost_params;
search_step = AOMMIN(search_step, MAX_MVSEARCH_STEPS - 1);
assert(search_step >= 0); int best_init_s = search_steps[search_step]; // adjust ref_mv to make sure it is within MV range
clamp_fullmv(&start_mv, &ms_params->mv_limits);
br = start_mv.row;
bc = start_mv.col; if (cost_list != NULL) {
cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
INT_MAX;
} int costlist_has_sad = 0;
// Work out the start point for the search
raw_bestsad = get_mvpred_sad(ms_params, src,
get_buf_from_fullmv(ref, &start_mv), ref_stride);
bestsad = raw_bestsad + mvsad_err_cost_(&start_mv, mv_cost_params);
// Search all possible scales up to the search param around the center point // pick the scale of the point that is best as the starting scale of // further steps around it. const uint8_t *center_address = get_buf_from_fullmv(ref, &start_mv); if (do_init_search) {
s = best_init_s;
best_init_s = -1; for (t = 0; t <= s; ++t) { int best_site = -1;
FULLPEL_MV center_mv = { br, bc }; if (check_bounds(&ms_params->mv_limits, br, bc, 1 << t)) { // Call 4-point sad for multiples of 4 candidates. constint no_of_4_cand_loops = num_candidates[t] >> 2; for (i = 0; i < no_of_4_cand_loops; i++) {
calc_sad4_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv,
center_address, &bestsad, &raw_bestsad, t,
&best_site, i * 4, /*cost_list=*/NULL);
} // Rest of the candidates constint remaining_cand = num_candidates[t] % 4;
calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv,
center_address, &bestsad, &raw_bestsad, t,
&best_site, remaining_cand,
no_of_4_cand_loops * 4, NULL);
} else {
calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv,
center_address, &bestsad, &raw_bestsad, t,
&best_site, num_candidates[t], 0, NULL);
} if (best_site == -1) { continue;
} else {
best_init_s = t;
k = best_site;
}
} if (best_init_s != -1) {
br += search_sites->site[best_init_s][k].mv.row;
bc += search_sites->site[best_init_s][k].mv.col;
center_address += search_sites->site[best_init_s][k].offset;
}
}
// If the center point is still the best, just skip this and move to // the refinement step. if (best_init_s != -1) { constint last_s = (last_is_4 && cost_list != NULL); int best_site = -1;
s = best_init_s;
for (; s >= last_s; s--) { // No need to search all points the 1st time if initial search was used if (!do_init_search || s != best_init_s) {
FULLPEL_MV center_mv = { br, bc }; if (check_bounds(&ms_params->mv_limits, br, bc, 1 << s)) { // Call 4-point sad for multiples of 4 candidates. constint no_of_4_cand_loops = num_candidates[s] >> 2; for (i = 0; i < no_of_4_cand_loops; i++) {
calc_sad4_update_bestmv(ms_params, mv_cost_params, best_mv,
center_mv, center_address, &bestsad,
&raw_bestsad, s, &best_site, i * 4, /*cost_list=*/NULL);
} // Rest of the candidates constint remaining_cand = num_candidates[s] % 4;
calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv,
center_address, &bestsad, &raw_bestsad, s,
&best_site, remaining_cand,
no_of_4_cand_loops * 4, NULL);
} else {
calc_sad_update_bestmv(ms_params, mv_cost_params, best_mv, center_mv,
center_address, &bestsad, &raw_bestsad, s,
&best_site, num_candidates[s], 0, NULL);
}
if (best_site == -1) { continue;
} else {
br += search_sites->site[s][best_site].mv.row;
bc += search_sites->site[s][best_site].mv.col;
center_address += search_sites->site[s][best_site].offset;
k = best_site;
}
}
do { int next_chkpts_indices[PATTERN_CANDIDATES_REF];
best_site = -1;
next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
next_chkpts_indices[1] = k;
next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
if (best_site != -1) {
k = next_chkpts_indices[best_site];
br += search_sites->site[s][k].mv.row;
bc += search_sites->site[s][k].mv.col;
center_address += search_sites->site[s][k].offset;
}
}
}
}
best_mv->row = br;
best_mv->col = bc;
assert(center_address == get_buf_from_fullmv(ref, best_mv) && "center address is out of sync with best_mv!\n");
// Returns the one-away integer pel cost/sad around the best as follows: // cost_list[0]: cost/sad at the best integer pel // cost_list[1]: cost/sad at delta {0, -1} (left) from the best integer pel // cost_list[2]: cost/sad at delta { 1, 0} (bottom) from the best integer pel // cost_list[3]: cost/sad at delta { 0, 1} (right) from the best integer pel // cost_list[4]: cost/sad at delta {-1, 0} (top) from the best integer pel if (cost_list) { if (USE_SAD_COSTLIST) {
calc_int_sad_list(*best_mv, ms_params, cost_list, costlist_has_sad);
} else {
calc_int_cost_list(*best_mv, ms_params, cost_list);
}
}
// For the following foo_search, the input arguments are: // start_mv: where we are starting our motion search // ms_params: a collection of motion search parameters // search_step: how many steps to skip in our motion search. For example, // a value 3 suggests that 3 search steps have already taken place prior to // this function call, so we jump directly to step 4 of the search process // do_init_search: if on, do an initial search of all possible scales around the // start_mv, and then pick the best scale. // cond_list: used to hold the cost around the best full mv so we can use it to // speed up subpel search later. // best_mv: the best mv found in the motion search staticint hex_search(const FULLPEL_MV start_mv, const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, constint search_step, constint do_init_search, int *cost_list, FULLPEL_MV *best_mv,
FULLPEL_MV_STATS *best_mv_stats) { return pattern_search(start_mv, ms_params, search_step, do_init_search,
cost_list, best_mv, best_mv_stats);
}
int is_off_center = 0; // Number of times that we have stayed in the middle. This is used to skip // search steps in the future if diamond_search_sad is called again. int num_center_steps = 0;
// search_step determines the length of the initial step and hence the number // of iterations. constint tot_steps = cfg->num_search_steps - search_step;
FULLPEL_MV tmp_second_best_mv; if (second_best_mv) {
tmp_second_best_mv = *second_best_mv;
}
*best_mv = start_mv;
// Check the starting position const uint8_t *best_address = get_buf_from_fullmv(ref, &start_mv); unsignedint bestsad = start_mv_sad;
// TODO(chiyotsai@google.com): Implement 4 points search for msdf&sdaf if (ms_params->ms_buffers.second_pred) { for (int step = tot_steps - 1; step >= 0; --step) { const search_site *site = cfg->site[step]; constint num_searches = cfg->searches_per_step[step]; int best_site = 0;
int bestsme = get_mvpred_compound_var_cost(ms_params, best_mv, best_mv_stats);
// If there won't be more n-step search, check to see if refining search is // needed. constint further_steps = cfg->num_search_steps - 1 - step_param; while (n < further_steps) {
++n;
// TODO(chiyotsai@google.com): There is another bug here where the second // best mv gets incorrectly overwritten. Fix it later.
FULLPEL_MV tmp_best_mv;
FULLPEL_MV_STATS tmp_best_mv_stats;
diamond_search_sad(start_mv, start_mv_sad, ms_params, step_param + n,
&num00, &tmp_best_mv, second_best_mv);
for (r = start_row; r <= end_row; r += step) { for (c = start_col; c <= end_col; c += col_step) { // Step > 1 means we are not checking every location in this pass. if (step > 1) { const FULLPEL_MV mv = { start_mv.row + r, start_mv.col + c }; unsignedint sad = get_mvpred_sad(
ms_params, src, get_buf_from_fullmv(ref, &mv), ref_stride);
update_mvs_and_sad(sad, &mv, mv_cost_params, &best_sad, /*raw_best_sad=*/NULL, best_mv, second_best_mv);
} else { // 4 sads in a single call if we are checking every location if (c + 3 <= end_col) { unsignedint sads[4]; const uint8_t *addrs[4]; for (i = 0; i < 4; ++i) { const FULLPEL_MV mv = { start_mv.row + r, start_mv.col + c + i };
addrs[i] = get_buf_from_fullmv(ref, &mv);
}
for (i = 0; i < 4; ++i) { if (sads[i] < best_sad) { const FULLPEL_MV mv = { start_mv.row + r, start_mv.col + c + i };
update_mvs_and_sad(sads[i], &mv, mv_cost_params, &best_sad, /*raw_best_sad=*/NULL, best_mv,
second_best_mv);
}
}
} else { for (i = 0; i < end_col - c; ++i) { const FULLPEL_MV mv = { start_mv.row + r, start_mv.col + c + i }; unsignedint sad = get_mvpred_sad(
ms_params, src, get_buf_from_fullmv(ref, &mv), ref_stride);
update_mvs_and_sad(sad, &mv, mv_cost_params, &best_sad, /*raw_best_sad=*/NULL, best_mv, second_best_mv);
}
}
}
}
}
return best_sad;
}
// Runs an limited range exhaustive mesh search using a pattern set // according to the encode speed profile. staticint full_pixel_exhaustive(const FULLPEL_MV start_mv, const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, conststruct MESH_PATTERN *const mesh_patterns, int *cost_list, FULLPEL_MV *best_mv,
FULLPEL_MV_STATS *mv_stats,
FULLPEL_MV *second_best_mv) { constint kMinRange = 7; constint kMaxRange = 256; constint kMinInterval = 1;
int bestsme; int i; int interval = mesh_patterns[0].interval; int range = mesh_patterns[0].range; int baseline_interval_divisor;
// TODO(chiyotsai@google.com): Currently exhaustive search calls single ref // version of sad and variance function. We still need to check the // performance when compound ref exhaustive search is enabled.
assert(!ms_params->ms_buffers.second_pred && "Mesh search does not support compound mode!");
*best_mv = start_mv;
// Trap illegal values for interval and range for this function. if ((range < kMinRange) || (range > kMaxRange) || (interval < kMinInterval) ||
(interval > range)) return INT_MAX;
baseline_interval_divisor = range / interval;
// Check size of proposed first range against magnitude of the centre // value used as a starting point.
range = AOMMAX(range, (5 * AOMMAX(abs(best_mv->row), abs(best_mv->col))) / 4);
range = AOMMIN(range, kMaxRange);
interval = AOMMAX(interval, range / baseline_interval_divisor); // Use a small search step/interval for certain kind of clips. // For example, screen content clips with a lot of texts. // Large interval could lead to a false matching position, and it can't find // the best global candidate in following iterations due to reduced search // range. The solution here is to use a small search iterval in the beginning // and thus reduces the chance of missing the best candidate. if (ms_params->fine_search_interval) {
interval = AOMMIN(interval, 4);
}
if ((interval > kMinInterval) && (range > kMinRange)) { // Progressive searches with range and step size decreasing each time // till we reach a step size of 1. Then break out. for (i = 1; i < MAX_MESH_STEP; ++i) { // First pass with coarser step and longer range
bestsme = exhaustive_mesh_search(
*best_mv, ms_params, mesh_patterns[i].range,
mesh_patterns[i].interval, best_mv, second_best_mv);
switch (search_method) { case FAST_BIGDIA:
var = fast_bigdia_search(start_mv, ms_params, step_param, 0, cost_list,
best_mv, best_mv_stats); break; case VFAST_DIAMOND:
var = vfast_dia_search(start_mv, ms_params, step_param, 0, cost_list,
best_mv, best_mv_stats); break; case FAST_DIAMOND:
var = fast_dia_search(start_mv, ms_params, step_param, 0, cost_list,
best_mv, best_mv_stats); break; case FAST_HEX:
var = fast_hex_search(start_mv, ms_params, step_param, 0, cost_list,
best_mv, best_mv_stats); break; case HEX:
var = hex_search(start_mv, ms_params, step_param, 1, cost_list, best_mv,
best_mv_stats); break; case SQUARE:
var = square_search(start_mv, ms_params, step_param, 1, cost_list,
best_mv, best_mv_stats); break; case BIGDIA:
var = bigdia_search(start_mv, ms_params, step_param, 1, cost_list,
best_mv, best_mv_stats); break; case NSTEP: case NSTEP_8PT: case DIAMOND: case CLAMPED_DIAMOND:
var = full_pixel_diamond(start_mv, ms_params, step_param, cost_list,
best_mv, best_mv_stats, second_best_mv); break; default: assert(0 && "Invalid search method.");
}
// Should we allow a follow on exhaustive search? if (!run_mesh_search &&
((search_method == NSTEP) || (search_method == NSTEP_8PT)) &&
!ms_params->ms_buffers.second_pred) { int exhaustive_thr = ms_params->force_mesh_thresh;
exhaustive_thr >>=
10 - (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]); // Threshold variance for an exhaustive full search. if (var > exhaustive_thr) run_mesh_search = 1;
}
// TODO(yunqing): the following is used to reduce mesh search in temporal // filtering. Can extend it to intrabc. if (!is_intra_mode && ms_params->prune_mesh_search) { constint full_pel_mv_diff = AOMMAX(abs(start_mv.row - best_mv->row),
abs(start_mv.col - best_mv->col)); if (full_pel_mv_diff <= ms_params->mesh_search_mv_diff_threshold) {
run_mesh_search = 0;
}
}
if (ms_params->sdf != ms_params->vfp->sdf) { // If we are skipping rows when we perform the motion search, we need to // check the quality of skipping. If it's bad, then we run mesh search with // skip row features off. // TODO(chiyotsai@google.com): Handle the case where we have a vertical // offset of 1 before we hit this statement to avoid having to redo // motion search. conststruct buf_2d *src = ms_params->ms_buffers.src; conststruct buf_2d *ref = ms_params->ms_buffers.ref; constint src_stride = src->stride; constint ref_stride = ref->stride;
const uint8_t *src_address = src->buf; const uint8_t *best_address = get_buf_from_fullmv(ref, best_mv); constint sad =
ms_params->vfp->sdf(src_address, src_stride, best_address, ref_stride); constint skip_sad =
ms_params->vfp->sdsf(src_address, src_stride, best_address, ref_stride); // We will keep the result of skipping rows if it's good enough. Here, good // enough means the error is less than 1 per pixel. constint kSADThresh =
1 << (mi_size_wide_log2[bsize] + mi_size_high_log2[bsize]); if (sad > kSADThresh && abs(skip_sad - sad) * 10 >= AOMMAX(sad, 1) * 9) { // There is a large discrepancy between skipping and not skipping, so we // need to redo the motion search.
FULLPEL_MOTION_SEARCH_PARAMS new_ms_params = *ms_params;
new_ms_params.sdf = new_ms_params.vfp->sdf;
new_ms_params.sdx4df = new_ms_params.vfp->sdx4df;
new_ms_params.sdx3df = new_ms_params.vfp->sdx3df;
if (run_mesh_search) { int var_ex;
FULLPEL_MV tmp_mv_ex;
FULLPEL_MV_STATS tmp_mv_stats; // Pick the mesh pattern for exhaustive search based on the toolset (intraBC // or non-intraBC) // TODO(chiyotsai@google.com): There is a bug here where the second best mv // gets overwritten without actually comparing the rdcost. const MESH_PATTERN *const mesh_patterns =
ms_params->mesh_patterns[is_intra_mode]; // TODO(chiyotsai@google.com): the second best mv is not set correctly by // full_pixel_exhaustive, which can incorrectly override it.
var_ex =
full_pixel_exhaustive(*best_mv, ms_params, mesh_patterns, cost_list,
&tmp_mv_ex, &tmp_mv_stats, second_best_mv); if (var_ex < var) {
var = var_ex;
*best_mv_stats = tmp_mv_stats;
*best_mv = tmp_mv_ex;
}
}
return var;
}
int av1_intrabc_hash_search(const AV1_COMP *cpi, const MACROBLOCKD *xd, const FULLPEL_MOTION_SEARCH_PARAMS *ms_params,
IntraBCHashInfo *intrabc_hash_info,
FULLPEL_MV *best_mv) { if (!av1_use_hash_me(cpi)) return INT_MAX;
int av1_vector_match(const int16_t *ref, const int16_t *src, int bwl, int search_size, int full_search, int *sad) { int best_sad = INT_MAX; int this_sad; int d; int center, offset = 0; int bw = search_size << 1;
if (full_search) { for (d = 0; d <= bw; d++) {
this_sad = aom_vector_var(&ref[d], src, bwl); if (this_sad < best_sad) {
best_sad = this_sad;
offset = d;
}
}
center = offset;
*sad = best_sad; return (center - (bw >> 1));
}
for (d = 0; d <= bw; d += 16) {
this_sad = aom_vector_var(&ref[d], src, bwl); if (this_sad < best_sad) {
best_sad = this_sad;
offset = d;
}
}
center = offset;
for (d = -8; d <= 8; d += 16) { int this_pos = offset + d; // check limit if (this_pos < 0 || this_pos > bw) continue;
this_sad = aom_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
}
}
offset = center;
for (d = -4; d <= 4; d += 8) { int this_pos = offset + d; // check limit if (this_pos < 0 || this_pos > bw) continue;
this_sad = aom_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
}
}
offset = center;
for (d = -2; d <= 2; d += 4) { int this_pos = offset + d; // check limit if (this_pos < 0 || this_pos > bw) continue;
this_sad = aom_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
}
}
offset = center;
for (d = -1; d <= 1; d += 2) { int this_pos = offset + d; // check limit if (this_pos < 0 || this_pos > bw) continue;
this_sad = aom_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
}
}
*sad = best_sad; return (center - (bw >> 1));
}
// A special fast version of motion search used in rt mode. // The search window along columns and row is given by: // +/- me_search_size_col/row. unsignedint av1_int_pro_motion_estimation(const AV1_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, int mi_row, int mi_col, const MV *ref_mv, unsignedint *y_sad_zero, int me_search_size_col, int me_search_size_row) { const AV1_COMMON *const cm = &cpi->common;
MACROBLOCKD *xd = &x->e_mbd;
MB_MODE_INFO *mi = xd->mi[0]; struct buf_2d backup_yv12[MAX_MB_PLANE] = { { 0, 0, 0, 0, 0 } }; int idx; constint bw = block_size_wide[bsize]; constint bh = block_size_high[bsize]; constint is_screen = cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN; constint full_search = is_screen; constbool screen_scroll_superblock =
is_screen && bsize == cm->seq_params->sb_size; // Keep border a multiple of 16. constint border = (cpi->oxcf.border_in_pixels >> 4) << 4; int search_size_width = me_search_size_col; int search_size_height = me_search_size_row; // Adjust based on boundary. if (((mi_col << 2) - search_size_width < -border) ||
((mi_col << 2) + search_size_width > cm->width + border))
search_size_width = border; if (((mi_row << 2) - search_size_height < -border) ||
((mi_row << 2) + search_size_height > cm->height + border))
search_size_height = border; constint src_stride = x->plane[0].src.stride; constint ref_stride = xd->plane[0].pre[0].stride;
uint8_t const *ref_buf, *src_buf;
int_mv *best_int_mv = &xd->mi[0]->mv[0]; unsignedint best_sad, tmp_sad, this_sad[4]; int best_sad_col, best_sad_row; constint row_norm_factor = mi_size_high_log2[bsize] + 1; constint col_norm_factor = 3 + (bw >> 5); const YV12_BUFFER_CONFIG *scaled_ref_frame =
av1_get_scaled_ref_frame(cpi, mi->ref_frame[0]); staticconst MV search_pos[4] = {
{ -1, 0 },
{ 0, -1 },
{ 0, 1 },
{ 1, 0 },
};
if (scaled_ref_frame) { int i; // Swap out the reference frame for a version that's been scaled to // match the resolution of the current frame, allowing the existing // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
av1_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL,
MAX_MB_PLANE);
}
// Set up prediction 1-D reference set for rows.
ref_buf = xd->plane[0].pre[0].buf - search_size_width;
aom_int_pro_row(hbuf, ref_buf, ref_stride, width_ref_buf, bh,
row_norm_factor);
// Set up prediction 1-D reference set for cols
ref_buf = xd->plane[0].pre[0].buf - search_size_height * ref_stride;
aom_int_pro_col(vbuf, ref_buf, ref_stride, bw, height_ref_buf,
col_norm_factor);
// Set up src 1-D reference set
src_buf = x->plane[0].src.buf;
aom_int_pro_row(src_hbuf, src_buf, src_stride, bw, bh, row_norm_factor);
aom_int_pro_col(src_vbuf, src_buf, src_stride, bw, bh, col_norm_factor);
// Find the best match per 1-D search
best_int_mv->as_fullmv.col =
av1_vector_match(hbuf, src_hbuf, mi_size_wide_log2[bsize],
search_size_width, full_search, &best_sad_col);
best_int_mv->as_fullmv.row =
av1_vector_match(vbuf, src_vbuf, mi_size_high_log2[bsize],
search_size_height, full_search, &best_sad_row);
// For screen: select between horiz or vert motion. if (is_screen) { if (best_sad_col < best_sad_row)
best_int_mv->as_fullmv.row = 0; else
best_int_mv->as_fullmv.col = 0;
}
// search_step determines the length of the initial step and hence the number // of iterations. constint tot_steps = cfg->num_search_steps - search_step; const uint8_t *best_address, *init_ref; int best_sad = INT_MAX; int best_site = 0;
staticint obmc_full_pixel_diamond( const FULLPEL_MOTION_SEARCH_PARAMS *ms_params, const FULLPEL_MV start_mv, int step_param, FULLPEL_MV *best_mv) { const search_site_config *cfg = ms_params->search_sites;
FULLPEL_MV tmp_mv; int thissme, n, num00 = 0; int bestsme =
obmc_diamond_search_sad(ms_params, start_mv, &tmp_mv, step_param, &n); if (bestsme < INT_MAX) bestsme = get_obmc_mvpred_var(ms_params, &tmp_mv);
*best_mv = tmp_mv;
// If there won't be more n-step search, check to see if refining search is // needed. constint further_steps = cfg->num_search_steps - 1 - step_param;
while (n < further_steps) {
++n;
if (num00) {
num00--;
} else {
thissme = obmc_diamond_search_sad(ms_params, start_mv, &tmp_mv,
step_param + n, &num00); if (thissme < INT_MAX) thissme = get_obmc_mvpred_var(ms_params, &tmp_mv);
// ============================================================================= // Subpixel Motion Search: Translational // ============================================================================= #define INIT_SUBPEL_STEP_SIZE (4) /* * To avoid the penalty for crossing cache-line read, preload the reference * area in a small buffer, which is aligned to make sure there won't be crossing * cache-line read while reading from this buffer. This reduced the cpu * cycles spent on reading ref data in sub-pixel filter functions. * TODO: Currently, since sub-pixel search range here is -3 ~ 3, copy 22 rows x * 32 cols area that is enough for 16x16 macroblock. Later, for SPLITMV, we * could reduce the area.
*/
// Returns the subpel offset used by various subpel variance functions [m]sv[a]f staticinlineint get_subpel_part(int x) { return x & 7; }
// Gets the address of the ref buffer at subpel location (r, c), rounded to the // nearest fullpel precision toward - \infty staticinlineconst uint8_t *get_buf_from_mv(conststruct buf_2d *buf, const MV mv) { constint offset = (mv.row >> 3) * buf->stride + (mv.col >> 3); return &buf->buf[offset];
}
// Estimates the variance of prediction residue using bilinear filter for fast // search. staticinlineint estimated_pref_error( const MV *this_mv, const SUBPEL_SEARCH_VAR_PARAMS *var_params, unsignedint *sse) { const aom_variance_fn_ptr_t *vfp = var_params->vfp;
// Searches the four cardinal direction for a better mv, then follows up with a // search in the best quadrant. This uses bilinear filter to speed up the // calculation. static AOM_FORCE_INLINE MV first_level_check_fast(
MACROBLOCKD *xd, const AV1_COMMON *cm, const MV this_mv, MV *best_mv, int hstep, const SubpelMvLimits *mv_limits, const SUBPEL_SEARCH_VAR_PARAMS *var_params, const MV_COST_PARAMS *mv_cost_params, unsignedint *besterr, unsignedint *sse1, int *distortion, int is_scaled) { // Check the four cardinal directions const MV left_mv = { this_mv.row, this_mv.col - hstep }; int dummy = 0; constunsignedint left = check_better_fast(
xd, cm, &left_mv, best_mv, mv_limits, var_params, mv_cost_params, besterr,
sse1, distortion, &dummy, is_scaled);
// Check the diagonal direction with the best mv
check_better_fast(xd, cm, &diag_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &dummy,
is_scaled);
return diag_step;
}
// Performs a following up search after first_level_check_fast is called. This // performs two extra chess pattern searches in the best quadrant. static AOM_FORCE_INLINE void second_level_check_fast(
MACROBLOCKD *xd, const AV1_COMMON *cm, const MV this_mv, const MV diag_step,
MV *best_mv, int hstep, const SubpelMvLimits *mv_limits, const SUBPEL_SEARCH_VAR_PARAMS *var_params, const MV_COST_PARAMS *mv_cost_params, unsignedint *besterr, unsignedint *sse1, int *distortion, int is_scaled) {
assert(diag_step.row == hstep || diag_step.row == -hstep);
assert(diag_step.col == hstep || diag_step.col == -hstep); constint tr = this_mv.row; constint tc = this_mv.col; constint br = best_mv->row; constint bc = best_mv->col; int dummy = 0; if (tr != br && tc != bc) {
assert(diag_step.col == bc - tc);
assert(diag_step.row == br - tr); const MV chess_mv_1 = { br, bc + diag_step.col }; const MV chess_mv_2 = { br + diag_step.row, bc };
check_better_fast(xd, cm, &chess_mv_1, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &dummy,
is_scaled);
check_better_fast(xd, cm, &chess_mv_2, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &dummy,
is_scaled);
} elseif (tr == br && tc != bc) {
assert(diag_step.col == bc - tc); // Continue searching in the best direction const MV bottom_long_mv = { br + hstep, bc + diag_step.col }; const MV top_long_mv = { br - hstep, bc + diag_step.col };
check_better_fast(xd, cm, &bottom_long_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &dummy,
is_scaled);
check_better_fast(xd, cm, &top_long_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &dummy,
is_scaled);
// Search in the direction opposite of the best quadrant const MV rev_mv = { br - diag_step.row, bc };
check_better_fast(xd, cm, &rev_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &dummy,
is_scaled);
} elseif (tr != br && tc == bc) {
assert(diag_step.row == br - tr); // Continue searching in the best direction const MV right_long_mv = { br + diag_step.row, bc + hstep }; const MV left_long_mv = { br + diag_step.row, bc - hstep };
check_better_fast(xd, cm, &right_long_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &dummy,
is_scaled);
check_better_fast(xd, cm, &left_long_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &dummy,
is_scaled);
// Search in the direction opposite of the best quadrant const MV rev_mv = { br, bc - diag_step.col };
check_better_fast(xd, cm, &rev_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &dummy,
is_scaled);
}
}
// Combines first level check and second level check when applicable. This first // searches the four cardinal directions, and perform several // diagonal/chess-pattern searches in the best quadrant. static AOM_FORCE_INLINE void two_level_checks_fast(
MACROBLOCKD *xd, const AV1_COMMON *cm, const MV this_mv, MV *best_mv, int hstep, const SubpelMvLimits *mv_limits, const SUBPEL_SEARCH_VAR_PARAMS *var_params, const MV_COST_PARAMS *mv_cost_params, unsignedint *besterr, unsignedint *sse1, int *distortion, int iters, int is_scaled) { const MV diag_step = first_level_check_fast(
xd, cm, this_mv, best_mv, hstep, mv_limits, var_params, mv_cost_params,
besterr, sse1, distortion, is_scaled); if (iters > 1) {
second_level_check_fast(xd, cm, this_mv, diag_step, best_mv, hstep,
mv_limits, var_params, mv_cost_params, besterr,
sse1, distortion, is_scaled);
}
}
// Check the diagonal direction with the best mv
check_better(xd, cm, &diag_mv, best_mv, mv_limits, var_params, mv_cost_params,
besterr, sse1, distortion, &dummy);
return diag_step;
}
// A newer version of second level check that gives better quality. // TODO(chiyotsai@google.com): evaluate this on subpel_search_types different // from av1_find_best_sub_pixel_tree static AOM_FORCE_INLINE void second_level_check_v2(
MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV this_mv, MV diag_step,
MV *best_mv, const SubpelMvLimits *mv_limits, const SUBPEL_SEARCH_VAR_PARAMS *var_params, const MV_COST_PARAMS *mv_cost_params, unsignedint *besterr, unsignedint *sse1, int *distortion, int is_scaled) {
assert(best_mv->row == this_mv.row + diag_step.row ||
best_mv->col == this_mv.col + diag_step.col); if (CHECK_MV_EQUAL(this_mv, *best_mv)) { return;
} elseif (this_mv.row == best_mv->row) { // Search away from diagonal step since diagonal search did not provide any // improvement
diag_step.row *= -1;
} elseif (this_mv.col == best_mv->col) {
diag_step.col *= -1;
}
if (var_params->subpel_search_type != USE_2_TAPS_ORIG) {
check_better(xd, cm, &row_bias_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &has_better_mv);
check_better(xd, cm, &col_bias_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &has_better_mv);
// Do an additional search if the second iteration gives a better mv if (has_better_mv) {
check_better(xd, cm, &diag_bias_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &has_better_mv);
}
} else {
check_better_fast(xd, cm, &row_bias_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &has_better_mv,
is_scaled);
check_better_fast(xd, cm, &col_bias_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &has_better_mv,
is_scaled);
// Do an additional search if the second iteration gives a better mv if (has_better_mv) {
check_better_fast(xd, cm, &diag_bias_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion,
&has_better_mv, is_scaled);
}
}
}
// Gets the error at the beginning when the mv has fullpel precision staticunsignedint setup_center_error( const MACROBLOCKD *xd, const MV *bestmv, const SUBPEL_SEARCH_VAR_PARAMS *var_params, const MV_COST_PARAMS *mv_cost_params, unsignedint *sse1, int *distortion) { const aom_variance_fn_ptr_t *vfp = var_params->vfp; constint w = var_params->w; constint h = var_params->h;
// Returns surface minima estimate at given precision in 1/2^n bits. // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C // For a given set of costs S0, S1, S2, S3, S4 at points // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively, // the solution for the location of the minima (x0, y0) is given by: // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0), // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0). // The code below is an integerized version of that. staticinlinevoid get_cost_surf_min(constint *cost_list, int *ir, int *ic, int bits) {
*ic = divide_and_round((cost_list[1] - cost_list[3]) * (1 << (bits - 1)),
(cost_list[1] - 2 * cost_list[0] + cost_list[3]));
*ir = divide_and_round((cost_list[4] - cost_list[2]) * (1 << (bits - 1)),
(cost_list[4] - 2 * cost_list[0] + cost_list[2]));
}
// Checks the list of mvs searched in the last iteration and see if we are // repeating it. If so, return 1. Otherwise we update the last_mv_search_list // with current_mv and return 0. staticinlineint check_repeated_mv_and_update(int_mv *last_mv_search_list, const MV current_mv, int iter) { if (last_mv_search_list) { if (CHECK_MV_EQUAL(last_mv_search_list[iter].as_mv, current_mv)) { return 1;
}
// The iteration we are current searching for. Iter 0 corresponds to fullpel // mv, iter 1 to half pel, and so on int iter = 0; int hstep = INIT_SUBPEL_STEP_SIZE; // Step size, initialized to 4/8=1/2 pel unsignedint besterr = INT_MAX;
*bestmv = start_mv;
// If forced_stop is FULL_PEL, return. if (forced_stop == FULL_PEL) return besterr;
if (check_repeated_mv_and_update(last_mv_search_list, *bestmv, iter)) { return INT_MAX;
}
iter++;
if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { int ir, ic;
get_cost_surf_min(cost_list, &ir, &ic, 1); if (ir != 0 || ic != 0) { const MV this_mv = { start_mv.row + ir * hstep,
start_mv.col + ic * hstep }; int dummy = 0;
check_better_fast(xd, cm, &this_mv, bestmv, mv_limits, var_params,
mv_cost_params, &besterr, sse1, distortion, &dummy,
is_scaled);
}
} else {
two_level_checks_fast(xd, cm, start_mv, bestmv, hstep, mv_limits,
var_params, mv_cost_params, &besterr, sse1,
distortion, iters_per_step, is_scaled);
}
// Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel if (forced_stop < HALF_PEL) { if (check_repeated_mv_and_update(last_mv_search_list, *bestmv, iter)) { return INT_MAX;
}
iter++;
// The iteration we are current searching for. Iter 0 corresponds to fullpel // mv, iter 1 to half pel, and so on int iter = 0; int hstep = INIT_SUBPEL_STEP_SIZE; // Step size, initialized to 4/8=1/2 pel unsignedint besterr = INT_MAX;
*bestmv = start_mv;
switch (whichdir) { case 0: // bottom left quadrant
check_better_fast(xd, cm, &left_mv, bestmv, mv_limits, var_params,
mv_cost_params, &besterr, sse1, distortion, &dummy,
is_scaled);
check_better_fast(xd, cm, &bottom_mv, bestmv, mv_limits, var_params,
mv_cost_params, &besterr, sse1, distortion, &dummy,
is_scaled);
check_better_fast(xd, cm, &bottom_left_mv, bestmv, mv_limits,
var_params, mv_cost_params, &besterr, sse1,
distortion, &dummy, is_scaled); break; case 1: // bottom right quadrant
check_better_fast(xd, cm, &right_mv, bestmv, mv_limits, var_params,
mv_cost_params, &besterr, sse1, distortion, &dummy,
is_scaled);
check_better_fast(xd, cm, &bottom_mv, bestmv, mv_limits, var_params,
mv_cost_params, &besterr, sse1, distortion, &dummy,
is_scaled);
check_better_fast(xd, cm, &bottom_right_mv, bestmv, mv_limits,
var_params, mv_cost_params, &besterr, sse1,
distortion, &dummy, is_scaled); break; case 2: // top left quadrant
check_better_fast(xd, cm, &left_mv, bestmv, mv_limits, var_params,
mv_cost_params, &besterr, sse1, distortion, &dummy,
is_scaled);
check_better_fast(xd, cm, &top_mv, bestmv, mv_limits, var_params,
mv_cost_params, &besterr, sse1, distortion, &dummy,
is_scaled);
check_better_fast(xd, cm, &top_left_mv, bestmv, mv_limits, var_params,
mv_cost_params, &besterr, sse1, distortion, &dummy,
is_scaled); break; case 3: // top right quadrant
check_better_fast(xd, cm, &right_mv, bestmv, mv_limits, var_params,
mv_cost_params, &besterr, sse1, distortion, &dummy,
is_scaled);
check_better_fast(xd, cm, &top_mv, bestmv, mv_limits, var_params,
mv_cost_params, &besterr, sse1, distortion, &dummy,
is_scaled);
check_better_fast(xd, cm, &top_right_mv, bestmv, mv_limits, var_params,
mv_cost_params, &besterr, sse1, distortion, &dummy,
is_scaled); break;
}
} else {
two_level_checks_fast(xd, cm, start_mv, bestmv, hstep, mv_limits,
var_params, mv_cost_params, &besterr, sse1,
distortion, iters_per_step, is_scaled);
}
// Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel if (forced_stop < HALF_PEL) { if (check_repeated_mv_and_update(last_mv_search_list, *bestmv, iter)) { return INT_MAX;
}
iter++;
// How many steps to take. A round of 0 means fullpel search only, 1 means // half-pel, and so on. constint round = AOMMIN(FULL_PEL - forced_stop, 3 - !allow_hp); int hstep = INIT_SUBPEL_STEP_SIZE; // Step size, initialized to 4/8=1/2 pel
// Check diagonal sub-pixel position if (!CHECK_MV_EQUAL(iter_center_mv, *bestmv) && iters_per_step > 1) {
second_level_check_v2(xd, cm, iter_center_mv, diag_step, bestmv,
mv_limits, var_params, mv_cost_params, &besterr,
sse1, distortion, is_scaled);
}
hstep >>= 1;
}
return besterr;
}
// Note(yunqingwang): The following 2 functions are only used in the motion // vector unit test, which return extreme motion vectors allowed by the MV // limits. // Returns the maximum MV. int av1_return_max_sub_pixel_mv(MACROBLOCKD *xd, const AV1_COMMON *const cm, const SUBPEL_MOTION_SEARCH_PARAMS *ms_params,
MV start_mv, const FULLPEL_MV_STATS *start_mv_stats,
MV *bestmv, int *distortion, unsignedint *sse1,
int_mv *last_mv_search_list) {
(void)xd;
(void)cm;
(void)start_mv;
(void)start_mv_stats;
(void)distortion;
(void)last_mv_search_list;
// In the sub-pel motion search, if hp is not used, then the last bit of mv // has to be 0.
lower_mv_precision(bestmv, allow_hp, 0);
*sse1 = besterr; return besterr;
}
// Returns the minimum MV. int av1_return_min_sub_pixel_mv(MACROBLOCKD *xd, const AV1_COMMON *const cm, const SUBPEL_MOTION_SEARCH_PARAMS *ms_params,
MV start_mv, const FULLPEL_MV_STATS *start_mv_stats,
MV *bestmv, int *distortion, unsignedint *sse1,
int_mv *last_mv_search_list) {
(void)xd;
(void)cm;
(void)start_mv;
(void)start_mv_stats;
(void)distortion;
(void)last_mv_search_list;
unsignedint besterr = 0; // In the sub-pel motion search, if hp is not used, then the last bit of mv // has to be 0.
lower_mv_precision(bestmv, allow_hp, 0);
*sse1 = besterr; return besterr;
}
#if !CONFIG_REALTIME_ONLY // Computes the cost of the current predictor by going through the whole // av1_enc_build_inter_predictor pipeline. This is mainly used by warped mv // during motion_mode_rd. We are going through the whole // av1_enc_build_inter_predictor because we might have changed the interpolation // filter, etc before motion_mode_rd is called. staticinlineunsignedint compute_motion_cost(
MACROBLOCKD *xd, const AV1_COMMON *const cm, const SUBPEL_MOTION_SEARCH_PARAMS *ms_params, BLOCK_SIZE bsize, const MV *this_mv) { unsignedint mse; unsignedint sse; constint mi_row = xd->mi_row; constint mi_col = xd->mi_col;
// Macros to build bitmasks which help us avoid redundant computations // // To explain the idea here, imagine that on the first iteration of the // loop below, we step rightwards. Then, on the second iteration, the neighbors // to consider are: // . . . // 0 1 . // . . . // Where 0 is the initial search point, 1 is the best candidate found in the // first iteration, and the dots are the other neighbors of point 1. // // Naively, we would now need to scan all 8 neighbors of point 1 (point 0 and // the seven points marked with dots), and compare them to see where to move // next. However, we already evaluated 5 of those 8 neighbors in the last // iteration, and decided that they are worse than point 1. So we don't need // to re-consider these points. We only really need to consider the three // points which are adjacent to point 1 but *not* to point 0. // // As the algorithm goes on, there are other ways that redundant evaluations // can happen, if the search path curls back around on itself. // // To avoid all possible redundancies, we'd have to build a set containing // every point we have already checked, and this would be quite expensive. // // So instead, we apply a 95%-effective solution with a much lower overhead: // we prune out the points which were considered during the previous // iteration, but we don't worry about any prior iteration. This can be done // as follows: // // We build a static table, called neighbor_mask, which answers the question // "if we moved in direction X last time, which neighbors are new, and which // were scanned last iteration?" // Then we can query this table to quickly determine which points we need to // evaluate, and which we can skip. // // To query the table, the logic is simply: // neighbor_mask[i] & (1 << j) == "if we moved in direction i last iteration, // do we need to scan neighbor j this iteration?" #define NEIGHBOR_MASK_DIA(left, down, right, up) \
(left | (down << 1) | (right << 2) | (up << 3))
staticconst warp_search_config warp_search_info[WARP_SEARCH_METHODS] = { // WARP_SEARCH_DIAMOND
{
.num_neighbors = 4,
.neighbors = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } },
.neighbor_mask = { // If we stepped left last time, consider all points except right
NEIGHBOR_MASK_DIA(1, 1, 0, 1), // If we stepped down last time, consider all points except up
NEIGHBOR_MASK_DIA(1, 1, 1, 0), // Stepped right last time
NEIGHBOR_MASK_DIA(0, 1, 1, 1), // Stepped up last time
NEIGHBOR_MASK_DIA(1, 0, 1, 1),
},
}, // WARP_SEARCH_SQUARE
{
.num_neighbors = 8,
.neighbors = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 },
{ 1, -1 }, { 1, 1 }, { -1, -1 }, { -1, 1 } },
.neighbor_mask = { // If we stepped left last time, then we only need to consider 3 points: // left, down+left, up+left
NEIGHBOR_MASK_SQR(1, 0, 0, 0, 1, 0, 1, 0), // If we stepped down last time, then we only need to consider 3 points: // down, down+left, down+right
NEIGHBOR_MASK_SQR(0, 1, 0, 0, 1, 1, 0, 0), // Stepped right last time
NEIGHBOR_MASK_SQR(0, 0, 1, 0, 0, 1, 0, 1), // Stepped up last time
NEIGHBOR_MASK_SQR(0, 0, 0, 1, 0, 0, 1, 1),
// If we stepped down+left last time, then we need to consider 5 points: // left, down, down+left, down+right, up+left
NEIGHBOR_MASK_SQR(1, 1, 0, 0, 1, 1, 1, 0), // Stepped down+right last time
NEIGHBOR_MASK_SQR(0, 1, 1, 0, 1, 1, 0, 1), // Stepped up+left last time
NEIGHBOR_MASK_SQR(1, 0, 0, 1, 1, 0, 1, 1), // Stepped up+right last time
NEIGHBOR_MASK_SQR(0, 0, 1, 1, 0, 1, 1, 1),
},
},
};
unsignedint av1_refine_warped_mv(MACROBLOCKD *xd, const AV1_COMMON *const cm, const SUBPEL_MOTION_SEARCH_PARAMS *ms_params,
BLOCK_SIZE bsize, constint *pts0, constint *pts_inref0, int total_samples,
WARP_SEARCH_METHOD search_method, int num_iterations) {
MB_MODE_INFO *mbmi = xd->mi[0];
// Calculate the center position's error
assert(av1_is_subpelmv_in_range(mv_limits, *best_mv));
bestmse = compute_motion_cost(xd, cm, ms_params, bsize, best_mv);
// Check the diagonal direction with the best mv
obmc_check_better(xd, cm, &diag_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &dummy);
// Check the diagonal direction with the best mv
obmc_check_better_fast(&diag_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion, &dummy);
return diag_step;
}
}
// A newer version of second level check for obmc that gives better quality. static AOM_FORCE_INLINE void obmc_second_level_check_v2(
MACROBLOCKD *xd, const AV1_COMMON *const cm, const MV this_mv, MV diag_step,
MV *best_mv, const SubpelMvLimits *mv_limits, const SUBPEL_SEARCH_VAR_PARAMS *var_params, const MV_COST_PARAMS *mv_cost_params, unsignedint *besterr, unsignedint *sse1, int *distortion) {
assert(best_mv->row == this_mv.row + diag_step.row ||
best_mv->col == this_mv.col + diag_step.col); if (CHECK_MV_EQUAL(this_mv, *best_mv)) { return;
} elseif (this_mv.row == best_mv->row) { // Search away from diagonal step since diagonal search did not provide any // improvement
diag_step.row *= -1;
} elseif (this_mv.col == best_mv->col) {
diag_step.col *= -1;
}
if (var_params->subpel_search_type != USE_2_TAPS_ORIG) {
obmc_check_better(xd, cm, &row_bias_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion,
&has_better_mv);
obmc_check_better(xd, cm, &col_bias_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion,
&has_better_mv);
// Do an additional search if the second iteration gives a better mv if (has_better_mv) {
obmc_check_better(xd, cm, &diag_bias_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion,
&has_better_mv);
}
} else {
obmc_check_better_fast(&row_bias_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion,
&has_better_mv);
obmc_check_better_fast(&col_bias_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion,
&has_better_mv);
// Do an additional search if the second iteration gives a better mv if (has_better_mv) {
obmc_check_better_fast(&diag_bias_mv, best_mv, mv_limits, var_params,
mv_cost_params, besterr, sse1, distortion,
&has_better_mv);
}
}
}
var = vfp->vf(src->buf, src->stride, get_buf_from_fullmv(pre, &best_mv),
pre->stride, &sse);
(void)var;
return sse + mv_err_cost_(&mv, mv_cost_params);
}
Messung V0.5 in Prozent
¤ Diese beiden folgenden Angebotsgruppen bietet das Unternehmen0.70Angebot
(Wie Sie bei der Firma Beratungs- und Dienstleistungen beauftragen können 2026-04-28)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.