/* * Copyright (c) 2010 The WebM project authors. All Rights Reserved. * * Use of this source code is governed by a BSD-style license * that can be found in the LICENSE file in the root of the source * tree. An additional intellectual property rights grant can be found * in the file PATENTS. All contributing project authors may * be found in the AUTHORS file in the root of the source tree.
*/
// Get intersection of UMV window and valid MV window to reduce # of checks // in diamond search. if (mv_limits->col_min < col_min) mv_limits->col_min = col_min; if (mv_limits->col_max > col_max) mv_limits->col_max = col_max; if (mv_limits->row_min < row_min) mv_limits->row_min = row_min; if (mv_limits->row_max > row_max) mv_limits->row_max = row_max;
}
// Returns surface minima estimate at given precision in 1/2^n bits. // Assume a model for the cost surface: S = A(x - x0)^2 + B(y - y0)^2 + C // For a given set of costs S0, S1, S2, S3, S4 at points // (y, x) = (0, 0), (0, -1), (1, 0), (0, 1) and (-1, 0) respectively, // the solution for the location of the minima (x0, y0) is given by: // x0 = 1/2 (S1 - S3)/(S1 + S3 - 2*S0), // y0 = 1/2 (S4 - S2)/(S4 + S2 - 2*S0). // The code below is an integerized version of that. staticvoid get_cost_surf_min(int *cost_list, int *ir, int *ic, int bits) { const int64_t x0 = (int64_t)cost_list[1] - cost_list[3]; const int64_t y0 = cost_list[1] - 2 * (int64_t)cost_list[0] + cost_list[3]; const int64_t x1 = (int64_t)cost_list[4] - cost_list[2]; const int64_t y1 = cost_list[4] - 2 * (int64_t)cost_list[0] + cost_list[2]; constint b = 1 << (bits - 1);
*ic = (int)divide_and_round(x0 * b, y0);
*ir = (int)divide_and_round(x1 * b, y1);
}
uint32_t vp9_skip_sub_pixel_tree( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
offset, mvjcost, mvcost, sse1, distortion);
(void)halfiters;
(void)quarteriters;
(void)eighthiters;
(void)whichdir;
(void)allow_hp;
(void)forced_stop;
(void)hstep;
(void)rr;
(void)rc;
(void)minr;
(void)minc;
(void)maxr;
(void)maxc;
(void)tr;
(void)tc;
(void)sse;
(void)thismse;
(void)cost_list;
(void)use_accurate_subpel_search;
return besterr;
}
uint32_t vp9_find_best_sub_pixel_tree_pruned_evenmore( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
offset, mvjcost, mvcost, sse1, distortion);
(void)halfiters;
(void)quarteriters;
(void)eighthiters;
(void)whichdir;
(void)allow_hp;
(void)forced_stop;
(void)hstep;
(void)use_accurate_subpel_search;
if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { int ir, ic; unsignedint minpt = INT_MAX;
get_cost_surf_min(cost_list, &ir, &ic, 2); if (ir != 0 || ic != 0) {
CHECK_BETTER(minpt, tr + 2 * ir, tc + 2 * ic);
}
} else {
FIRST_LEVEL_CHECKS; if (halfiters > 1) {
SECOND_LEVEL_CHECKS;
}
tr = br;
tc = bc;
// Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel // Note forced_stop: 0 - full, 1 - qtr only, 2 - half only if (forced_stop != 2) {
hstep >>= 1;
FIRST_LEVEL_CHECKS; if (quarteriters > 1) {
SECOND_LEVEL_CHECKS;
}
}
}
uint32_t vp9_find_best_sub_pixel_tree_pruned_more( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
(void)use_accurate_subpel_search;
besterr = setup_center_error(xd, bestmv, ref_mv, error_per_bit, vfp, z,
src_stride, y, y_stride, second_pred, w, h,
offset, mvjcost, mvcost, sse1, distortion); if (cost_list && cost_list[0] != INT_MAX && cost_list[1] != INT_MAX &&
cost_list[2] != INT_MAX && cost_list[3] != INT_MAX &&
cost_list[4] != INT_MAX && is_cost_list_wellbehaved(cost_list)) { unsignedint minpt; int ir, ic;
get_cost_surf_min(cost_list, &ir, &ic, 1); if (ir != 0 || ic != 0) {
CHECK_BETTER(minpt, tr + ir * hstep, tc + ic * hstep);
}
} else {
FIRST_LEVEL_CHECKS; if (halfiters > 1) {
SECOND_LEVEL_CHECKS;
}
}
// Each subsequent iteration checks at least one point in common with // the last iteration could be 2 ( if diag selected) 1/4 pel
if (allow_hp && use_mv_hp(ref_mv) && forced_stop == 0) {
tr = br;
tc = bc;
hstep >>= 1;
FIRST_LEVEL_CHECKS; if (eighthiters > 1) {
SECOND_LEVEL_CHECKS;
}
} // These lines insure static analysis doesn't warn that // tr and tc aren't used after the above point.
(void)tr;
(void)tc;
bestmv->row = br;
bestmv->col = bc;
return besterr;
}
uint32_t vp9_find_best_sub_pixel_tree_pruned( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) {
SETUP_SUBPEL_SEARCH;
(void)use_accurate_subpel_search;
#define MAX_PATTERN_SCALES 11 #define MAX_PATTERN_CANDIDATES 8 // max number of candidates per scale #define PATTERN_CANDIDATES_REF 3 // number of refinement candidates
// Calculate and return a sad+mvcost list around an integer best pel. staticINLINEvoid calc_int_cost_list(const MACROBLOCK *x, const MV *ref_mv, int sadpb, const vp9_variance_fn_ptr_t *fn_ptr, const MV *best_mv, int *cost_list) { staticconst MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; conststruct buf_2d *const what = &x->plane[0].src; conststruct buf_2d *const in_what = &x->e_mbd.plane[0].pre[0]; const MV fcenter_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; int br = best_mv->row; int bc = best_mv->col; const MV mv = { br, bc }; int i; unsignedint sse;
// Generic pattern search function that searches over multiple scales. // Each scale can have a different number of candidates and shape of // candidates as indicated in the num_candidates and candidates arrays // passed into this function // staticint vp9_pattern_search( const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv, constint num_candidates[MAX_PATTERN_SCALES], const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { const MACROBLOCKD *const xd = &x->e_mbd; staticconstint search_param_to_steps[MAX_MVSEARCH_STEPS] = {
10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
}; int i, s, t; conststruct buf_2d *const what = &x->plane[0].src; conststruct buf_2d *const in_what = &xd->plane[0].pre[0]; int br, bc; int bestsad = INT_MAX; int thissad; int k = -1; const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; int best_init_s = search_param_to_steps[search_param]; // adjust ref_mv to make sure it is within MV range
clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
x->mv_limits.row_min, x->mv_limits.row_max);
br = ref_mv->row;
bc = ref_mv->col;
// Work out the start point for the search
bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
in_what->stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
// Search all possible scales up to the search param around the center point // pick the scale of the point that is best as the starting scale of // further steps around it. if (do_init_search) {
s = best_init_s;
best_init_s = -1; for (t = 0; t <= s; ++t) { int best_site = -1; if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { for (i = 0; i < num_candidates[t]; i++) { const MV this_mv = { br + candidates[t][i].row,
bc + candidates[t][i].col };
thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
} else { for (i = 0; i < num_candidates[t]; i++) { const MV this_mv = { br + candidates[t][i].row,
bc + candidates[t][i].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
} if (best_site == -1) { continue;
} else {
best_init_s = t;
k = best_site;
}
} if (best_init_s != -1) {
br += candidates[best_init_s][k].row;
bc += candidates[best_init_s][k].col;
}
}
// If the center point is still the best, just skip this and move to // the refinement step. if (best_init_s != -1) { int best_site = -1;
s = best_init_s;
do { // No need to search all 6 points the 1st time if initial search was used if (!do_init_search || s != best_init_s) { if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = { br + candidates[s][i].row,
bc + candidates[s][i].col };
thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
} else { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = { br + candidates[s][i].row,
bc + candidates[s][i].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
}
if (best_site == -1) { continue;
} else {
br += candidates[s][best_site].row;
bc += candidates[s][best_site].col;
k = best_site;
}
}
do { int next_chkpts_indices[PATTERN_CANDIDATES_REF];
best_site = -1;
next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
next_chkpts_indices[1] = k;
next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = {
br + candidates[s][next_chkpts_indices[i]].row,
bc + candidates[s][next_chkpts_indices[i]].col
};
thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
} else { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = {
br + candidates[s][next_chkpts_indices[i]].row,
bc + candidates[s][next_chkpts_indices[i]].col
}; if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
}
if (best_site != -1) {
k = next_chkpts_indices[best_site];
br += candidates[s][k].row;
bc += candidates[s][k].col;
}
} while (best_site != -1);
} while (s--);
}
best_mv->row = br;
best_mv->col = bc;
// Returns the one-away integer pel sad values around the best as follows: // cost_list[0]: cost at the best integer pel // cost_list[1]: cost at delta {0, -1} (left) from the best integer pel // cost_list[2]: cost at delta { 1, 0} (bottom) from the best integer pel // cost_list[3]: cost at delta { 0, 1} (right) from the best integer pel // cost_list[4]: cost at delta {-1, 0} (top) from the best integer pel if (cost_list) {
calc_int_cost_list(x, &fcenter_mv, sad_per_bit, vfp, best_mv, cost_list);
} return bestsad;
}
// A specialized function where the smallest scale search candidates // are 4 1-away neighbors, and cost_list is non-null // TODO(debargha): Merge this function with the one above. Also remove // use_mvcost option since it is always 1, to save unnecessary branches. staticint vp9_pattern_search_sad( const MACROBLOCK *x, MV *ref_mv, int search_param, int sad_per_bit, int do_init_search, int *cost_list, const vp9_variance_fn_ptr_t *vfp, int use_mvcost, const MV *center_mv, MV *best_mv, constint num_candidates[MAX_PATTERN_SCALES], const MV candidates[MAX_PATTERN_SCALES][MAX_PATTERN_CANDIDATES]) { const MACROBLOCKD *const xd = &x->e_mbd; staticconstint search_param_to_steps[MAX_MVSEARCH_STEPS] = {
10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0,
}; int i, s, t; conststruct buf_2d *const what = &x->plane[0].src; conststruct buf_2d *const in_what = &xd->plane[0].pre[0]; int br, bc; int bestsad = INT_MAX; int thissad; int k = -1; const MV fcenter_mv = { center_mv->row >> 3, center_mv->col >> 3 }; int best_init_s = search_param_to_steps[search_param]; // adjust ref_mv to make sure it is within MV range
clamp_mv(ref_mv, x->mv_limits.col_min, x->mv_limits.col_max,
x->mv_limits.row_min, x->mv_limits.row_max);
br = ref_mv->row;
bc = ref_mv->col; if (cost_list != NULL) {
cost_list[0] = cost_list[1] = cost_list[2] = cost_list[3] = cost_list[4] =
INT_MAX;
}
// Work out the start point for the search
bestsad = vfp->sdf(what->buf, what->stride, get_buf_from_mv(in_what, ref_mv),
in_what->stride) +
mvsad_err_cost(x, ref_mv, &fcenter_mv, sad_per_bit);
// Search all possible scales up to the search param around the center point // pick the scale of the point that is best as the starting scale of // further steps around it. if (do_init_search) {
s = best_init_s;
best_init_s = -1; for (t = 0; t <= s; ++t) { int best_site = -1; if (check_bounds(&x->mv_limits, br, bc, 1 << t)) { for (i = 0; i < num_candidates[t]; i++) { const MV this_mv = { br + candidates[t][i].row,
bc + candidates[t][i].col };
thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
} else { for (i = 0; i < num_candidates[t]; i++) { const MV this_mv = { br + candidates[t][i].row,
bc + candidates[t][i].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
} if (best_site == -1) { continue;
} else {
best_init_s = t;
k = best_site;
}
} if (best_init_s != -1) {
br += candidates[best_init_s][k].row;
bc += candidates[best_init_s][k].col;
}
}
// If the center point is still the best, just skip this and move to // the refinement step. if (best_init_s != -1) { int do_sad = (num_candidates[0] == 4 && cost_list != NULL); int best_site = -1;
s = best_init_s;
for (; s >= do_sad; s--) { if (!do_init_search || s != best_init_s) { if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = { br + candidates[s][i].row,
bc + candidates[s][i].col };
thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
} else { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = { br + candidates[s][i].row,
bc + candidates[s][i].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
}
if (best_site == -1) { continue;
} else {
br += candidates[s][best_site].row;
bc += candidates[s][best_site].col;
k = best_site;
}
}
do { int next_chkpts_indices[PATTERN_CANDIDATES_REF];
best_site = -1;
next_chkpts_indices[0] = (k == 0) ? num_candidates[s] - 1 : k - 1;
next_chkpts_indices[1] = k;
next_chkpts_indices[2] = (k == num_candidates[s] - 1) ? 0 : k + 1;
if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = {
br + candidates[s][next_chkpts_indices[i]].row,
bc + candidates[s][next_chkpts_indices[i]].col
};
thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
} else { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = {
br + candidates[s][next_chkpts_indices[i]].row,
bc + candidates[s][next_chkpts_indices[i]].col
}; if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
}
if (best_site != -1) {
k = next_chkpts_indices[best_site];
br += candidates[s][k].row;
bc += candidates[s][k].col;
}
} while (best_site != -1);
}
// Note: If we enter the if below, then cost_list must be non-NULL. if (s == 0) {
cost_list[0] = bestsad; if (!do_init_search || s != best_init_s) { if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = { br + candidates[s][i].row,
bc + candidates[s][i].col };
cost_list[i + 1] = thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
} else { for (i = 0; i < num_candidates[s]; i++) { const MV this_mv = { br + candidates[s][i].row,
bc + candidates[s][i].col }; if (!is_mv_in(&x->mv_limits, &this_mv)) continue;
cost_list[i + 1] = thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
}
if (check_bounds(&x->mv_limits, br, bc, 1 << s)) { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = {
br + candidates[s][next_chkpts_indices[i]].row,
bc + candidates[s][next_chkpts_indices[i]].col
};
cost_list[next_chkpts_indices[i] + 1] = thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
} else { for (i = 0; i < PATTERN_CANDIDATES_REF; i++) { const MV this_mv = {
br + candidates[s][next_chkpts_indices[i]].row,
bc + candidates[s][next_chkpts_indices[i]].col
}; if (!is_mv_in(&x->mv_limits, &this_mv)) {
cost_list[next_chkpts_indices[i] + 1] = INT_MAX; continue;
}
cost_list[next_chkpts_indices[i] + 1] = thissad =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
CHECK_BETTER
}
}
if (best_site != -1) {
k = next_chkpts_indices[best_site];
br += candidates[s][k].row;
bc += candidates[s][k].col;
}
}
}
}
// Returns the one-away integer pel sad values around the best as follows: // cost_list[0]: sad at the best integer pel // cost_list[1]: sad at delta {0, -1} (left) from the best integer pel // cost_list[2]: sad at delta { 1, 0} (bottom) from the best integer pel // cost_list[3]: sad at delta { 0, 1} (right) from the best integer pel // cost_list[4]: sad at delta {-1, 0} (top) from the best integer pel if (cost_list) { staticconst MV neighbors[4] = { { 0, -1 }, { 1, 0 }, { 0, 1 }, { -1, 0 } }; if (cost_list[0] == INT_MAX) {
cost_list[0] = bestsad; if (check_bounds(&x->mv_limits, br, bc, 1)) { for (i = 0; i < 4; i++) { const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col };
cost_list[i + 1] =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
}
} else { for (i = 0; i < 4; i++) { const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; if (!is_mv_in(&x->mv_limits, &this_mv))
cost_list[i + 1] = INT_MAX; else
cost_list[i + 1] =
vfp->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &this_mv), in_what->stride);
}
}
} else { if (use_mvcost) { for (i = 0; i < 4; i++) { const MV this_mv = { br + neighbors[i].row, bc + neighbors[i].col }; if (cost_list[i + 1] != INT_MAX) {
cost_list[i + 1] +=
mvsad_err_cost(x, &this_mv, &fcenter_mv, sad_per_bit);
}
}
}
}
}
best_mv->row = br;
best_mv->col = bc; return bestsad;
}
for (r = start_row; r <= end_row; r += step) { for (c = start_col; c <= end_col; c += col_step) { // Step > 1 means we are not checking every location in this pass. if (step > 1) { const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c }; unsignedint sad =
fn_ptr->sdf(what->buf, what->stride, get_buf_from_mv(in_what, &mv),
in_what->stride); if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); if (sad < best_sad) {
best_sad = sad;
*best_mv = mv;
}
}
} else { // 4 sads in a single call if we are checking every location if (c + 3 <= end_col) { unsignedint sads[4]; const uint8_t *addrs[4]; for (i = 0; i < 4; ++i) { const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i };
addrs[i] = get_buf_from_mv(in_what, &mv);
}
fn_ptr->sdx4df(what->buf, what->stride, addrs, in_what->stride, sads);
for (i = 0; i < 4; ++i) { if (sads[i] < best_sad) { const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; constunsignedint sad =
sads[i] + mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); if (sad < best_sad) {
best_sad = sad;
*best_mv = mv;
}
}
}
} else { for (i = 0; i < end_col - c; ++i) { const MV mv = { fcenter_mv.row + r, fcenter_mv.col + c + i }; unsignedint sad =
fn_ptr->sdf(what->buf, what->stride,
get_buf_from_mv(in_what, &mv), in_what->stride); if (sad < best_sad) {
sad += mvsad_err_cost(x, &mv, ref_mv, sad_per_bit); if (sad < best_sad) {
best_sad = sad;
*best_mv = mv;
}
}
}
}
}
}
}
return best_sad;
}
#define MIN_RANGE 7 #define MAX_RANGE 256 #define MIN_INTERVAL 1 #if CONFIG_NON_GREEDY_MV static int64_t exhaustive_mesh_search_multi_step(
MV *best_mv, const MV *center_mv, int range, int step, conststruct buf_2d *src, conststruct buf_2d *pre, int lambda, const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits, const vp9_variance_fn_ptr_t *fn_ptr) {
int64_t best_sad; int r, c; int start_col, end_col, start_row, end_row;
*best_mv = *center_mv;
best_sad =
((int64_t)fn_ptr->sdf(src->buf, src->stride,
get_buf_from_mv(pre, center_mv), pre->stride)
<< LOG2_PRECISION) +
lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);
start_row = VPXMAX(center_mv->row - range, mv_limits->row_min);
start_col = VPXMAX(center_mv->col - range, mv_limits->col_min);
end_row = VPXMIN(center_mv->row + range, mv_limits->row_max);
end_col = VPXMIN(center_mv->col + range, mv_limits->col_max); for (r = start_row; r <= end_row; r += step) { for (c = start_col; c <= end_col; c += step) { const MV mv = { r, c };
int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride,
get_buf_from_mv(pre, &mv), pre->stride)
<< LOG2_PRECISION; if (sad < best_sad) {
sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); if (sad < best_sad) {
best_sad = sad;
*best_mv = mv;
}
}
}
} return best_sad;
}
static int64_t exhaustive_mesh_search_single_step(
MV *best_mv, const MV *center_mv, int range, conststruct buf_2d *src, conststruct buf_2d *pre, int lambda, const int_mv *nb_full_mvs, int full_mv_num, const MvLimits *mv_limits, const vp9_variance_fn_ptr_t *fn_ptr) {
int64_t best_sad; int r, c, i; int start_col, end_col, start_row, end_row;
*best_mv = *center_mv;
best_sad =
((int64_t)fn_ptr->sdf(src->buf, src->stride,
get_buf_from_mv(pre, center_mv), pre->stride)
<< LOG2_PRECISION) +
lambda * vp9_nb_mvs_inconsistency(best_mv, nb_full_mvs, full_mv_num);
start_row = VPXMAX(center_mv->row - range, mv_limits->row_min);
start_col = VPXMAX(center_mv->col - range, mv_limits->col_min);
end_row = VPXMIN(center_mv->row + range, mv_limits->row_max);
end_col = VPXMIN(center_mv->col + range, mv_limits->col_max); for (r = start_row; r <= end_row; r += 1) {
c = start_col; while (c + 3 <= end_col) { unsignedint sads[4]; const uint8_t *addrs[4]; for (i = 0; i < 4; ++i) { const MV mv = { r, c + i };
addrs[i] = get_buf_from_mv(pre, &mv);
}
fn_ptr->sdx4df(src->buf, src->stride, addrs, pre->stride, sads);
for (i = 0; i < 4; ++i) {
int64_t sad = (int64_t)sads[i] << LOG2_PRECISION; if (sad < best_sad) { const MV mv = { r, c + i };
sad +=
lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); if (sad < best_sad) {
best_sad = sad;
*best_mv = mv;
}
}
}
c += 4;
} while (c <= end_col) { const MV mv = { r, c };
int64_t sad = (int64_t)fn_ptr->sdf(src->buf, src->stride,
get_buf_from_mv(pre, &mv), pre->stride)
<< LOG2_PRECISION; if (sad < best_sad) {
sad += lambda * vp9_nb_mvs_inconsistency(&mv, nb_full_mvs, full_mv_num); if (sad < best_sad) {
best_sad = sad;
*best_mv = mv;
}
}
c += 1;
}
} return best_sad;
}
static int64_t full_pixel_exhaustive_new(const VP9_COMP *cpi, MACROBLOCK *x,
MV *centre_mv_full, const vp9_variance_fn_ptr_t *fn_ptr,
MV *dst_mv, int lambda, const int_mv *nb_full_mvs, int full_mv_num) { const SPEED_FEATURES *const sf = &cpi->sf;
MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
int64_t bestsme; int i; int interval = sf->mesh_patterns[0].interval; int range = sf->mesh_patterns[0].range; int baseline_interval_divisor;
// Trap illegal values for interval and range for this function. if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
(interval > range)) {
printf("ERROR: invalid range\n");
assert(0);
}
baseline_interval_divisor = range / interval;
// Check size of proposed first range against magnitude of the centre // value used as a starting point.
range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
range = VPXMIN(range, MAX_RANGE);
interval = VPXMAX(interval, range / baseline_interval_divisor);
if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { // Progressive searches with range and step size decreasing each time // till we reach a step size of 1. Then break out. for (i = 1; i < MAX_MESH_STEP; ++i) { // First pass with coarser step and longer range
bestsme = exhaustive_mesh_search_new(
x, &temp_mv, sf->mesh_patterns[i].range,
sf->mesh_patterns[i].interval, fn_ptr, &temp_mv, lambda, nb_full_mvs,
full_mv_num);
if (sf->mesh_patterns[i].interval == 1) break;
}
}
*dst_mv = temp_mv;
return bestsme;
}
static int64_t diamond_search_sad_new(const MACROBLOCK *x, const search_site_config *cfg, const MV *init_full_mv, MV *best_full_mv, int search_param, int lambda, int *num00, const vp9_variance_fn_ptr_t *fn_ptr, const int_mv *nb_full_mvs, int full_mv_num) { int i, j, step;
// Work out the start point for the search
in_what = xd->plane[0].pre[0].buf + best_full_mv->row * in_what_stride +
best_full_mv->col;
best_address = in_what;
for (step = 0; step < tot_steps; step++) { int all_in = 1, t;
// All_in is true if every one of the points we are checking are within // the bounds of the image.
all_in &= ((best_full_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
all_in &= ((best_full_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
all_in &= ((best_full_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
all_in &= ((best_full_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
// If all the pixels are within the bounds we don't check whether the // search point is valid in this loop, otherwise we check each point // for validity.. if (all_in) { unsignedint sad_array[4];
int vp9_prepare_nb_full_mvs(const MotionField *motion_field, int mi_row, int mi_col, int_mv *nb_full_mvs) { constint mi_width = num_8x8_blocks_wide_lookup[motion_field->bsize]; constint mi_height = num_8x8_blocks_high_lookup[motion_field->bsize]; constint dirs[NB_MVS_NUM][2] = { { -1, 0 }, { 0, -1 }, { 1, 0 }, { 0, 1 } }; int nb_full_mv_num = 0; int i;
assert(mi_row % mi_height == 0);
assert(mi_col % mi_width == 0); for (i = 0; i < NB_MVS_NUM; ++i) { int r = dirs[i][0]; int c = dirs[i][1]; int brow = mi_row / mi_height + r; int bcol = mi_col / mi_width + c; if (brow >= 0 && brow < motion_field->block_rows && bcol >= 0 &&
bcol < motion_field->block_cols) { if (vp9_motion_field_is_mv_set(motion_field, brow, bcol)) {
int_mv mv = vp9_motion_field_get_mv(motion_field, brow, bcol);
nb_full_mvs[nb_full_mv_num].as_mv = get_full_mv(&mv.as_mv);
++nb_full_mv_num;
}
}
} return nb_full_mv_num;
} #endif// CONFIG_NON_GREEDY_MV
int vp9_diamond_search_sad_c(const MACROBLOCK *x, const search_site_config *cfg,
MV *ref_mv, uint32_t start_mv_sad, MV *best_mv, int search_param, int sad_per_bit, int *num00, const vp9_sad_fn_ptr_t *sad_fn_ptr, const MV *center_mv) { int i, j, step;
// Work out the start point for the search
in_what = xd->plane[0].pre[0].buf + ref_row * in_what_stride + ref_col;
best_address = in_what;
i = 0;
for (step = 0; step < tot_steps; step++) { int all_in = 1, t;
// All_in is true if every one of the points we are checking are within // the bounds of the image.
all_in &= ((best_mv->row + ss_mv[i].row) > x->mv_limits.row_min);
all_in &= ((best_mv->row + ss_mv[i + 1].row) < x->mv_limits.row_max);
all_in &= ((best_mv->col + ss_mv[i + 2].col) > x->mv_limits.col_min);
all_in &= ((best_mv->col + ss_mv[i + 3].col) < x->mv_limits.col_max);
// If all the pixels are within the bounds we don't check whether the // search point is valid in this loop, otherwise we check each point // for validity.. if (all_in) { unsignedint sad_array[4];
staticint vector_match(int16_t *ref, int16_t *src, int bwl) { int best_sad = INT_MAX; int this_sad; int d; int center, offset = 0; int bw = 4 << bwl; // redundant variable, to be changed in the experiments. for (d = 0; d <= bw; d += 16) {
this_sad = vpx_vector_var(&ref[d], src, bwl); if (this_sad < best_sad) {
best_sad = this_sad;
offset = d;
}
}
center = offset;
for (d = -8; d <= 8; d += 16) { int this_pos = offset + d; // check limit if (this_pos < 0 || this_pos > bw) continue;
this_sad = vpx_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
}
}
offset = center;
for (d = -4; d <= 4; d += 8) { int this_pos = offset + d; // check limit if (this_pos < 0 || this_pos > bw) continue;
this_sad = vpx_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
}
}
offset = center;
for (d = -2; d <= 2; d += 4) { int this_pos = offset + d; // check limit if (this_pos < 0 || this_pos > bw) continue;
this_sad = vpx_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
}
}
offset = center;
for (d = -1; d <= 1; d += 2) { int this_pos = offset + d; // check limit if (this_pos < 0 || this_pos > bw) continue;
this_sad = vpx_vector_var(&ref[this_pos], src, bwl); if (this_sad < best_sad) {
best_sad = this_sad;
center = this_pos;
}
}
if (scaled_ref_frame) { int i; // Swap out the reference frame for a version that's been scaled to // match the resolution of the current frame, allowing the existing // motion search code to be used without additional modifications. for (i = 0; i < MAX_MB_PLANE; i++) backup_yv12[i] = xd->plane[i].pre[0];
vp9_setup_pre_planes(xd, 0, scaled_ref_frame, mi_row, mi_col, NULL);
}
#if CONFIG_VP9_HIGHBITDEPTH // TODO(jingning): Implement integral projection functions for high bit-depth // setting and remove this part of code. if (xd->bd != 8) { constunsignedint sad = cpi->fn_ptr[bsize].sdf(
x->plane[0].src.buf, src_stride, xd->plane[0].pre[0].buf, ref_stride);
tmp_mv->row = 0;
tmp_mv->col = 0;
if (scaled_ref_frame) { int i; for (i = 0; i < MAX_MB_PLANE; i++) xd->plane[i].pre[0] = backup_yv12[i];
} return sad;
} #endif
// Set up prediction 1-D reference set
ref_buf = xd->plane[0].pre[0].buf - (bw >> 1); for (idx = 0; idx < search_width; idx += 16) {
vpx_int_pro_row(&hbuf[idx], ref_buf, ref_stride, bh);
ref_buf += 16;
}
// Find the best match per 1-D search
tmp_mv->col = vector_match(hbuf, src_hbuf, b_width_log2_lookup[bsize]);
tmp_mv->row = vector_match(vbuf, src_vbuf, b_height_log2_lookup[bsize]);
#if CONFIG_NON_GREEDY_MV // Runs sequence of diamond searches in smaller steps for RD. /* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond
refining search */ int vp9_full_pixel_diamond_new(const VP9_COMP *cpi, MACROBLOCK *x,
BLOCK_SIZE bsize, MV *mvp_full, int step_param, int lambda, int do_refine, const int_mv *nb_full_mvs, int full_mv_num,
MV *best_mv) { const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; const SPEED_FEATURES *const sf = &cpi->sf; int n, num00 = 0; int thissme; int bestsme; constint further_steps = MAX_MVSEARCH_STEPS - 1 - step_param; const MV center_mv = { 0, 0 };
vpx_clear_system_state();
diamond_search_sad_new(x, &cpi->ss_cfg, mvp_full, best_mv, step_param, lambda,
&n, fn_ptr, nb_full_mvs, full_mv_num);
// Runs sequence of diamond searches in smaller steps for RD. /* do_refine: If last step (1-away) of n-step search doesn't pick the center point as the best match, we will do a final 1-away diamond
refining search */ staticint full_pixel_diamond(const VP9_COMP *const cpi, const MACROBLOCK *const x, BLOCK_SIZE bsize,
MV *mvp_full, int step_param, int sadpb, int further_steps, int do_refine, int use_downsampled_sad, int *cost_list, const vp9_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv) {
MV temp_mv; int thissme, n, num00 = 0; int bestsme; constint src_buf_stride = x->plane[0].src.stride; const uint8_t *const src_buf = x->plane[0].src.buf; const MACROBLOCKD *const xd = &x->e_mbd; constint pred_buf_stride = xd->plane[0].pre[0].stride;
uint8_t *pred_buf;
vp9_sad_fn_ptr_t sad_fn_ptr; unsignedint start_mv_sad, start_mv_sad_even_rows, start_mv_sad_odd_rows; const MV ref_mv_full = { ref_mv->row >> 3, ref_mv->col >> 3 };
clamp_mv(mvp_full, x->mv_limits.col_min, x->mv_limits.col_max,
x->mv_limits.row_min, x->mv_limits.row_max);
sad_fn_ptr.sdf = fn_ptr->sdf;
sad_fn_ptr.sdx4df = fn_ptr->sdx4df; if (use_downsampled_sad && num_4x4_blocks_high_lookup[bsize] >= 2) { // If the absolute difference between the pred-to-src SAD of even rows and // the pred-to-src SAD of odd rows is small, skip every other row in sad // computation. constint odd_to_even_diff_sad =
abs((int)start_mv_sad_even_rows - (int)start_mv_sad_odd_rows); constint mult_thresh = 10; if (odd_to_even_diff_sad * mult_thresh < (int)start_mv_sad_even_rows) {
sad_fn_ptr.sdf = fn_ptr->sdsf;
sad_fn_ptr.sdx4df = fn_ptr->sdsx4df;
}
}
if (sad_fn_ptr.sdf != fn_ptr->sdf) { // If we are skipping rows when we perform the motion search, we need to // check the quality of skipping. If it's bad, then we run search with // skip row features off. const uint8_t *best_address = get_buf_from_mv(&xd->plane[0].pre[0], dst_mv); constint sad =
fn_ptr->sdf(src_buf, src_buf_stride, best_address, pred_buf_stride); constint skip_sad =
fn_ptr->sdsf(src_buf, src_buf_stride, best_address, pred_buf_stride); // We will keep the result of skipping rows if it's good enough. constint kSADThresh =
1 << (b_width_log2_lookup[bsize] + b_height_log2_lookup[bsize]); if (sad > kSADThresh && abs(skip_sad - sad) * 10 >= VPXMAX(sad, 1) * 9) { // There is a large discrepancy between skipping and not skipping, so we // need to redo the motion search. return full_pixel_diamond(cpi, x, bsize, mvp_full, step_param, sadpb,
further_steps, do_refine, 0, cost_list, fn_ptr,
ref_mv, dst_mv);
}
}
// Runs an limited range exhaustive mesh search using a pattern set // according to the encode speed profile. staticint full_pixel_exhaustive(const VP9_COMP *const cpi, const MACROBLOCK *const x, MV *centre_mv_full, int sadpb, int *cost_list, const vp9_variance_fn_ptr_t *fn_ptr, const MV *ref_mv, MV *dst_mv) { const SPEED_FEATURES *const sf = &cpi->sf;
MV temp_mv = { centre_mv_full->row, centre_mv_full->col };
MV f_ref_mv = { ref_mv->row >> 3, ref_mv->col >> 3 }; int bestsme; int i; int interval = sf->mesh_patterns[0].interval; int range = sf->mesh_patterns[0].range; int baseline_interval_divisor;
// Trap illegal values for interval and range for this function. if ((range < MIN_RANGE) || (range > MAX_RANGE) || (interval < MIN_INTERVAL) ||
(interval > range)) return INT_MAX;
baseline_interval_divisor = range / interval;
// Check size of proposed first range against magnitude of the centre // value used as a starting point.
range = VPXMAX(range, (5 * VPXMAX(abs(temp_mv.row), abs(temp_mv.col))) / 4);
range = VPXMIN(range, MAX_RANGE);
interval = VPXMAX(interval, range / baseline_interval_divisor);
if ((interval > MIN_INTERVAL) && (range > MIN_RANGE)) { // Progressive searches with range and step size decreasing each time // till we reach a step size of 1. Then break out. for (i = 1; i < MAX_MESH_STEP; ++i) { // First pass with coarser step and longer range
bestsme = exhaustive_mesh_search(
x, &f_ref_mv, &temp_mv, sf->mesh_patterns[i].range,
sf->mesh_patterns[i].interval, sadpb, fn_ptr, &temp_mv);
if (sf->mesh_patterns[i].interval == 1) break;
}
}
int vp9_full_pixel_search(const VP9_COMP *const cpi, const MACROBLOCK *const x,
BLOCK_SIZE bsize, MV *mvp_full, int step_param, int search_method, int error_per_bit, int *cost_list, const MV *ref_mv, MV *tmp_mv, int var_max, int rd) { const SPEED_FEATURES *const sf = &cpi->sf; const SEARCH_METHODS method = (SEARCH_METHODS)search_method; const vp9_variance_fn_ptr_t *fn_ptr = &cpi->fn_ptr[bsize]; int var = 0; int run_exhaustive_search = 0;
if (run_exhaustive_search) { int var_ex;
MV tmp_mv_ex;
var_ex = full_pixel_exhaustive(cpi, x, tmp_mv, error_per_bit, cost_list,
fn_ptr, ref_mv, &tmp_mv_ex); if (var_ex < var) {
var = var_ex;
*tmp_mv = tmp_mv_ex;
}
}
if (method != NSTEP && method != MESH && rd && var < var_max)
var = vp9_get_mvpred_var(x, tmp_mv, ref_mv, fn_ptr, 1);
return var;
}
// Note(yunqingwang): The following 2 functions are only used in the motion // vector unit test, which return extreme motion vectors allowed by the MV // limits. #define COMMON_MV_TEST \
SETUP_SUBPEL_SEARCH; \
\
(void)error_per_bit; \
(void)vfp; \
(void)z; \
(void)src_stride; \
(void)y; \
(void)y_stride; \
(void)second_pred; \
(void)w; \
(void)h; \
(void)offset; \
(void)mvjcost; \
(void)mvcost; \
(void)sse1; \
(void)distortion; \
\
(void)halfiters; \
(void)quarteriters; \
(void)eighthiters; \
(void)whichdir; \
(void)allow_hp; \
(void)forced_stop; \
(void)hstep; \
(void)rr; \
(void)rc; \
\
(void)tr; \
(void)tc; \
(void)sse; \
(void)thismse; \
(void)cost_list; \
(void)use_accurate_subpel_search
// Return the maximum MV.
uint32_t vp9_return_max_sub_pixel_mv( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) {
COMMON_MV_TEST;
// In the sub-pel motion search, if hp is not used, then the last bit of mv // has to be 0.
lower_mv_precision(bestmv, allow_hp && use_mv_hp(ref_mv));
return besterr;
} // Return the minimum MV.
uint32_t vp9_return_min_sub_pixel_mv( const MACROBLOCK *x, MV *bestmv, const MV *ref_mv, int allow_hp, int error_per_bit, const vp9_variance_fn_ptr_t *vfp, int forced_stop, int iters_per_step, int *cost_list, int *mvjcost, int *mvcost[2],
uint32_t *distortion, uint32_t *sse1, const uint8_t *second_pred, int w, int h, int use_accurate_subpel_search) {
COMMON_MV_TEST;
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.