/* * Copyright (c) 2016, Alliance for Open Media. All rights reserved. * * This source code is subject to the terms of the BSD 2 Clause License and * the Alliance for Open Media Patent License 1.0. If the BSD 2 Clause License * was not distributed with this source code in the LICENSE file, you can * obtain it at www.aomedia.org/license/software. If the Alliance for Open * Media Patent License 1.0 was not distributed with this source code in the * PATENTS file, you can obtain it at www.aomedia.org/license/patent.
*/
// Threshold values to be used for pruning the txfm_domain_distortion // based on block MSE // Index 0: Default mode evaluation, Winner mode processing is not // applicable (Eg : IntraBc). Index 1: Mode evaluation. // Index 2: Winner mode evaluation. Index 1 and 2 are applicable when // enable_winner_mode_for_use_tx_domain_dist speed feature is ON // TODO(any): Experiment the threshold logic based on variance metric staticconstunsignedint tx_domain_dist_thresholds[4][MODE_EVAL_TYPES] = {
{ UINT_MAX, UINT_MAX, UINT_MAX },
{ 22026, 22026, 22026 },
{ 1377, 1377, 1377 },
{ 0, 0, 0 }
};
// Number of different levels of aggressiveness in using transform domain
// distortion during the R-D evaluation based on the speed feature
// tx_domain_dist_level.
#define TX_DOMAIN_DIST_LEVELS 4
// Transform domain distortion type to be used for default, mode and winner
// mode evaluation. One row per tx_domain_dist level
// (TX_DOMAIN_DIST_LEVELS rows); within each row the columns are:
//   Index 0: Default mode evaluation, Winner mode processing is not
//            applicable (Eg : IntraBc).
//   Index 1: Mode evaluation.
//   Index 2: Winner mode evaluation.
// Index 1 and 2 are applicable when the
// enable_winner_mode_for_use_tx_domain_dist speed feature is ON.
static const unsigned int
    tx_domain_dist_types[TX_DOMAIN_DIST_LEVELS][MODE_EVAL_TYPES] = {
      { 0, 2, 0 }, { 1, 2, 0 }, { 2, 2, 0 }, { 2, 2, 2 }
    };
// Threshold values to be used for disabling coeff RD-optimization // based on block MSE / qstep^2. // TODO(any): Experiment the threshold logic based on variance metric. // Table has satd and dist threshold value index 0 : dist,index 1: satd // For each row, the indices are as follows. // Index 0: Default mode evaluation, Winner mode processing is not applicable // (Eg : IntraBc) // Index 1: Mode evaluation. // Index 2: Winner mode evaluation. // Index 1 and 2 are applicable when enable_winner_mode_for_coeff_opt speed // feature is ON // There are 7 levels with increasing speed, mapping to vertical indices. staticconstunsignedint coeff_opt_thresholds[9][MODE_EVAL_TYPES][2] = {
{ { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX }, { UINT_MAX, UINT_MAX } },
{ { 3200, UINT_MAX }, { 250, UINT_MAX }, { UINT_MAX, UINT_MAX } },
{ { 1728, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
{ { 864, UINT_MAX }, { 142, UINT_MAX }, { UINT_MAX, UINT_MAX } },
{ { 432, UINT_MAX }, { 86, UINT_MAX }, { UINT_MAX, UINT_MAX } },
{ { 864, 97 }, { 142, 16 }, { UINT_MAX, UINT_MAX } },
{ { 432, 97 }, { 86, 16 }, { UINT_MAX, UINT_MAX } },
{ { 216, 25 }, { 86, 10 }, { UINT_MAX, UINT_MAX } },
{ { 216, 25 }, { 0, 10 }, { UINT_MAX, UINT_MAX } }
};
// Transform size to be used for default, mode and winner mode evaluation // Index 0: Default mode evaluation, Winner mode processing is not applicable // (Eg : IntraBc) Index 1: Mode evaluation. Index 2: Winner mode evaluation. // Index 1 and 2 are applicable when enable_winner_mode_for_tx_size_srch speed // feature is ON staticconst TX_SIZE_SEARCH_METHOD
tx_size_search_methods[4][MODE_EVAL_TYPES] = {
{ USE_FULL_RD, USE_LARGESTALL, USE_FULL_RD },
{ USE_FAST_RD, USE_LARGESTALL, USE_FULL_RD },
{ USE_LARGESTALL, USE_LARGESTALL, USE_FULL_RD },
{ USE_LARGESTALL, USE_LARGESTALL, USE_LARGESTALL }
};
// Predict transform skip levels to be used for default, mode and winner mode // evaluation. Index 0: Default mode evaluation, Winner mode processing is not // applicable. Index 1: Mode evaluation, Index 2: Winner mode evaluation // Values indicate the aggressiveness of skip flag prediction. // 0 : no early skip prediction // 1 : conservative early skip prediction using DCT_DCT // 2 : early skip prediction based on SSE staticconstunsignedint predict_skip_levels[3][MODE_EVAL_TYPES] = {
{ 0, 0, 0 }, { 1, 1, 1 }, { 1, 2, 1 }
};
// Predict skip or DC block level used during transform type search. It is // indexed using the following: // First index : Speed feature 'dc_blk_pred_level' (0 to 3) // Second index : Mode evaluation type (DEFAULT_EVAL, MODE_EVAL and // WINNER_MODE_EVAL). // // The values of predict_dc_levels[][] indicate the aggressiveness of predicting // a block as transform skip or DC only. // Type 0 : No skip block or DC only block prediction // Type 1 : Prediction of skip block based on residual mean and variance // Type 2 : Prediction of skip block or DC only block based on residual mean and // variance staticconstunsignedint predict_dc_levels[4][MODE_EVAL_TYPES] = {
{ 0, 0, 0 }, { 1, 1, 0 }, { 2, 2, 0 }, { 2, 2, 2 }
};
#if !CONFIG_FPMT_TEST // This table holds the maximum number of reference frames for global motion. // The table is indexed as per the speed feature 'gm_search_type'. // 0 : All reference frames are allowed. // 1 : All reference frames except L2 and L3 are allowed. // 2 : All reference frames except L2, L3 and ARF2 are allowed. // 3 : No reference frame is allowed. staticconstint gm_available_reference_frames[GM_DISABLE_SEARCH + 1] = {
INTER_REFS_PER_FRAME, INTER_REFS_PER_FRAME - 2, INTER_REFS_PER_FRAME - 3, 0
}; #endif
// Qindex threshold levels used for selecting full-pel motion search. // ms_qthresh[i][j][k] indicates the qindex boundary value for 'k'th qindex band // for resolution index 'j' for aggressiveness level 'i'. // Aggressiveness increases from i = 0 to 2. // j = 0: lower than 720p resolution, j = 1: 720p or larger resolution. // Currently invoked only for speed 0, 1 and 2. staticconstint ms_qindex_thresh[3][2][2] = { { { 200, 70 }, { MAXQ, 200 } },
{ { 170, 50 }, { MAXQ, 200 } },
{ { 170, 40 }, { 200, 40 } } };
// Full-pel search methods for aggressive search based on qindex. // Index 0 is for resolutions lower than 720p, index 1 for 720p or larger // resolutions. Currently invoked only for speed 1 and 2. staticconst SEARCH_METHODS motion_search_method[2] = { CLAMPED_DIAMOND,
DIAMOND };
// Intra only frames, golden frames (except alt ref overlays) and // alt ref frames tend to be coded at a higher than ambient quality staticint frame_is_boosted(const AV1_COMP *cpi) { return frame_is_kf_gf_arf(cpi);
}
// Set transform rd gate level for all transform search cases. staticinlinevoid set_txfm_rd_gate_level( int txfm_rd_gate_level[TX_SEARCH_CASES], int level) {
assert(level <= MAX_TX_RD_GATE_LEVEL); for (int idx = 0; idx < TX_SEARCH_CASES; idx++)
txfm_rd_gate_level[idx] = level;
}
if (is_720p_or_larger) { // TODO(chiyotsai@google.com): make this speed feature adaptive based on // current block's vertical texture instead of hardcoded with resolution
sf->mv_sf.use_downsampled_sad = 2;
}
if (speed >= 7) { // TODO(kyslov): add more speed features to control speed/quality
}
if (speed >= 8) { if (!is_480p_or_larger) {
sf->rt_sf.nonrd_check_partition_merge_mode = 2;
} if (is_720p_or_larger) {
sf->rt_sf.force_large_partition_blocks_intra = 1;
}
}
if (speed >= 9) { // TODO(kyslov): add more speed features to control speed/quality if (!is_4k_or_larger) { // In av1_select_sb_size(), superblock size is set to 64x64 only for // resolutions less than 4k in speed>=9, to improve the multithread // performance. If cost update levels are set to INTERNAL_COST_UPD_OFF // for resolutions >= 4k, the SB size setting can be modified for these // resolutions as well.
sf->inter_sf.coeff_cost_upd_level = INTERNAL_COST_UPD_OFF;
sf->inter_sf.mode_cost_upd_level = INTERNAL_COST_UPD_OFF;
}
}
}
// TODO(chiyotsai@google.com): the thresholds chosen for intra hog are // inherited directly from luma hog with some minor tweaking. Eventually we // should run this with a bayesian optimizer to find the Pareto frontier.
sf->intra_sf.chroma_intra_pruning_with_hog = 2;
sf->intra_sf.intra_pruning_with_hog = 3;
sf->intra_sf.prune_palette_search_level = 2;
// TODO(any): evaluate if these lpf features can be moved to speed 2. // For screen content, "prune_sgr_based_on_wiener = 2" cause large quality // loss.
sf->lpf_sf.prune_sgr_based_on_wiener = allow_screen_content_tools ? 1 : 2;
sf->lpf_sf.disable_loop_restoration_chroma = 0;
sf->lpf_sf.reduce_wiener_window_size = 1;
sf->lpf_sf.prune_wiener_based_on_src_var = 2;
}
if (speed >= 4) {
sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
sf->part_sf.prune_rectangular_split_based_on_qidx =
allow_screen_content_tools ? 0 : 2;
sf->part_sf.prune_rect_part_using_4x4_var_deviation = true;
sf->part_sf.prune_rect_part_using_none_pred_mode = true;
sf->part_sf.prune_sub_8x8_partition_level =
allow_screen_content_tools ? 0 : 1;
sf->part_sf.prune_part4_search = 3; // TODO(jingning): This might not be a good trade off if the // target image quality is very low.
sf->part_sf.default_max_partition_size = BLOCK_32X32;
sf->winner_mode_sf.multi_winner_mode_type = MULTI_WINNER_MODE_OFF;
sf->winner_mode_sf.prune_winner_mode_eval_level = 1;
sf->winner_mode_sf.dc_blk_pred_level = 1;
} // The following should make all-intra mode speed 7 approximately equal // to real-time speed 6, // all-intra speed 8 close to real-time speed 7, and all-intra speed 9 // close to real-time speed 8 if (speed >= 7) {
sf->part_sf.default_min_partition_size = BLOCK_8X8;
sf->part_sf.partition_search_type = VAR_BASED_PARTITION;
sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
sf->rt_sf.var_part_split_threshold_shift = 7;
}
if (speed >= 8) {
sf->rt_sf.hybrid_intra_pickmode = 1;
sf->rt_sf.use_nonrd_pick_mode = 1;
sf->rt_sf.nonrd_check_partition_merge_mode = 1;
sf->rt_sf.var_part_split_threshold_shift = 8; // Set mask for intra modes. for (int i = 0; i < BLOCK_SIZES; ++i) if (i >= BLOCK_32X32)
sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC; else // Use DC, H, V intra mode for block sizes < 32X32.
sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
}
// As the speed feature prune_chroma_modes_using_luma_winner already // constrains the number of chroma directional mode evaluations to a maximum // of 1, the HOG computation and the associated pruning logic does not seem to // help speed-up the chroma mode evaluations. Hence disable the speed feature // chroma_intra_pruning_with_hog when prune_chroma_modes_using_luma_winner is // enabled. if (sf->intra_sf.prune_chroma_modes_using_luma_winner)
sf->intra_sf.chroma_intra_pruning_with_hog = 0;
}
staticvoid set_good_speed_feature_framesize_dependent( const AV1_COMP *const cpi, SPEED_FEATURES *const sf, int speed) { const AV1_COMMON *const cm = &cpi->common; constint is_480p_or_lesser = AOMMIN(cm->width, cm->height) <= 480; constint is_480p_or_larger = AOMMIN(cm->width, cm->height) >= 480; constint is_720p_or_larger = AOMMIN(cm->width, cm->height) >= 720; constint is_1080p_or_larger = AOMMIN(cm->width, cm->height) >= 1080; constint is_4k_or_larger = AOMMIN(cm->width, cm->height) >= 2160; constbool use_hbd = cpi->oxcf.use_highbitdepth; // Speed features applicable for temporal filtering and tpl modules may be // changed based on frame type at places where the sf is applied (Example : // use_downsampled_sad). This is because temporal filtering and tpl modules // are called before this function (except for the first key frame). // TODO(deepa.kg@ittiam.com): For the speed features applicable to temporal // filtering and tpl modules, modify the sf initialization appropriately // before calling the modules. constint boosted = frame_is_boosted(cpi); constint is_boosted_arf2_bwd_type =
boosted ||
cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == INTNL_ARF_UPDATE; constint is_lf_frame =
cpi->ppi->gf_group.update_type[cpi->gf_frame_index] == LF_UPDATE; constint allow_screen_content_tools =
cm->features.allow_screen_content_tools;
if (is_480p_or_larger) {
sf->part_sf.use_square_partition_only_threshold = BLOCK_128X128; if (is_720p_or_larger)
sf->part_sf.auto_max_partition_based_on_simple_motion = ADAPT_PRED; else
sf->part_sf.auto_max_partition_based_on_simple_motion = RELAXED_PRED;
} else {
sf->part_sf.use_square_partition_only_threshold = BLOCK_64X64;
sf->part_sf.auto_max_partition_based_on_simple_motion = DIRECT_PRED; if (use_hbd) sf->tx_sf.prune_tx_size_level = 1;
}
if (is_4k_or_larger) {
sf->part_sf.default_min_partition_size = BLOCK_8X8;
}
if (is_720p_or_larger) { // TODO(chiyotsai@google.com): make this speed feature adaptive based on // current block's vertical texture instead of hardcoded with resolution
sf->mv_sf.use_downsampled_sad = 2;
}
// TODO(chiyotsai@google.com): We can get 10% speed up if we move // adaptive_rd_thresh to speed 1. But currently it performs poorly on some // clips (e.g. 5% loss on dinner_1080p). We need to examine the sequence a // bit more closely to figure out why.
sf->inter_sf.adaptive_rd_thresh = 1;
sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
sf->inter_sf.fast_interintra_wedge_search = 1;
sf->inter_sf.prune_comp_search_by_single_result = boosted ? 4 : 1;
sf->inter_sf.prune_ext_comp_using_neighbors = 1;
sf->inter_sf.prune_comp_using_best_single_mode_ref = 2;
sf->inter_sf.prune_comp_type_by_comp_avg = 2;
sf->inter_sf.selective_ref_frame = 3;
sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
sf->inter_sf.enable_fast_compound_mode_search = 1;
sf->inter_sf.reuse_mask_search_results = 1;
set_txfm_rd_gate_level(sf->inter_sf.txfm_rd_gate_level, boosted ? 0 : 1);
sf->inter_sf.inter_mode_txfm_breakout = boosted ? 0 : 1;
sf->inter_sf.alt_ref_search_fp = 1;
// TODO(chiyotsai@google.com): the thresholds chosen for intra hog are // inherited directly from luma hog with some minor tweaking. Eventually we // should run this with a bayesian optimizer to find the Pareto frontier.
sf->intra_sf.chroma_intra_pruning_with_hog = 2;
sf->intra_sf.intra_pruning_with_hog = 3;
sf->intra_sf.prune_palette_search_level = 2;
sf->intra_sf.top_intra_model_count_allowed = 2;
// For single layers RPS: bias/adjustment for recovery frame. if (cpi->ppi->rtc_ref.bias_recovery_frame) {
sf->mv_sf.search_method = NSTEP;
sf->mv_sf.subpel_search_method = SUBPEL_TREE;
sf->rt_sf.fullpel_search_step_param = 8;
sf->rt_sf.nonrd_aggressive_skip = 0;
}
} // Screen settings. if (cpi->oxcf.tune_cfg.content == AOM_CONTENT_SCREEN) { // TODO(marpan): Check settings for speed 7 and 8. if (speed >= 7) {
sf->rt_sf.reduce_mv_pel_precision_highmotion = 1;
sf->mv_sf.use_bsize_dependent_search_method = 0;
sf->rt_sf.skip_cdef_sb = 1;
sf->rt_sf.increase_color_thresh_palette = 1; if (!frame_is_intra_only(cm)) sf->rt_sf.dct_only_palette_nonrd = 1;
} if (speed >= 8) {
sf->rt_sf.nonrd_check_partition_merge_mode = 3;
sf->rt_sf.nonrd_prune_ref_frame_search = 1;
sf->rt_sf.use_nonrd_filter_search = 0;
sf->rt_sf.prune_hv_pred_modes_using_src_sad = false;
} if (speed >= 9) {
sf->rt_sf.prune_idtx_nonrd = 1;
sf->rt_sf.part_early_exit_zeromv = 2;
sf->rt_sf.skip_lf_screen = 1;
sf->rt_sf.nonrd_prune_ref_frame_search = 3;
sf->rt_sf.var_part_split_threshold_shift = 10;
sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
sf->rt_sf.reduce_mv_pel_precision_highmotion = 3;
sf->rt_sf.reduce_mv_pel_precision_lowcomplex = 1;
sf->lpf_sf.cdef_pick_method = CDEF_PICK_FROM_Q;
sf->rt_sf.nonrd_check_partition_merge_mode = 0;
sf->interp_sf.cb_pred_filter_search = 0;
} if (speed >= 10) { if (cm->width * cm->height > 1920 * 1080)
sf->part_sf.disable_8x8_part_based_on_qidx = 1;
sf->rt_sf.screen_content_cdef_filter_qindex_thresh = 80;
sf->rt_sf.part_early_exit_zeromv = 1;
sf->rt_sf.nonrd_aggressive_skip = 1;
sf->rt_sf.thresh_active_maps_skip_lf_cdef = 90;
sf->rt_sf.hybrid_intra_pickmode = 0;
sf->rt_sf.dct_only_palette_nonrd = 1;
sf->rt_sf.prune_palette_search_nonrd = 1;
sf->rt_sf.prune_intra_mode_using_best_sad_so_far = true;
sf->rt_sf.rc_faster_convergence_static = 1;
sf->rt_sf.rc_compute_spatial_var_sc = 1;
} if (speed >= 11) {
sf->rt_sf.skip_lf_screen = 2;
sf->rt_sf.skip_cdef_sb = 2;
sf->rt_sf.prune_palette_search_nonrd = 2;
sf->rt_sf.increase_color_thresh_palette = 0;
sf->rt_sf.prune_h_pred_using_best_mode_so_far = true;
sf->rt_sf.enable_intra_mode_pruning_using_neighbors = true;
}
sf->rt_sf.skip_encoding_non_reference_slide_change =
cpi->oxcf.rc_cfg.drop_frames_water_mark > 0 ? 1 : 0;
sf->rt_sf.skip_newmv_flat_blocks_screen = 1;
sf->rt_sf.use_idtx_nonrd = 1;
sf->rt_sf.higher_thresh_scene_detection = 0;
sf->rt_sf.use_nonrd_altref_frame = 0;
sf->rt_sf.use_rtc_tf = 0;
sf->rt_sf.use_comp_ref_nonrd = 0;
sf->rt_sf.source_metrics_sb_nonrd = 1; if (cpi->rc.high_source_sad == 1) {
sf->rt_sf.prefer_large_partition_blocks = 0;
sf->part_sf.max_intra_bsize = BLOCK_128X128; for (int i = 0; i < BLOCK_SIZES; ++i) { if (i > BLOCK_32X32)
sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC; else
sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
}
} if (speed >= 11 && cpi->rc.high_motion_content_screen_rtc) {
sf->rt_sf.higher_thresh_scene_detection = 1;
sf->rt_sf.force_only_last_ref = 1;
sf->rt_sf.use_nonrd_filter_search = 0;
sf->part_sf.fixed_partition_size = BLOCK_32X32;
sf->rt_sf.use_fast_fixed_part = 1;
sf->rt_sf.increase_source_sad_thresh = 1;
sf->rt_sf.selective_cdf_update = 1;
sf->mv_sf.search_method = FAST_DIAMOND;
} elseif (cpi->rc.max_block_source_sad > 20000 &&
cpi->rc.frame_source_sad > 100 && speed >= 6 &&
(cpi->rc.percent_blocks_with_motion > 1 ||
cpi->svc.last_layer_dropped[0])) {
sf->mv_sf.search_method = NSTEP;
sf->rt_sf.fullpel_search_step_param = 2;
} if (cpi->rc.high_source_sad && cpi->ppi->rtc_ref.non_reference_frame) {
sf->rt_sf.use_idtx_nonrd = 0;
sf->rt_sf.prefer_large_partition_blocks = 1;
sf->mv_sf.subpel_search_method = SUBPEL_TREE_PRUNED_MORE;
sf->rt_sf.fullpel_search_step_param = 10;
}
sf->rt_sf.partition_direct_merging = 0;
sf->hl_sf.accurate_bit_estimate = 0; // This feature is for nonrd_pickmode. if (sf->rt_sf.use_nonrd_pick_mode)
sf->rt_sf.estimate_motion_for_var_based_partition = 1; else
sf->rt_sf.estimate_motion_for_var_based_partition = 0;
} if (is_lossless_requested(&cpi->oxcf.rc_cfg)) {
sf->rt_sf.use_rtc_tf = 0; // TODO(aomedia:3412): The setting accurate_bit_estimate = 0 // can be removed once it's fixed for lossless mode.
sf->hl_sf.accurate_bit_estimate = 0;
} if (cpi->oxcf.use_highbitdepth) { // Disable for use_highbitdepth = 1 to mitigate issue: b/303023614.
sf->rt_sf.estimate_motion_for_var_based_partition = 0;
} if (cpi->oxcf.superres_cfg.enable_superres) {
sf->rt_sf.use_rtc_tf = 0;
sf->rt_sf.nonrd_prune_ref_frame_search = 1;
} // rtc_tf feature allocates new source because of possible // temporal filtering which may change the input source during encoding: // this causes an issue on resized frames when psnr is calculated, // so disable it here for frames that are resized (encoding width/height // different from configured width/height). if (is_psnr_calc_enabled(cpi) && (cpi->oxcf.frm_dim_cfg.width != cm->width ||
cpi->oxcf.frm_dim_cfg.height != cm->height))
sf->rt_sf.use_rtc_tf = 0;
}
staticvoid set_rt_speed_features_framesize_independent(AV1_COMP *cpi,
SPEED_FEATURES *sf, int speed) {
AV1_COMMON *const cm = &cpi->common; constint boosted = frame_is_boosted(cpi);
// Currently, rt speed 0, 1, 2, 3, 4, 5 are the same. // Following set of speed features are not impacting encoder's decisions as // the relevant tools are disabled by default.
sf->gm_sf.gm_search_type = GM_DISABLE_SEARCH;
sf->hl_sf.recode_loop = ALLOW_RECODE_KFARFGF;
sf->inter_sf.reuse_inter_intra_mode = 1;
sf->inter_sf.prune_compound_using_single_ref = 0;
sf->inter_sf.prune_comp_search_by_single_result = 2;
sf->inter_sf.prune_comp_type_by_comp_avg = 2;
sf->inter_sf.fast_wedge_sign_estimate = 1;
sf->inter_sf.use_dist_wtd_comp_flag = DIST_WTD_COMP_DISABLED;
sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
sf->inter_sf.disable_interinter_wedge_var_thresh = 100;
sf->interp_sf.cb_pred_filter_search = 0;
sf->interp_sf.skip_interp_filter_search = 1;
sf->part_sf.ml_prune_partition = 1;
sf->part_sf.reuse_prev_rd_results_for_part_ab = 1;
sf->part_sf.prune_ext_partition_types_search_level = 2;
sf->part_sf.less_rectangular_check_level = 2;
sf->mv_sf.obmc_full_pixel_search_level = 1;
sf->intra_sf.dv_cost_upd_level = INTERNAL_COST_UPD_OFF;
sf->tx_sf.model_based_prune_tx_search_level = 0;
sf->lpf_sf.dual_sgr_penalty_level = 1; // Disable Wiener and Self-guided Loop restoration filters.
sf->lpf_sf.disable_wiener_filter = true;
sf->lpf_sf.disable_sgr_filter = true;
sf->intra_sf.prune_palette_search_level = 2;
sf->intra_sf.prune_luma_palette_size_search_level = 2;
sf->intra_sf.early_term_chroma_palette_size_search = 1;
// End of set
// TODO(any, yunqing): tune these features for real-time use cases.
sf->hl_sf.superres_auto_search_type = SUPERRES_AUTO_SOLO;
sf->hl_sf.frame_parameter_update = 0;
sf->inter_sf.model_based_post_interp_filter_breakout = 1; // TODO(any): As per the experiments, this speed feature is doing redundant // computation since the model rd based pruning logic is similar to model rd // based gating when inter_mode_rd_model_estimation = 2. Enable this SF if // either of the condition becomes true. // (1) inter_mode_rd_model_estimation != 2 // (2) skip_interp_filter_search == 0 // (3) Motion mode or compound mode is enabled */
sf->inter_sf.prune_mode_search_simple_translation = 0;
sf->inter_sf.prune_ref_frame_for_rect_partitions = !boosted;
sf->inter_sf.disable_interintra_wedge_var_thresh = UINT_MAX;
sf->inter_sf.selective_ref_frame = 4;
sf->inter_sf.alt_ref_search_fp = 2;
set_txfm_rd_gate_level(sf->inter_sf.txfm_rd_gate_level, boosted ? 0 : 4);
sf->inter_sf.limit_txfm_eval_per_mode = 3;
sf->inter_sf.inter_mode_rd_model_estimation = 2; // This sf is not applicable in non-rd path.
sf->inter_sf.skip_newmv_in_drl = 0;
sf->interp_sf.skip_interp_filter_search = 0;
// Disable intra_y_mode_mask pruning since the performance at speed 7 isn't // good. May need more study. for (int i = 0; i < TX_SIZES; ++i) {
sf->intra_sf.intra_y_mode_mask[i] = INTRA_ALL;
}
sf->rt_sf.mode_search_skip_flags |= FLAG_SKIP_INTRA_DIRMISMATCH;
sf->rt_sf.nonrd_prune_ref_frame_search = 1; // This is for rd path only.
sf->rt_sf.prune_inter_modes_using_temp_var = 0;
sf->rt_sf.prune_inter_modes_wrt_gf_arf_based_on_sad = 0;
sf->rt_sf.prune_intra_mode_based_on_mv_range = 0; #if !CONFIG_REALTIME_ONLY
sf->rt_sf.reuse_inter_pred_nonrd =
(cpi->oxcf.motion_mode_cfg.enable_warped_motion == 0); #else
sf->rt_sf.reuse_inter_pred_nonrd = 1; #endif #if CONFIG_AV1_TEMPORAL_DENOISING
sf->rt_sf.reuse_inter_pred_nonrd = (cpi->oxcf.noise_sensitivity == 0); #endif
sf->rt_sf.short_circuit_low_temp_var = 0; // For spatial layers, only LAST and GOLDEN are currently used in the SVC // for nonrd. The flag use_nonrd_altref_frame can disable GOLDEN in the // get_ref_frame_flags() for some patterns, so disable it here for // spatial layers.
sf->rt_sf.use_nonrd_altref_frame =
(cpi->svc.number_spatial_layers > 1) ? 0 : 1;
sf->rt_sf.use_nonrd_pick_mode = 1;
sf->rt_sf.nonrd_check_partition_merge_mode = 3;
sf->rt_sf.skip_intra_pred = 1;
sf->rt_sf.source_metrics_sb_nonrd = 1; // Set mask for intra modes. for (int i = 0; i < BLOCK_SIZES; ++i) if (i >= BLOCK_32X32)
sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC; else // Use DC, H, V intra mode for block sizes < 32X32.
sf->rt_sf.intra_y_mode_bsize_mask_nrd[i] = INTRA_DC_H_V;
// Populate appropriate sub-pel search method based on speed feature and user // specified settings staticvoid set_subpel_search_method(
MotionVectorSearchParams *mv_search_params, unsignedint motion_vector_unit_test,
SUBPEL_SEARCH_METHOD subpel_search_method) {
assert(subpel_search_method <= SUBPEL_TREE_PRUNED_MORE);
mv_search_params->find_fractional_mv_step =
fractional_mv_search[subpel_search_method];
// This is only used in motion vector unit test. if (motion_vector_unit_test == 1)
mv_search_params->find_fractional_mv_step = av1_return_max_sub_pixel_mv; elseif (motion_vector_unit_test == 2)
mv_search_params->find_fractional_mv_step = av1_return_min_sub_pixel_mv;
}
// Sets the speed features that depend on the frame size.
// Dispatches on the encoder mode (GOOD, ALLINTRA or REALTIME) to the
// matching mode-specific setter, then applies a multi-thread override.
//   cpi   : encoder instance; cpi->sf is updated in place.
//   speed : requested speed level, forwarded to the mode-specific setter.
void av1_set_speed_features_framesize_dependent(AV1_COMP *cpi, int speed) {
  SPEED_FEATURES *const sf = &cpi->sf;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;

  switch (oxcf->mode) {
    case GOOD:
      set_good_speed_feature_framesize_dependent(cpi, sf, speed);
      break;
    case ALLINTRA:
      set_allintra_speed_feature_framesize_dependent(cpi, sf, speed);
      break;
    case REALTIME:
      set_rt_speed_feature_framesize_dependent(cpi, sf, speed);
      break;
  }

  // For multi-thread use case with row_mt enabled, cost update for a set of
  // SB rows is not desirable. Hence, the sf mv_cost_upd_level is set to
  // INTERNAL_COST_UPD_SBROW in such cases.
  if ((cpi->oxcf.row_mt == 1) && (cpi->mt_info.num_workers > 1)) {
    if (sf->inter_sf.mv_cost_upd_level == INTERNAL_COST_UPD_SBROW_SET) {
      // Set mv_cost_upd_level to use row level update.
      sf->inter_sf.mv_cost_upd_level = INTERNAL_COST_UPD_SBROW;
    }
  }
}
// Sets the speed features that do not depend on the frame size.
// Dispatches on the encoder mode (GOOD, ALLINTRA or REALTIME) to the
// matching mode-specific setter, then applies overrides driven by the encoder
// configuration, the number of worker threads and the encoding pass.
//   cpi   : encoder instance; cpi->sf is updated in place, and while the
//           sequence parameters are not locked some tool-enable flags in
//           cpi->common.seq_params are narrowed as well.
//   speed : requested speed level.
void av1_set_speed_features_framesize_independent(AV1_COMP *cpi, int speed) {
  SPEED_FEATURES *const sf = &cpi->sf;
  // NOTE(review): not referenced in the visible part of this function.
  WinnerModeParams *const winner_mode_params = &cpi->winner_mode_params;
  const AV1EncoderConfig *const oxcf = &cpi->oxcf;
  int i;

  switch (oxcf->mode) {
    case GOOD:
      set_good_speed_features_framesize_independent(cpi, sf, speed);
      break;
    case ALLINTRA:
      set_allintra_speed_features_framesize_independent(cpi, sf, speed);
      break;
    case REALTIME:
      set_rt_speed_features_framesize_independent(cpi, sf, speed);
      break;
  }

  // Note: when use_nonrd_pick_mode is true, the transform size is the
  // minimum of 16x16 and the largest possible size of the current block,
  // which conflicts with the speed feature "enable_tx_size_search".
  if (!oxcf->txfm_cfg.enable_tx_size_search &&
      sf->rt_sf.use_nonrd_pick_mode == 0) {
    sf->winner_mode_sf.tx_size_search_level = 3;
  }

  if (cpi->mt_info.num_workers > 1) {
    // Loop restoration stage is conditionally disabled for speed 5, 6 when
    // num_workers > 1. Since av1_pick_filter_restoration() is not
    // multi-threaded, enabling the Loop restoration stage will cause an
    // increase in encode time (3% to 7% increase depends on frame
    // resolution).
    // TODO(aomedia:3446): Implement multi-threading of
    // av1_pick_filter_restoration() and enable Wiener filter for speed 5, 6
    // similar to single thread encoding path.
    if (speed >= 5) {
      sf->lpf_sf.disable_sgr_filter = true;
      sf->lpf_sf.disable_wiener_filter = true;
    }
  }

  // Sequence-level tool flags are only narrowed (via &=) while the sequence
  // parameters are still unlocked; the settings after this block apply
  // regardless of the lock state.
  if (!cpi->ppi->seq_params_locked) {
    cpi->common.seq_params->order_hint_info.enable_dist_wtd_comp &=
        (sf->inter_sf.use_dist_wtd_comp_flag != DIST_WTD_COMP_DISABLED);
    cpi->common.seq_params->enable_dual_filter &=
        !sf->interp_sf.disable_dual_filter;
    // Set the flag 'enable_restoration', if one of the Loop restoration
    // filters (i.e., Wiener or Self-guided) is enabled.
    cpi->common.seq_params->enable_restoration &=
        (!sf->lpf_sf.disable_wiener_filter || !sf->lpf_sf.disable_sgr_filter);
  }

  const int mesh_speed = AOMMIN(speed, MAX_MESH_SPEED);
  for (i = 0; i < MAX_MESH_STEP; ++i) {
    sf->mv_sf.mesh_patterns[i].range =
        good_quality_mesh_patterns[mesh_speed][i].range;
    sf->mv_sf.mesh_patterns[i].interval =
        good_quality_mesh_patterns[mesh_speed][i].interval;
  }

  // Update the mesh pattern of exhaustive motion search for intraBC
  // Though intraBC mesh pattern is populated for all frame types, it is used
  // only for intra frames of screen contents
  for (i = 0; i < MAX_MESH_STEP; ++i) {
    sf->mv_sf.intrabc_mesh_patterns[i].range =
        intrabc_mesh_patterns[mesh_speed][i].range;
    sf->mv_sf.intrabc_mesh_patterns[i].interval =
        intrabc_mesh_patterns[mesh_speed][i].interval;
  }

  // Slow quant, dct and trellis not worthwhile for first pass
  // so make sure they are always turned off.
  if (is_stat_generation_stage(cpi))
    sf->rd_sf.optimize_coefficients = NO_TRELLIS_OPT;

  // No recode for 1 pass.
  if (oxcf->pass == AOM_RC_ONE_PASS && has_no_stats_stage(cpi))
    sf->hl_sf.recode_loop = DISALLOW_RECODE;

  if (cpi->oxcf.row_mt == 1 && (cpi->mt_info.num_workers > 1)) {
    if (sf->inter_sf.inter_mode_rd_model_estimation == 1) {
      // Revert to type 2
      sf->inter_sf.inter_mode_rd_model_estimation = 2;
    }

#if !CONFIG_FPMT_TEST
    // Disable the speed feature 'prune_ref_frame_for_gm_search' to achieve
    // better parallelism when number of threads available are greater than or
    // equal to maximum number of reference frames allowed for global motion.
    if (sf->gm_sf.gm_search_type != GM_DISABLE_SEARCH &&
        (cpi->mt_info.num_workers >=
         gm_available_reference_frames[sf->gm_sf.gm_search_type]))
      sf->gm_sf.prune_ref_frame_for_gm_search = 0;
#endif
  }

  // This only applies to the real time mode. Adaptive gf refresh is disabled if
  // gf_cbr_boost_pct that is set by the user is larger than 0.
  if (cpi->oxcf.rc_cfg.gf_cbr_boost_pct > 0)
    sf->rt_sf.gf_refresh_based_on_qp = 0;
}
if (speed <= 2) { if (!is_stat_generation_stage(cpi)) { // Use faster full-pel motion search for high quantizers. // Also use reduced total search range for low resolutions at high // quantizers. constint aggr = speed; constint qindex_thresh1 = ms_qindex_thresh[aggr][is_720p_or_larger][0]; constint qindex_thresh2 = ms_qindex_thresh[aggr][is_720p_or_larger][1]; const SEARCH_METHODS search_method =
motion_search_method[is_720p_or_larger]; if (cm->quant_params.base_qindex > qindex_thresh1) {
sf->mv_sf.search_method = search_method;
sf->tpl_sf.search_method = search_method;
} elseif (cm->quant_params.base_qindex > qindex_thresh2) {
sf->mv_sf.search_method = NSTEP_8PT;
}
}
}
if (speed >= 4) { // Disable LR search at low and high quantizers and enable only for // mid-quantizer range. if (!boosted && !is_arf2_bwd_type) { constint qindex_low[2] = { 100, 60 }; constint qindex_high[2] = { 180, 160 }; if (cm->quant_params.base_qindex <= qindex_low[is_720p_or_larger] ||
cm->quant_params.base_qindex > qindex_high[is_720p_or_larger]) {
sf->lpf_sf.disable_loop_restoration_luma = 1;
}
}
}
if (speed == 1) { // Reuse interinter wedge mask search from first search for non-boosted // non-internal-arf frames, except at very high quantizers. if (cm->quant_params.base_qindex <= 200) { if (!boosted && !is_arf2_bwd_type)
sf->inter_sf.reuse_mask_search_results = 1;
}
}
if (speed == 5) { if (!(frame_is_intra_only(&cpi->common) ||
cm->features.allow_screen_content_tools)) { constint qindex[2] = { 256, 128 }; // Set the sf value as 3 for low resolution and // for higher resolutions with low quantizers. if (cm->quant_params.base_qindex < qindex[is_480p_or_larger])
sf->tx_sf.tx_type_search.winner_mode_tx_type_pruning = 3;
}
}
if (speed >= 5) { // Disable the sf for low quantizers in case of low resolution screen // contents. if (cm->features.allow_screen_content_tools &&
cm->quant_params.base_qindex < 128 && is_480p_or_lesser) {
sf->part_sf.prune_sub_8x8_partition_level = 0;
}
}
// Loop restoration size search // At speed 0, always search all available sizes for the maximum possible gain
sf->lpf_sf.min_lr_unit_size = RESTORATION_PROC_UNIT_SIZE;
sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX;
if (speed >= 1) { // For large frames, small restoration units are almost never useful, // so prune them away if (is_1440p_or_larger) {
sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX;
} elseif (is_720p_or_larger) {
sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1;
}
}
if (speed >= 3 || (cpi->oxcf.mode == ALLINTRA && speed >= 1)) { // At this speed, a full search is too expensive. Instead, pick a single // size based on size and qindex. Note that, in general, higher quantizers // (== lower quality) and larger frames generally want to use larger // restoration units. int qindex_thresh = 96; if (cm->quant_params.base_qindex <= qindex_thresh && !is_1440p_or_larger) {
sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1;
sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX >> 1;
} else {
sf->lpf_sf.min_lr_unit_size = RESTORATION_UNITSIZE_MAX;
sf->lpf_sf.max_lr_unit_size = RESTORATION_UNITSIZE_MAX;
}
}
/*
 * Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig
 * zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
 * noch Qualität der bereitgestellten Informationen zugesichert.
 *
 * Bemerkung:
 * Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.
 */