// SPDX-License-Identifier: MIT
/*
 * Copyright 2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */
if (fabric_bw_kbytes_sec < limiting_bw_kbytes_sec)
limiting_bw_kbytes_sec = fabric_bw_kbytes_sec;
if (sdp_bw_kbytes_sec < limiting_bw_kbytes_sec)
limiting_bw_kbytes_sec = sdp_bw_kbytes_sec;
return limiting_bw_kbytes_sec;
}
staticvoid dcn321_insert_entry_into_table_sorted(struct _vcs_dpi_voltage_scaling_st *table, unsignedint *num_entries, struct _vcs_dpi_voltage_scaling_st *entry)
{ int i = 0; int index = 0;
dc_assert_fp_enabled();
if (*num_entries == 0) {
table[0] = *entry;
(*num_entries)++;
} else { while (entry->net_bw_in_kbytes_sec > table[index].net_bw_in_kbytes_sec) {
index++; if (index >= *num_entries) break;
}
for (i = *num_entries; i > index; i--)
table[i] = table[i - 1];
if (start_index != end_index) { for (int j = start_index; j < end_index; j++) { for (int k = start_index; k < end_index; k++) { if (table[k].dcfclk_mhz > table[k+1].dcfclk_mhz)
swap_table_entries(&table[k], &table[k+1]);
}
}
}
start_index = 0;
end_index = 0;
}
}
/* * remove_inconsistent_entries - Ensure entries with the same bandwidth have MEMCLK and FCLK monotonically increasing * and remove entries that do not follow this order
*/ staticvoid remove_inconsistent_entries(struct _vcs_dpi_voltage_scaling_st *table, unsignedint *num_entries)
{ for (int i = 0; i < (*num_entries - 1); i++) { if (table[i].net_bw_in_kbytes_sec == table[i+1].net_bw_in_kbytes_sec) { if ((table[i].dram_speed_mts > table[i+1].dram_speed_mts) ||
(table[i].fabricclk_mhz > table[i+1].fabricclk_mhz))
remove_entry_from_table_at_index(table, num_entries, i);
}
}
}
/*
 * override_max_clk_values - Overwrite the max clock frequencies with the max DC mode timings
 * Input:
 *	max_clk_limit - struct containing the desired clock timings
 * Output:
 *	curr_clk_limit - struct containing the timings that need to be overwritten
 * Return: 0 upon success, non-zero for failure
 *
 * Only dcfclk, fclk, memclk, socclk, dtbclk and dispclk are considered. A
 * field that is zero in max_clk_limit is treated as not initialized and the
 * matching field in curr_clk_limit is left untouched.
 */
static int override_max_clk_values(struct clk_limit_table_entry *max_clk_limit,
		struct clk_limit_table_entry *curr_clk_limit)
{
	if (NULL == max_clk_limit || NULL == curr_clk_limit)
		return -1; //invalid parameters

	//only overwrite if desired max clock frequency is initialized
	if (max_clk_limit->dcfclk_mhz != 0)
		curr_clk_limit->dcfclk_mhz = max_clk_limit->dcfclk_mhz;

	if (max_clk_limit->fclk_mhz != 0)
		curr_clk_limit->fclk_mhz = max_clk_limit->fclk_mhz;

	if (max_clk_limit->memclk_mhz != 0)
		curr_clk_limit->memclk_mhz = max_clk_limit->memclk_mhz;

	if (max_clk_limit->socclk_mhz != 0)
		curr_clk_limit->socclk_mhz = max_clk_limit->socclk_mhz;

	if (max_clk_limit->dtbclk_mhz != 0)
		curr_clk_limit->dtbclk_mhz = max_clk_limit->dtbclk_mhz;

	if (max_clk_limit->dispclk_mhz != 0)
		curr_clk_limit->dispclk_mhz = max_clk_limit->dispclk_mhz;

	return 0;
}
for (i = 0; i < MAX_NUM_DPM_LVL; i++) { if (bw_params->clk_table.entries[i].dcfclk_mhz > max_clk_data.dcfclk_mhz)
max_clk_data.dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; if (bw_params->clk_table.entries[i].fclk_mhz > max_clk_data.fclk_mhz)
max_clk_data.fclk_mhz = bw_params->clk_table.entries[i].fclk_mhz; if (bw_params->clk_table.entries[i].memclk_mhz > max_clk_data.memclk_mhz)
max_clk_data.memclk_mhz = bw_params->clk_table.entries[i].memclk_mhz; if (bw_params->clk_table.entries[i].dispclk_mhz > max_clk_data.dispclk_mhz)
max_clk_data.dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; if (bw_params->clk_table.entries[i].dppclk_mhz > max_clk_data.dppclk_mhz)
max_clk_data.dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; if (bw_params->clk_table.entries[i].phyclk_mhz > max_clk_data.phyclk_mhz)
max_clk_data.phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz; if (bw_params->clk_table.entries[i].dtbclk_mhz > max_clk_data.dtbclk_mhz)
max_clk_data.dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
if (bw_params->clk_table.entries[i].memclk_mhz > 0) {
num_uclk_dpms++; if (bw_params->clk_table.entries[i].memclk_mhz <= bw_params->dc_mode_limit.memclk_mhz)
num_dc_uclk_dpms++;
} if (bw_params->clk_table.entries[i].fclk_mhz > 0) {
num_fclk_dpms++; if (bw_params->clk_table.entries[i].fclk_mhz <= bw_params->dc_mode_limit.fclk_mhz)
num_dc_fclk_dpms++;
} if (bw_params->clk_table.entries[i].dcfclk_mhz > 0) {
num_dcfclk_dpms++; if (bw_params->clk_table.entries[i].dcfclk_mhz <= bw_params->dc_mode_limit.dcfclk_mhz)
num_dc_dcfclk_dpms++;
}
}
if (!disable_dc_mode_overwrite) { //Overwrite max frequencies with max DC mode frequencies for DC mode systems
override_max_clk_values(&bw_params->dc_mode_limit, &max_clk_data);
num_uclk_dpms = num_dc_uclk_dpms;
num_fclk_dpms = num_dc_fclk_dpms;
num_dcfclk_dpms = num_dc_dcfclk_dpms;
bw_params->clk_table.num_entries_per_clk.num_memclk_levels = num_uclk_dpms;
bw_params->clk_table.num_entries_per_clk.num_fclk_levels = num_fclk_dpms;
}
// Insert all the DCFCLK STAs for (i = 0; i < num_dcfclk_stas; i++) {
entry.dcfclk_mhz = dcfclk_sta_targets[i];
entry.fabricclk_mhz = 0;
entry.dram_speed_mts = 0;
// At this point, the table contains all "points of interest" based on // DPMs from PMFW, and STAs. Table is sorted by BW, and all clock // ratios (by derate, are exact).
// Remove states that require higher clocks than are supported for (i = *num_entries - 1; i >= 0 ; i--) { if (table[i].dcfclk_mhz > max_clk_data.dcfclk_mhz ||
table[i].fabricclk_mhz > max_clk_data.fclk_mhz ||
table[i].dram_speed_mts > max_clk_data.memclk_mhz * 16)
remove_entry_from_table_at_index(table, num_entries, i);
}
// Insert entry with all max dc limits without bandwidth matching if (!disable_dc_mode_overwrite) { struct _vcs_dpi_voltage_scaling_st max_dc_limits_entry = entry;
// At this point, the table only contains supported points of interest // it could be used as is, but some states may be redundant due to // coarse grained nature of some clocks, so we want to round up to // coarse grained DPMs and remove duplicates.
// Round up UCLKs for (i = *num_entries - 1; i >= 0 ; i--) { for (j = 0; j < num_uclk_dpms; j++) { if (bw_params->clk_table.entries[j].memclk_mhz * 16 >= table[i].dram_speed_mts) {
table[i].dram_speed_mts = bw_params->clk_table.entries[j].memclk_mhz * 16; break;
}
}
}
// If FCLK is coarse grained, round up to next DPMs if (num_fclk_dpms > 2) { for (i = *num_entries - 1; i >= 0 ; i--) { for (j = 0; j < num_fclk_dpms; j++) { if (bw_params->clk_table.entries[j].fclk_mhz >= table[i].fabricclk_mhz) {
table[i].fabricclk_mhz = bw_params->clk_table.entries[j].fclk_mhz; break;
}
}
}
} // Otherwise, round up to minimum. else { for (i = *num_entries - 1; i >= 0 ; i--) { if (table[i].fabricclk_mhz < min_fclk_mhz) {
table[i].fabricclk_mhz = min_fclk_mhz;
}
}
}
// Round DCFCLKs up to minimum for (i = *num_entries - 1; i >= 0 ; i--) { if (table[i].dcfclk_mhz < min_dcfclk_mhz) {
table[i].dcfclk_mhz = min_dcfclk_mhz;
}
}
// Remove duplicate states, note duplicate states are always neighbouring since table is sorted.
i = 0; while (i < *num_entries - 1) { if (table[i].dcfclk_mhz == table[i + 1].dcfclk_mhz &&
table[i].fabricclk_mhz == table[i + 1].fabricclk_mhz &&
table[i].dram_speed_mts == table[i + 1].dram_speed_mts)
remove_entry_from_table_at_index(table, num_entries, i + 1); else
i++;
}
// Fix up the state indices for (i = *num_entries - 1; i >= 0 ; i--) {
table[i].state = i;
}
/**
 * dcn321_update_bw_bounding_box_fpu - update the DCN3.2.1 bounding box with per-SKU values
 * @dc: display core instance; supplies config, bb_overrides, debug flags,
 *      clk_mgr and the DML instances that are re-initialized at the end
 * @bw_params: clock table and limits as provided by the clock manager
 *
 * This overrides some of the dcn3_21_ip / dcn3_21_soc initial parameters
 * hardcoded from the spreadsheet with actual values as per dGPU SKU:
 * - with a few options passed from dc->config
 * - with dentist_vco_frequency from Clk Mgr (currently hardcoded, but might need to get it from PM FW)
 * - with passed latency values (in ns units) in dc->bb_overrides, for debugging purposes
 * - with passed latencies from VBIOS (in 100_ns units) if available for certain dGPU SKU
 * - with number of DRAM channels from VBIOS (which differ for certain dGPU SKU of the same ASIC)
 * - clock levels with passed clk_table entries from Clk Mgr as reported by PM FW for different
 *   clocks (which might differ for certain dGPU SKU of the same ASIC)
 *
 * NOTE(review): in this copy of the file only the dc->config override, the
 * sr_exit_time override and the clock-level handling are visible; the other
 * overrides listed above, several local declarations and the heads of some
 * enclosing conditionals are missing from the text below.
*/ void dcn321_update_bw_bounding_box_fpu(struct dc *dc, struct clk_bw_params *bw_params)
{
dc_assert_fp_enabled(); /* Overrides from dc->config options */
dcn3_21_ip.clamp_min_dcfclk = dc->config.clamp_min_dcfclk;
/*
 * The override is taken only when it is non-zero and differs from the
 * current value (compared in whole nanoseconds). The converted value is
 * written to both dcn3_21_soc and the DML2 bbox overrides.
 */
/* Override from passed dc->bb_overrides if available*/ if ((int)(dcn3_21_soc.sr_exit_time_us * 1000) != dc->bb_overrides.sr_exit_time_ns
&& dc->bb_overrides.sr_exit_time_ns) {
dc->dml2_options.bbox_overrides.sr_exit_latency_us =
dcn3_21_soc.sr_exit_time_us = dc->bb_overrides.sr_exit_time_ns / 1000.0;
}
/*
 * NOTE(review): `i`, the max_*_mhz accumulators, dcfclk_sta_targets,
 * num_dcfclk_sta_targets, num_states, dcfclk_mhz[] and dram_speed_mts[] are
 * used below but their declarations are not visible in this excerpt.
 */
/* Track the largest DCFCLK, DISPCLK, DPPCLK and PHYCLK over all MAX_NUM_DPM_LVL clk_table entries. */
for (i = 0; i < MAX_NUM_DPM_LVL; i++) { if (bw_params->clk_table.entries[i].dcfclk_mhz > max_dcfclk_mhz)
max_dcfclk_mhz = bw_params->clk_table.entries[i].dcfclk_mhz; if (bw_params->clk_table.entries[i].dispclk_mhz > max_dispclk_mhz)
max_dispclk_mhz = bw_params->clk_table.entries[i].dispclk_mhz; if (bw_params->clk_table.entries[i].dppclk_mhz > max_dppclk_mhz)
max_dppclk_mhz = bw_params->clk_table.entries[i].dppclk_mhz; if (bw_params->clk_table.entries[i].phyclk_mhz > max_phyclk_mhz)
max_phyclk_mhz = bw_params->clk_table.entries[i].phyclk_mhz;
/* After the scan, any maximum that is still zero falls back to dcn3_21_soc.clock_limits[0]. */
} if (!max_dcfclk_mhz)
max_dcfclk_mhz = dcn3_21_soc.clock_limits[0].dcfclk_mhz; if (!max_dispclk_mhz)
max_dispclk_mhz = dcn3_21_soc.clock_limits[0].dispclk_mhz; if (!max_dppclk_mhz)
max_dppclk_mhz = dcn3_21_soc.clock_limits[0].dppclk_mhz; if (!max_phyclk_mhz)
max_phyclk_mhz = dcn3_21_soc.clock_limits[0].phyclk_mhz;
/*
 * Reconcile the DCFCLK STA target list with the max DCFCLK: append the max
 * when it is above the last target; when it is below, cap the first target
 * that exceeds it and truncate the list right after that element.
 */
if (max_dcfclk_mhz > dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { // If max DCFCLK is greater than the max DCFCLK STA target, insert into the DCFCLK STA target array
dcfclk_sta_targets[num_dcfclk_sta_targets] = max_dcfclk_mhz;
num_dcfclk_sta_targets++;
} elseif (max_dcfclk_mhz < dcfclk_sta_targets[num_dcfclk_sta_targets-1]) { // If max DCFCLK is less than the max DCFCLK STA target, cap values and remove duplicates for (i = 0; i < num_dcfclk_sta_targets; i++) { if (dcfclk_sta_targets[i] > max_dcfclk_mhz) {
dcfclk_sta_targets[i] = max_dcfclk_mhz; break;
}
} // Update size of array since we "removed" duplicates
num_dcfclk_sta_targets = i + 1;
}
/* More than MAX_NUM_DPM_LVL states is treated as a programming error: assert and leave without touching dcn3_21_soc. */
/* bw_params->clk_table.entries[MAX_NUM_DPM_LVL]. * MAX_NUM_DPM_LVL is 8. * dcn3_02_soc.clock_limits[DC__VOLTAGE_STATES]. * DC__VOLTAGE_STATES is 40.
*/ if (num_states > MAX_NUM_DPM_LVL) {
ASSERT(0); return;
}
/* Publish the state count and fill one clock_limits[] entry per state. */
dcn3_21_soc.num_states = num_states; for (i = 0; i < dcn3_21_soc.num_states; i++) {
dcn3_21_soc.clock_limits[i].state = i;
dcn3_21_soc.clock_limits[i].dcfclk_mhz = dcfclk_mhz[i];
/* NOTE(review): FABRICCLK is filled from dcfclk_mhz[], not from a separate FCLK array - confirm this is intended. */
dcn3_21_soc.clock_limits[i].fabricclk_mhz = dcfclk_mhz[i];
/* Fill all states with max values of all these clocks */
dcn3_21_soc.clock_limits[i].dispclk_mhz = max_dispclk_mhz;
dcn3_21_soc.clock_limits[i].dppclk_mhz = max_dppclk_mhz;
dcn3_21_soc.clock_limits[i].phyclk_mhz = max_phyclk_mhz;
/* DSCCLK is derived as one third of the max DISPCLK. */
dcn3_21_soc.clock_limits[i].dscclk_mhz = max_dispclk_mhz / 3;
/*
 * A zero DTBCLK entry inherits the value of the previous state; state 0 is
 * only written when its clk_table entry is non-zero.
 */
/* Populate from bw_params for DTBCLK, SOCCLK */ if (i > 0) { if (!bw_params->clk_table.entries[i].dtbclk_mhz) {
dcn3_21_soc.clock_limits[i].dtbclk_mhz = dcn3_21_soc.clock_limits[i-1].dtbclk_mhz;
} else {
dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
}
} elseif (bw_params->clk_table.entries[i].dtbclk_mhz) {
dcn3_21_soc.clock_limits[i].dtbclk_mhz = bw_params->clk_table.entries[i].dtbclk_mhz;
}
/* SOCCLK and DRAM speed: for i > 0 a zero value inherits the previous state, otherwise the source value is copied as is. */
if (!bw_params->clk_table.entries[i].socclk_mhz && i > 0)
dcn3_21_soc.clock_limits[i].socclk_mhz = dcn3_21_soc.clock_limits[i-1].socclk_mhz; else
dcn3_21_soc.clock_limits[i].socclk_mhz = bw_params->clk_table.entries[i].socclk_mhz;
if (!dram_speed_mts[i] && i > 0)
dcn3_21_soc.clock_limits[i].dram_speed_mts = dcn3_21_soc.clock_limits[i-1].dram_speed_mts; else
dcn3_21_soc.clock_limits[i].dram_speed_mts = dram_speed_mts[i];
/* These clocks cannot come from bw_params, always fill from dcn3_21_soc[0] */ /* PHYCLK_D18, PHYCLK_D32 */
dcn3_21_soc.clock_limits[i].phyclk_d18_mhz = dcn3_21_soc.clock_limits[0].phyclk_d18_mhz;
dcn3_21_soc.clock_limits[i].phyclk_d32_mhz = dcn3_21_soc.clock_limits[0].phyclk_d32_mhz;
}
/*
 * NOTE(review): the `if` that pairs with this `else` is not visible in this
 * excerpt. The alternative path hands the table construction to
 * build_synthetic_soc_states(), which fills clock_limits and num_states.
 */
} else {
build_synthetic_soc_states(dc->debug.disable_dc_mode_overwrite, bw_params,
dcn3_21_soc.clock_limits, &dcn3_21_soc.num_states);
}
/* Re-init DML with updated bb */
dml_init_instance(&dc->dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32); if (dc->current_state)
dml_init_instance(&dc->current_state->bw_ctx.dml, &dcn3_21_soc, &dcn3_21_ip, DML_PROJECT_DCN32);
/*
 * When the clock manager reports more than one clk_table entry, copy every
 * non-zero level of each clock into the DML2 bbox override table. Each clock
 * uses its own level count from num_entries_per_clk.
 */
if (dc->clk_mgr->bw_params->clk_table.num_entries > 1) { unsignedint i = 0;
for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dcfclk_levels; i++) { if (dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz)
dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dcfclk_mhz =
dc->clk_mgr->bw_params->clk_table.entries[i].dcfclk_mhz;
}
for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_fclk_levels; i++) { if (dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz)
dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].fclk_mhz =
dc->clk_mgr->bw_params->clk_table.entries[i].fclk_mhz;
}
for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_memclk_levels; i++) { if (dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz)
dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].memclk_mhz =
dc->clk_mgr->bw_params->clk_table.entries[i].memclk_mhz;
}
for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_socclk_levels; i++) { if (dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz)
dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].socclk_mhz =
dc->clk_mgr->bw_params->clk_table.entries[i].socclk_mhz;
}
for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dtbclk_levels; i++) { if (dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz)
dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dtbclk_mhz =
dc->clk_mgr->bw_params->clk_table.entries[i].dtbclk_mhz;
}
for (i = 0; i < dc->clk_mgr->bw_params->clk_table.num_entries_per_clk.num_dispclk_levels; i++) { if (dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz) {
dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dispclk_mhz =
dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz;
/* The DPPCLK override is populated from the DISPCLK entry as well. */
dc->dml2_options.bbox_overrides.clks_table.clk_entries[i].dppclk_mhz =
dc->clk_mgr->bw_params->clk_table.entries[i].dispclk_mhz;
}
}
}
}
Messung V0.5
¤ Dauer der Verarbeitung: 0.16 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.