Anforderungen  |   Konzepte  |   Entwurf  |   Entwicklung  |   Qualitätssicherung  |   Lebenszyklus  |   Steuerung
 
 
 
 


Quelle  display_mode_core.c   Sprache: C

 
/* SPDX-License-Identifier: MIT */
/*
 * Copyright 2023 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: AMD
 *
 */


#include "display_mode_core.h"
#include "display_mode_util.h"
#include "display_mode_lib_defines.h"

#include "dml_assert.h"

#define DML2_MAX_FMT_420_BUFFER_WIDTH 4096
#define TB_BORROWED_MAX 400
#define DML_MAX_VSTARTUP_START 1023

// ---------------------------
//  Declaration Begins
// ---------------------------
static void CalculateBytePerPixelAndBlockSizes(
 enum dml_source_format_class SourcePixelFormat,
 enum dml_swizzle_mode SurfaceTiling,
 // Output
 dml_uint_t *BytePerPixelY,
 dml_uint_t *BytePerPixelC,
 dml_float_t *BytePerPixelDETY,
 dml_float_t *BytePerPixelDETC,
 dml_uint_t *BlockHeight256BytesY,
 dml_uint_t *BlockHeight256BytesC,
 dml_uint_t *BlockWidth256BytesY,
 dml_uint_t *BlockWidth256BytesC,
 dml_uint_t *MacroTileHeightY,
 dml_uint_t *MacroTileHeightC,
 dml_uint_t *MacroTileWidthY,
 dml_uint_t *MacroTileWidthC);

static dml_float_t CalculateWriteBackDISPCLK(
 enum dml_source_format_class WritebackPixelFormat,
 dml_float_t PixelClock,
 dml_float_t WritebackHRatio,
 dml_float_t WritebackVRatio,
 dml_uint_t WritebackHTaps,
 dml_uint_t WritebackVTaps,
 dml_uint_t WritebackSourceWidth,
 dml_uint_t WritebackDestinationWidth,
 dml_uint_t HTotal,
 dml_uint_t WritebackLineBufferSize,
 dml_float_t DISPCLKDPPCLKVCOSpeed);

static void CalculateVMRowAndSwath(
 struct display_mode_lib_scratch_st *s,
 struct CalculateVMRowAndSwath_params_st *p);

static void CalculateOutputLink(
 dml_float_t PHYCLKPerState,
 dml_float_t PHYCLKD18PerState,
 dml_float_t PHYCLKD32PerState,
 dml_float_t Downspreading,
 dml_bool_t IsMainSurfaceUsingTheIndicatedTiming,
 enum dml_output_encoder_class Output,
 enum dml_output_format_class OutputFormat,
 dml_uint_t HTotal,
 dml_uint_t HActive,
 dml_float_t PixelClockBackEnd,
 dml_float_t ForcedOutputLinkBPP,
 dml_uint_t DSCInputBitPerComponent,
 dml_uint_t NumberOfDSCSlices,
 dml_float_t AudioSampleRate,
 dml_uint_t AudioSampleLayout,
 enum dml_odm_mode ODMModeNoDSC,
 enum dml_odm_mode ODMModeDSC,
 enum dml_dsc_enable DSCEnable,
 dml_uint_t OutputLinkDPLanes,
 enum dml_output_link_dp_rate OutputLinkDPRate,

 // Output
 dml_bool_t *RequiresDSC,
 dml_bool_t *RequiresFEC,
 dml_float_t *OutBpp,
 enum dml_output_type_and_rate__type *OutputType,
 enum dml_output_type_and_rate__rate *OutputRate,
 dml_uint_t *RequiredSlots);

static void CalculateODMMode(
 dml_uint_t MaximumPixelsPerLinePerDSCUnit,
 dml_uint_t HActive,
 enum dml_output_encoder_class Output,
 enum dml_output_format_class OutputFormat,
 enum dml_odm_use_policy ODMUse,
 dml_float_t StateDispclk,
 dml_float_t MaxDispclk,
 dml_bool_t DSCEnable,
 dml_uint_t TotalNumberOfActiveDPP,
 dml_uint_t MaxNumDPP,
 dml_float_t PixelClock,
 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
 dml_float_t DISPCLKRampingMargin,
 dml_float_t DISPCLKDPPCLKVCOSpeed,
 dml_uint_t NumberOfDSCSlices,

 // Output
 dml_bool_t *TotalAvailablePipesSupport,
 dml_uint_t *NumberOfDPP,
 enum dml_odm_mode *ODMMode,
 dml_float_t *RequiredDISPCLKPerSurface);

static dml_float_t CalculateRequiredDispclk(
 enum dml_odm_mode ODMMode,
 dml_float_t PixelClock,
 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
 dml_float_t DISPCLKRampingMargin,
 dml_float_t DISPCLKDPPCLKVCOSpeed,
 dml_float_t MaxDispclkSingle);

static void CalculateSinglePipeDPPCLKAndSCLThroughput(
 dml_float_t HRatio,
 dml_float_t HRatioChroma,
 dml_float_t VRatio,
 dml_float_t VRatioChroma,
 dml_float_t MaxDCHUBToPSCLThroughput,
 dml_float_t MaxPSCLToLBThroughput,
 dml_float_t PixelClock,
 enum dml_source_format_class SourcePixelFormat,
 dml_uint_t HTaps,
 dml_uint_t HTapsChroma,
 dml_uint_t VTaps,
 dml_uint_t VTapsChroma,

 // Output
 dml_float_t *PSCL_THROUGHPUT,
 dml_float_t *PSCL_THROUGHPUT_CHROMA,
 dml_float_t *DPPCLKUsingSingleDPP);

static void CalculateDPPCLK(
 dml_uint_t NumberOfActiveSurfaces,
 dml_float_t DISPCLKDPPCLKDSCCLKDownSpreading,
 dml_float_t DISPCLKDPPCLKVCOSpeed,
 dml_float_t DPPCLKUsingSingleDPP[],
 dml_uint_t DPPPerSurface[],

 // Output
 dml_float_t *GlobalDPPCLK,
 dml_float_t Dppclk[]);

static void CalculateMALLUseForStaticScreen(
 dml_uint_t NumberOfActiveSurfaces,
 dml_uint_t MALLAllocatedForDCNFinal,
 enum dml_use_mall_for_static_screen_mode *UseMALLForStaticScreen,
 dml_uint_t SurfaceSizeInMALL[],
 dml_bool_t one_row_per_frame_fits_in_buffer[],

 // Output
 dml_bool_t UsesMALLForStaticScreen[]);

static dml_uint_t dscceComputeDelay(
 dml_uint_t bpc,
 dml_float_t BPP,
 dml_uint_t sliceWidth,
 dml_uint_t numSlices,
 enum dml_output_format_class pixelFormat,
 enum dml_output_encoder_class Output);

static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat,
 enum dml_output_encoder_class Output);

static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
 struct CalculatePrefetchSchedule_params_st *p);

static dml_float_t RoundToDFSGranularity(dml_float_t Clock, dml_bool_t round_up, dml_float_t VCOSpeed);

static void CalculateDCCConfiguration(
 dml_bool_t DCCEnabled,
 dml_bool_t DCCProgrammingAssumesScanDirectionUnknown,
 enum dml_source_format_class SourcePixelFormat,
 dml_uint_t SurfaceWidthLuma,
 dml_uint_t SurfaceWidthChroma,
 dml_uint_t SurfaceHeightLuma,
 dml_uint_t SurfaceHeightChroma,
 dml_uint_t nomDETInKByte,
 dml_uint_t RequestHeight256ByteLuma,
 dml_uint_t RequestHeight256ByteChroma,
 enum dml_swizzle_mode TilingFormat,
 dml_uint_t BytePerPixelY,
 dml_uint_t BytePerPixelC,
 dml_float_t BytePerPixelDETY,
 dml_float_t BytePerPixelDETC,
 enum dml_rotation_angle SourceScan,
 // Output
 dml_uint_t *MaxUncompressedBlockLuma,
 dml_uint_t *MaxUncompressedBlockChroma,
 dml_uint_t *MaxCompressedBlockLuma,
 dml_uint_t *MaxCompressedBlockChroma,
 dml_uint_t *IndependentBlockLuma,
 dml_uint_t *IndependentBlockChroma);

static dml_uint_t CalculatePrefetchSourceLines(
 dml_float_t VRatio,
 dml_uint_t VTaps,
 dml_bool_t Interlace,
 dml_bool_t ProgressiveToInterlaceUnitInOPP,
 dml_uint_t SwathHeight,
 enum dml_rotation_angle SourceScan,
 dml_bool_t ViewportStationary,
 dml_uint_t SwathWidth,
 dml_uint_t ViewportHeight,
 dml_uint_t ViewportXStart,
 dml_uint_t ViewportYStart,

 // Output
 dml_uint_t *VInitPreFill,
 dml_uint_t *MaxNumSwath);

static dml_uint_t CalculateVMAndRowBytes(
 dml_bool_t ViewportStationary,
 dml_bool_t DCCEnable,
 dml_uint_t NumberOfDPPs,
 dml_uint_t BlockHeight256Bytes,
 dml_uint_t BlockWidth256Bytes,
 enum dml_source_format_class SourcePixelFormat,
 dml_uint_t SurfaceTiling,
 dml_uint_t BytePerPixel,
 enum dml_rotation_angle SourceScan,
 dml_uint_t SwathWidth,
 dml_uint_t ViewportHeight,
 dml_uint_t ViewportXStart,
 dml_uint_t ViewportYStart,
 dml_bool_t GPUVMEnable,
 dml_uint_t GPUVMMaxPageTableLevels,
 dml_uint_t GPUVMMinPageSizeKBytes,
 dml_uint_t PTEBufferSizeInRequests,
 dml_uint_t Pitch,
 dml_uint_t DCCMetaPitch,
 dml_uint_t MacroTileWidth,
 dml_uint_t MacroTileHeight,

 // Output
 dml_uint_t *MetaRowByte,
 dml_uint_t *PixelPTEBytesPerRow,
 dml_uint_t *PixelPTEBytesPerRowStorage, // for PTE buffer size check
 dml_uint_t *dpte_row_width_ub,
 dml_uint_t *dpte_row_height,
 dml_uint_t *dpte_row_height_linear,
 dml_uint_t *PixelPTEBytesPerRow_one_row_per_frame,
 dml_uint_t *dpte_row_width_ub_one_row_per_frame,
 dml_uint_t *dpte_row_height_one_row_per_frame,
 dml_uint_t *MetaRequestWidth,
 dml_uint_t *MetaRequestHeight,
 dml_uint_t *meta_row_width,
 dml_uint_t *meta_row_height,
 dml_uint_t *PixelPTEReqWidth,
 dml_uint_t *PixelPTEReqHeight,
 dml_uint_t *PTERequestSize,
 dml_uint_t *DPDE0BytesFrame,
 dml_uint_t *MetaPTEBytesFrame);

static dml_float_t CalculateTWait(
 dml_uint_t PrefetchMode,
 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
 dml_bool_t SynchronizeDRRDisplaysForUCLKPStateChangeFinal,
 dml_bool_t DRRDisplay,
 dml_float_t DRAMClockChangeLatency,
 dml_float_t FCLKChangeLatency,
 dml_float_t UrgentLatency,
 dml_float_t SREnterPlusExitTime);

static void CalculatePrefetchMode(
 enum dml_prefetch_modes AllowForPStateChangeOrStutterInVBlank,
 dml_uint_t *MinPrefetchMode,
 dml_uint_t *MaxPrefetchMode);

static void CalculateRowBandwidth(
 dml_bool_t GPUVMEnable,
 enum dml_source_format_class SourcePixelFormat,
 dml_float_t VRatio,
 dml_float_t VRatioChroma,
 dml_bool_t DCCEnable,
 dml_float_t LineTime,
 dml_uint_t MetaRowByteLuma,
 dml_uint_t MetaRowByteChroma,
 dml_uint_t meta_row_height_luma,
 dml_uint_t meta_row_height_chroma,
 dml_uint_t PixelPTEBytesPerRowLuma,
 dml_uint_t PixelPTEBytesPerRowChroma,
 dml_uint_t dpte_row_height_luma,
 dml_uint_t dpte_row_height_chroma,
 // Output
 dml_float_t *meta_row_bw,
 dml_float_t *dpte_row_bw);

static void CalculateFlipSchedule(
 dml_float_t HostVMInefficiencyFactor,
 dml_float_t UrgentExtraLatency,
 dml_float_t UrgentLatency,
 dml_uint_t GPUVMMaxPageTableLevels,
 dml_bool_t HostVMEnable,
 dml_uint_t HostVMMaxNonCachedPageTableLevels,
 dml_bool_t GPUVMEnable,
 dml_uint_t HostVMMinPageSize,
 dml_float_t PDEAndMetaPTEBytesPerFrame,
 dml_float_t MetaRowBytes,
 dml_float_t DPTEBytesPerRow,
 dml_float_t BandwidthAvailableForImmediateFlip,
 dml_uint_t TotImmediateFlipBytes,
 enum dml_source_format_class SourcePixelFormat,
 dml_float_t LineTime,
 dml_float_t VRatio,
 dml_float_t VRatioChroma,
 dml_float_t Tno_bw,
 dml_bool_t DCCEnable,
 dml_uint_t dpte_row_height,
 dml_uint_t meta_row_height,
 dml_uint_t dpte_row_height_chroma,
 dml_uint_t meta_row_height_chroma,
 dml_bool_t use_one_row_for_frame_flip,

 // Output
 dml_float_t *DestinationLinesToRequestVMInImmediateFlip,
 dml_float_t *DestinationLinesToRequestRowInImmediateFlip,
 dml_float_t *final_flip_bw,
 dml_bool_t *ImmediateFlipSupportedForPipe);

static dml_float_t CalculateWriteBackDelay(
 enum dml_source_format_class WritebackPixelFormat,
 dml_float_t WritebackHRatio,
 dml_float_t WritebackVRatio,
 dml_uint_t WritebackVTaps,
 dml_uint_t WritebackDestinationWidth,
 dml_uint_t WritebackDestinationHeight,
 dml_uint_t WritebackSourceHeight,
 dml_uint_t HTotal);

static void CalculateVUpdateAndDynamicMetadataParameters(
 dml_uint_t MaxInterDCNTileRepeaters,
 dml_float_t Dppclk,
 dml_float_t DISPCLK,
 dml_float_t DCFClkDeepSleep,
 dml_float_t PixelClock,
 dml_uint_t HTotal,
 dml_uint_t VBlank,
 dml_uint_t DynamicMetadataTransmittedBytes,
 dml_uint_t DynamicMetadataLinesBeforeActiveRequired,
 dml_uint_t InterlaceEnable,
 dml_bool_t ProgressiveToInterlaceUnitInOPP,
 dml_float_t *TSetup,
 dml_float_t *Tdmbf,
 dml_float_t *Tdmec,
 dml_float_t *Tdmsks,
 dml_uint_t *VUpdateOffsetPix,
 dml_uint_t *VUpdateWidthPix,
 dml_uint_t *VReadyOffsetPix);

static void PixelClockAdjustmentForProgressiveToInterlaceUnit(struct dml_display_cfg_st *display_cfg, dml_bool_t ptoi_supported);

static dml_float_t TruncToValidBPP(
 dml_float_t LinkBitRate,
 dml_uint_t Lanes,
 dml_uint_t HTotal,
 dml_uint_t HActive,
 dml_float_t PixelClock,
 dml_float_t DesiredBPP,
 dml_bool_t DSCEnable,
 enum dml_output_encoder_class Output,
 enum dml_output_format_class Format,
 dml_uint_t DSCInputBitPerComponent,
 dml_uint_t DSCSlices,
 dml_uint_t AudioRate,
 dml_uint_t AudioLayout,
 enum dml_odm_mode ODMModeNoDSC,
 enum dml_odm_mode ODMModeDSC,
 // Output
 dml_uint_t *RequiredSlotsSingle);

static void CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport(
 struct display_mode_lib_scratch_st *s,
 struct CalculateWatermarksMALLUseAndDRAMSpeedChangeSupport_params_st *p);

static void CalculateDCFCLKDeepSleep(
 dml_uint_t NumberOfActiveSurfaces,
 dml_uint_t BytePerPixelY[],
 dml_uint_t BytePerPixelC[],
 dml_float_t VRatio[],
 dml_float_t VRatioChroma[],
 dml_uint_t SwathWidthY[],
 dml_uint_t SwathWidthC[],
 dml_uint_t DPPPerSurface[],
 dml_float_t HRatio[],
 dml_float_t HRatioChroma[],
 dml_float_t PixelClock[],
 dml_float_t PSCL_THROUGHPUT[],
 dml_float_t PSCL_THROUGHPUT_CHROMA[],
 dml_float_t Dppclk[],
 dml_float_t ReadBandwidthLuma[],
 dml_float_t ReadBandwidthChroma[],
 dml_uint_t ReturnBusWidth,

 // Output
 dml_float_t *DCFCLKDeepSleep);

static void CalculateUrgentBurstFactor(
 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange,
 dml_uint_t swath_width_luma_ub,
 dml_uint_t swath_width_chroma_ub,
 dml_uint_t SwathHeightY,
 dml_uint_t SwathHeightC,
 dml_float_t LineTime,
 dml_float_t UrgentLatency,
 dml_float_t CursorBufferSize,
 dml_uint_t CursorWidth,
 dml_uint_t CursorBPP,
 dml_float_t VRatio,
 dml_float_t VRatioC,
 dml_float_t BytePerPixelInDETY,
 dml_float_t BytePerPixelInDETC,
 dml_uint_t DETBufferSizeY,
 dml_uint_t DETBufferSizeC,
 // Output
 dml_float_t *UrgentBurstFactorCursor,
 dml_float_t *UrgentBurstFactorLuma,
 dml_float_t *UrgentBurstFactorChroma,
 dml_bool_t *NotEnoughUrgentLatencyHiding);

static dml_float_t RequiredDTBCLK(
 dml_bool_t DSCEnable,
 dml_float_t PixelClock,
 enum dml_output_format_class OutputFormat,
 dml_float_t OutputBpp,
 dml_uint_t DSCSlices,
 dml_uint_t HTotal,
 dml_uint_t HActive,
 dml_uint_t AudioRate,
 dml_uint_t AudioLayoutSingle);

static void UseMinimumDCFCLK(
 struct display_mode_lib_scratch_st *scratch,
 struct UseMinimumDCFCLK_params_st *p);

static void CalculatePixelDeliveryTimes(
 dml_uint_t NumberOfActiveSurfaces,
 dml_float_t VRatio[],
 dml_float_t VRatioChroma[],
 dml_float_t VRatioPrefetchY[],
 dml_float_t VRatioPrefetchC[],
 dml_uint_t swath_width_luma_ub[],
 dml_uint_t swath_width_chroma_ub[],
 dml_uint_t DPPPerSurface[],
 dml_float_t HRatio[],
 dml_float_t HRatioChroma[],
 dml_float_t PixelClock[],
 dml_float_t PSCL_THROUGHPUT[],
 dml_float_t PSCL_THROUGHPUT_CHROMA[],
 dml_float_t Dppclk[],
 dml_uint_t BytePerPixelC[],
 enum dml_rotation_angle SourceScan[],
 dml_uint_t NumberOfCursors[],
 dml_uint_t CursorWidth[],
 dml_uint_t CursorBPP[],
 dml_uint_t BlockWidth256BytesY[],
 dml_uint_t BlockHeight256BytesY[],
 dml_uint_t BlockWidth256BytesC[],
 dml_uint_t BlockHeight256BytesC[],

 // Output
 dml_float_t DisplayPipeLineDeliveryTimeLuma[],
 dml_float_t DisplayPipeLineDeliveryTimeChroma[],
 dml_float_t DisplayPipeLineDeliveryTimeLumaPrefetch[],
 dml_float_t DisplayPipeLineDeliveryTimeChromaPrefetch[],
 dml_float_t DisplayPipeRequestDeliveryTimeLuma[],
 dml_float_t DisplayPipeRequestDeliveryTimeChroma[],
 dml_float_t DisplayPipeRequestDeliveryTimeLumaPrefetch[],
 dml_float_t DisplayPipeRequestDeliveryTimeChromaPrefetch[],
 dml_float_t CursorRequestDeliveryTime[],
 dml_float_t CursorRequestDeliveryTimePrefetch[]);

static void CalculateMetaAndPTETimes(
 dml_bool_t use_one_row_for_frame[],
 dml_uint_t NumberOfActiveSurfaces,
 dml_bool_t GPUVMEnable,
 dml_uint_t MetaChunkSize,
 dml_uint_t MinMetaChunkSizeBytes,
 dml_uint_t HTotal[],
 dml_float_t VRatio[],
 dml_float_t VRatioChroma[],
 dml_float_t DestinationLinesToRequestRowInVBlank[],
 dml_float_t DestinationLinesToRequestRowInImmediateFlip[],
 dml_bool_t DCCEnable[],
 dml_float_t PixelClock[],
 dml_uint_t BytePerPixelY[],
 dml_uint_t BytePerPixelC[],
 enum dml_rotation_angle SourceScan[],
 dml_uint_t dpte_row_height[],
 dml_uint_t dpte_row_height_chroma[],
 dml_uint_t meta_row_width[],
 dml_uint_t meta_row_width_chroma[],
 dml_uint_t meta_row_height[],
 dml_uint_t meta_row_height_chroma[],
 dml_uint_t meta_req_width[],
 dml_uint_t meta_req_width_chroma[],
 dml_uint_t meta_req_height[],
 dml_uint_t meta_req_height_chroma[],
 dml_uint_t dpte_group_bytes[],
 dml_uint_t PTERequestSizeY[],
 dml_uint_t PTERequestSizeC[],
 dml_uint_t PixelPTEReqWidthY[],
 dml_uint_t PixelPTEReqHeightY[],
 dml_uint_t PixelPTEReqWidthC[],
 dml_uint_t PixelPTEReqHeightC[],
 dml_uint_t dpte_row_width_luma_ub[],
 dml_uint_t dpte_row_width_chroma_ub[],

 // Output
 dml_float_t DST_Y_PER_PTE_ROW_NOM_L[],
 dml_float_t DST_Y_PER_PTE_ROW_NOM_C[],
 dml_float_t DST_Y_PER_META_ROW_NOM_L[],
 dml_float_t DST_Y_PER_META_ROW_NOM_C[],
 dml_float_t TimePerMetaChunkNominal[],
 dml_float_t TimePerChromaMetaChunkNominal[],
 dml_float_t TimePerMetaChunkVBlank[],
 dml_float_t TimePerChromaMetaChunkVBlank[],
 dml_float_t TimePerMetaChunkFlip[],
 dml_float_t TimePerChromaMetaChunkFlip[],
 dml_float_t time_per_pte_group_nom_luma[],
 dml_float_t time_per_pte_group_vblank_luma[],
 dml_float_t time_per_pte_group_flip_luma[],
 dml_float_t time_per_pte_group_nom_chroma[],
 dml_float_t time_per_pte_group_vblank_chroma[],
 dml_float_t time_per_pte_group_flip_chroma[]);

static void CalculateVMGroupAndRequestTimes(
 dml_uint_t NumberOfActiveSurfaces,
 dml_bool_t GPUVMEnable,
 dml_uint_t GPUVMMaxPageTableLevels,
 dml_uint_t HTotal[],
 dml_uint_t BytePerPixelC[],
 dml_float_t DestinationLinesToRequestVMInVBlank[],
 dml_float_t DestinationLinesToRequestVMInImmediateFlip[],
 dml_bool_t DCCEnable[],
 dml_float_t PixelClock[],
 dml_uint_t dpte_row_width_luma_ub[],
 dml_uint_t dpte_row_width_chroma_ub[],
 dml_uint_t vm_group_bytes[],
 dml_uint_t dpde0_bytes_per_frame_ub_l[],
 dml_uint_t dpde0_bytes_per_frame_ub_c[],
 dml_uint_t meta_pte_bytes_per_frame_ub_l[],
 dml_uint_t meta_pte_bytes_per_frame_ub_c[],

 // Output
 dml_float_t TimePerVMGroupVBlank[],
 dml_float_t TimePerVMGroupFlip[],
 dml_float_t TimePerVMRequestVBlank[],
 dml_float_t TimePerVMRequestFlip[]);

static void CalculateStutterEfficiency(
 struct display_mode_lib_scratch_st *scratch,
 struct CalculateStutterEfficiency_params_st *p);

static void CalculateSwathAndDETConfiguration(
 struct display_mode_lib_scratch_st *scratch,
 struct CalculateSwathAndDETConfiguration_params_st *p);

static void CalculateSwathWidth(
 dml_bool_t ForceSingleDPP,
 dml_uint_t NumberOfActiveSurfaces,
 enum dml_source_format_class SourcePixelFormat[],
 enum dml_rotation_angle SourceScan[],
 dml_bool_t ViewportStationary[],
 dml_uint_t ViewportWidth[],
 dml_uint_t ViewportHeight[],
 dml_uint_t ViewportXStart[],
 dml_uint_t ViewportYStart[],
 dml_uint_t ViewportXStartC[],
 dml_uint_t ViewportYStartC[],
 dml_uint_t SurfaceWidthY[],
 dml_uint_t SurfaceWidthC[],
 dml_uint_t SurfaceHeightY[],
 dml_uint_t SurfaceHeightC[],
 enum dml_odm_mode ODMMode[],
 dml_uint_t BytePerPixY[],
 dml_uint_t BytePerPixC[],
 dml_uint_t Read256BytesBlockHeightY[],
 dml_uint_t Read256BytesBlockHeightC[],
 dml_uint_t Read256BytesBlockWidthY[],
 dml_uint_t Read256BytesBlockWidthC[],
 dml_uint_t BlendingAndTiming[],
 dml_uint_t HActive[],
 dml_float_t HRatio[],
 dml_uint_t DPPPerSurface[],

 // Output
 dml_uint_t SwathWidthSingleDPPY[],
 dml_uint_t SwathWidthSingleDPPC[],
 dml_uint_t SwathWidthY[],
 dml_uint_t SwathWidthC[],
 dml_uint_t MaximumSwathHeightY[],
 dml_uint_t MaximumSwathHeightC[],
 dml_uint_t swath_width_luma_ub[],
 dml_uint_t swath_width_chroma_ub[]);

static dml_float_t CalculateExtraLatency(
 dml_uint_t RoundTripPingLatencyCycles,
 dml_uint_t ReorderingBytes,
 dml_float_t DCFCLK,
 dml_uint_t TotalNumberOfActiveDPP,
 dml_uint_t PixelChunkSizeInKByte,
 dml_uint_t TotalNumberOfDCCActiveDPP,
 dml_uint_t MetaChunkSize,
 dml_float_t ReturnBW,
 dml_bool_t GPUVMEnable,
 dml_bool_t HostVMEnable,
 dml_uint_t NumberOfActiveSurfaces,
 dml_uint_t NumberOfDPP[],
 dml_uint_t dpte_group_bytes[],
 dml_float_t HostVMInefficiencyFactor,
 dml_uint_t HostVMMinPageSize,
 dml_uint_t HostVMMaxNonCachedPageTableLevels);

static dml_uint_t CalculateExtraLatencyBytes(
 dml_uint_t ReorderingBytes,
 dml_uint_t TotalNumberOfActiveDPP,
 dml_uint_t PixelChunkSizeInKByte,
 dml_uint_t TotalNumberOfDCCActiveDPP,
 dml_uint_t MetaChunkSize,
 dml_bool_t GPUVMEnable,
 dml_bool_t HostVMEnable,
 dml_uint_t NumberOfActiveSurfaces,
 dml_uint_t NumberOfDPP[],
 dml_uint_t dpte_group_bytes[],
 dml_float_t HostVMInefficiencyFactor,
 dml_uint_t HostVMMinPageSize,
 dml_uint_t HostVMMaxNonCachedPageTableLevels);

static dml_float_t CalculateUrgentLatency(
 dml_float_t UrgentLatencyPixelDataOnly,
 dml_float_t UrgentLatencyPixelMixedWithVMData,
 dml_float_t UrgentLatencyVMDataOnly,
 dml_bool_t DoUrgentLatencyAdjustment,
 dml_float_t UrgentLatencyAdjustmentFabricClockComponent,
 dml_float_t UrgentLatencyAdjustmentFabricClockReference,
 dml_float_t FabricClockSingle);

static dml_bool_t UnboundedRequest(
 enum dml_unbounded_requesting_policy UseUnboundedRequestingFinal,
 dml_uint_t TotalNumberOfActiveDPP,
 dml_bool_t NoChromaOrLinear,
 enum dml_output_encoder_class Output);

static void CalculateSurfaceSizeInMall(
 dml_uint_t NumberOfActiveSurfaces,
 dml_uint_t MALLAllocatedForDCN,
 enum dml_use_mall_for_static_screen_mode UseMALLForStaticScreen[],
 dml_bool_t DCCEnable[],
 dml_bool_t ViewportStationary[],
 dml_uint_t ViewportXStartY[],
 dml_uint_t ViewportYStartY[],
 dml_uint_t ViewportXStartC[],
 dml_uint_t ViewportYStartC[],
 dml_uint_t ViewportWidthY[],
 dml_uint_t ViewportHeightY[],
 dml_uint_t BytesPerPixelY[],
 dml_uint_t ViewportWidthC[],
 dml_uint_t ViewportHeightC[],
 dml_uint_t BytesPerPixelC[],
 dml_uint_t SurfaceWidthY[],
 dml_uint_t SurfaceWidthC[],
 dml_uint_t SurfaceHeightY[],
 dml_uint_t SurfaceHeightC[],
 dml_uint_t Read256BytesBlockWidthY[],
 dml_uint_t Read256BytesBlockWidthC[],
 dml_uint_t Read256BytesBlockHeightY[],
 dml_uint_t Read256BytesBlockHeightC[],
 dml_uint_t ReadBlockWidthY[],
 dml_uint_t ReadBlockWidthC[],
 dml_uint_t ReadBlockHeightY[],
 dml_uint_t ReadBlockHeightC[],

 // Output
 dml_uint_t SurfaceSizeInMALL[],
 dml_bool_t *ExceededMALLSize);

/*
 * Forward declaration of CalculateDETBufferSize().
 *
 * Everything above the "Output" marker is an input; DETBufferSizeInKByte[]
 * and *CompressedBufferSizeInkByte are the parameters listed as outputs.
 *
 * The luma swath-size array is named RoundedUpMaxSwathSizeBytesY[] so that it
 * pairs with RoundedUpMaxSwathSizeBytesC[]. Parameter names in a prototype
 * are not part of the function's type, so this spelling has no effect on
 * callers or on the definition.
 */
static void CalculateDETBufferSize(
 dml_uint_t DETSizeOverride[],
 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
 dml_bool_t ForceSingleDPP,
 dml_uint_t NumberOfActiveSurfaces,
 dml_bool_t UnboundedRequestEnabled,
 dml_uint_t nomDETInKByte,
 dml_uint_t MaxTotalDETInKByte,
 dml_uint_t ConfigReturnBufferSizeInKByte,
 dml_uint_t MinCompressedBufferSizeInKByte,
 dml_uint_t ConfigReturnBufferSegmentSizeInkByte,
 dml_uint_t CompressedBufferSegmentSizeInkByteFinal,
 enum dml_source_format_class SourcePixelFormat[],
 dml_float_t ReadBandwidthLuma[],
 dml_float_t ReadBandwidthChroma[],
 dml_uint_t RoundedUpMaxSwathSizeBytesY[],
 dml_uint_t RoundedUpMaxSwathSizeBytesC[],
 dml_uint_t DPPPerSurface[],
 // Output
 dml_uint_t DETBufferSizeInKByte[],
 dml_uint_t *CompressedBufferSizeInkByte);

static void CalculateMaxDETAndMinCompressedBufferSize(
 dml_uint_t ConfigReturnBufferSizeInKByte,
 dml_uint_t ConfigReturnBufferSegmentSizeInKByte,
 dml_uint_t ROBBufferSizeInKByte,
 dml_uint_t MaxNumDPP,
 dml_bool_t nomDETInKByteOverrideEnable,
 dml_uint_t nomDETInKByteOverrideValue,

 // Output
 dml_uint_t *MaxTotalDETInKByte,
 dml_uint_t *nomDETInKByte,
 dml_uint_t *MinCompressedBufferSizeInKByte);

static dml_uint_t DSCDelayRequirement(
 dml_bool_t DSCEnabled,
 enum dml_odm_mode ODMMode,
 dml_uint_t DSCInputBitPerComponent,
 dml_float_t OutputBpp,
 dml_uint_t HActive,
 dml_uint_t HTotal,
 dml_uint_t NumberOfDSCSlices,
 enum dml_output_format_class OutputFormat,
 enum dml_output_encoder_class Output,
 dml_float_t PixelClock,
 dml_float_t PixelClockBackEnd);

static dml_bool_t CalculateVActiveBandwithSupport(
 dml_uint_t NumberOfActiveSurfaces,
 dml_float_t ReturnBW,
 dml_bool_t NotUrgentLatencyHiding[],
 dml_float_t ReadBandwidthLuma[],
 dml_float_t ReadBandwidthChroma[],
 dml_float_t cursor_bw[],
 dml_float_t meta_row_bandwidth[],
 dml_float_t dpte_row_bandwidth[],
 dml_uint_t NumberOfDPP[],
 dml_float_t UrgentBurstFactorLuma[],
 dml_float_t UrgentBurstFactorChroma[],
 dml_float_t UrgentBurstFactorCursor[]);

static void CalculatePrefetchBandwithSupport(
 dml_uint_t NumberOfActiveSurfaces,
 dml_float_t ReturnBW,
 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
 dml_bool_t NotUrgentLatencyHiding[],
 dml_float_t ReadBandwidthLuma[],
 dml_float_t ReadBandwidthChroma[],
 dml_float_t PrefetchBandwidthLuma[],
 dml_float_t PrefetchBandwidthChroma[],
 dml_float_t cursor_bw[],
 dml_float_t meta_row_bandwidth[],
 dml_float_t dpte_row_bandwidth[],
 dml_float_t cursor_bw_pre[],
 dml_float_t prefetch_vmrow_bw[],
 dml_uint_t NumberOfDPP[],
 dml_float_t UrgentBurstFactorLuma[],
 dml_float_t UrgentBurstFactorChroma[],
 dml_float_t UrgentBurstFactorCursor[],
 dml_float_t UrgentBurstFactorLumaPre[],
 dml_float_t UrgentBurstFactorChromaPre[],
 dml_float_t UrgentBurstFactorCursorPre[],

 // Output
 dml_float_t *PrefetchBandwidth,
 dml_float_t *PrefetchBandwidthNotIncludingMALLPrefetch,
 dml_float_t *FractionOfUrgentBandwidth,
 dml_bool_t *PrefetchBandwidthSupport);

static dml_float_t CalculateBandwidthAvailableForImmediateFlip(
 dml_uint_t NumberOfActiveSurfaces,
 dml_float_t ReturnBW,
 dml_float_t ReadBandwidthLuma[],
 dml_float_t ReadBandwidthChroma[],
 dml_float_t PrefetchBandwidthLuma[],
 dml_float_t PrefetchBandwidthChroma[],
 dml_float_t cursor_bw[],
 dml_float_t cursor_bw_pre[],
 dml_uint_t NumberOfDPP[],
 dml_float_t UrgentBurstFactorLuma[],
 dml_float_t UrgentBurstFactorChroma[],
 dml_float_t UrgentBurstFactorCursor[],
 dml_float_t UrgentBurstFactorLumaPre[],
 dml_float_t UrgentBurstFactorChromaPre[],
 dml_float_t UrgentBurstFactorCursorPre[]);

static void CalculateImmediateFlipBandwithSupport(
 dml_uint_t NumberOfActiveSurfaces,
 dml_float_t ReturnBW,
 enum dml_use_mall_for_pstate_change_mode UseMALLForPStateChange[],
 enum dml_immediate_flip_requirement ImmediateFlipRequirement[],
 dml_float_t final_flip_bw[],
 dml_float_t ReadBandwidthLuma[],
 dml_float_t ReadBandwidthChroma[],
 dml_float_t PrefetchBandwidthLuma[],
 dml_float_t PrefetchBandwidthChroma[],
 dml_float_t cursor_bw[],
 dml_float_t meta_row_bandwidth[],
 dml_float_t dpte_row_bandwidth[],
 dml_float_t cursor_bw_pre[],
 dml_float_t prefetch_vmrow_bw[],
 dml_uint_t NumberOfDPP[],
 dml_float_t UrgentBurstFactorLuma[],
 dml_float_t UrgentBurstFactorChroma[],
 dml_float_t UrgentBurstFactorCursor[],
 dml_float_t UrgentBurstFactorLumaPre[],
 dml_float_t UrgentBurstFactorChromaPre[],
 dml_float_t UrgentBurstFactorCursorPre[],

 // Output
 dml_float_t *TotalBandwidth,
 dml_float_t *TotalBandwidthNotIncludingMALLPrefetch,
 dml_float_t *FractionOfUrgentBandwidth,
 dml_bool_t *ImmediateFlipBandwidthSupport);

// ---------------------------
//  Declaration Ends
// ---------------------------

/*
 * dscceComputeDelay() - delay through the DSC encoder core, in pixels.
 *
 * bpc:         source bits per component; 8 and 10 each select their own SSM
 *              delay, any other value takes the largest one (valid set is
 *              {8, 10, 12})
 * BPP:         compressed bits per pixel, in increments of 1/16 of a bit
 *              (min 6/7/8 for N420/N422/444, max such that compression is 1:1)
 * sliceWidth:  pixels per slice line, at most 5184/numSlices
 *              (4096/numSlices in 420 mode)
 * numSlices:   horizontal slices per DSC engine, in {1, 2, 3, 4}
 * pixelFormat: dml_420 and dml_n422 are processed at two pixels per clock,
 *              every other format at one
 * Output:      only reported in the debug trace
 *
 * Both BPP and sliceWidth / pixelsPerClock are used as divisors below, so
 * callers are expected to pass a non-zero BPP and a sliceWidth of at least
 * pixelsPerClock.
 *
 * Return: the encoder delay expressed in pixels.
 */
static dml_uint_t dscceComputeDelay(
	dml_uint_t bpc,
	dml_float_t BPP,
	dml_uint_t sliceWidth,
	dml_uint_t numSlices,
	enum dml_output_format_class pixelFormat,
	enum dml_output_encoder_class Output)
{
	/* fixed rate-control model size */
	dml_uint_t rc_model_size = 8192;
	dml_uint_t ppc, xmit_delay, ssm_delay, slice_clks, extra_cycle;
	dml_uint_t ix, groups, pad, lines0, a, ax, lines, stall;
	dml_uint_t delay_clks, pixels;

	/* N420 and N422 carry two pixels per clock, everything else one */
	ppc = (pixelFormat == dml_420 || pixelFormat == dml_n422) ? 2 : 1;

	/* initial transmit delay as per PPS */
	xmit_delay = (dml_uint_t)(dml_round(rc_model_size / 2.0 / BPP / ppc, 1));

	/* substream multiplexer delay depends on the component depth */
	switch (bpc) {
	case 8:
		ssm_delay = 81;
		break;
	case 10:
		ssm_delay = 89;
		break;
	default:
		ssm_delay = 113;
		break;
	}

	/* slice width as seen by the DSC, i.e. in clocks rather than pixels */
	slice_clks = sliceWidth / ppc;

	/* formats other than 420, 444 and native 422 cost one extra cycle */
	extra_cycle = (pixelFormat != dml_420 && pixelFormat != dml_444 &&
		       pixelFormat != dml_n422) ? 1 : 0;

	/* main calculation for the dscce */
	ix = xmit_delay + 45;
	groups = (slice_clks + 2) / 3;
	pad = 3 * groups - slice_clks;
	lines0 = ix / slice_clks;
	a = ix + pad * lines0;
	ax = (a + 2) / 3 + ssm_delay + 6 + 1;
	lines = (ax + groups - 1) / groups;
	stall = ((ix % slice_clks) == 0 && pad != 0) ? 1 : 0;
	delay_clks = lines * groups * (numSlices - 1) + ax + extra_cycle + stall + 22;

	/*
	 * The DSC processes 3 pixel containers per cycle and a container holds
	 * 1 or 2 pixels.
	 */
	pixels = delay_clks * 3 * ppc;

#ifdef __DML_VBA_DEBUG__
	dml_print("DML::%s: bpc: %u\n", __func__, bpc);
	dml_print("DML::%s: BPP: %f\n", __func__, BPP);
	dml_print("DML::%s: sliceWidth: %u\n", __func__, sliceWidth);
	dml_print("DML::%s: numSlices: %u\n", __func__, numSlices);
	dml_print("DML::%s: pixelFormat: %u\n", __func__, pixelFormat);
	dml_print("DML::%s: Output: %u\n", __func__, Output);
	dml_print("DML::%s: pixels: %u\n", __func__, pixels);
#endif
	return pixels;
}

static dml_uint_t dscComputeDelay(enum dml_output_format_class pixelFormat, enum dml_output_encoder_class Output)
{
 dml_uint_t Delay = 0;

 if (pixelFormat == dml_420) {
  // sfr
  Delay = Delay + 2;
  // dsccif
  Delay = Delay + 0;
  // dscc - input deserializer
  Delay = Delay + 3;
  // dscc gets pixels every other cycle
  Delay = Delay + 2;
  // dscc - input cdc fifo
  Delay = Delay + 12;
  // dscc gets pixels every other cycle
  Delay = Delay + 13;
  // dscc - cdc uncertainty
  Delay = Delay + 2;
  // dscc - output cdc fifo
  Delay = Delay + 7;
  // dscc gets pixels every other cycle
  Delay = Delay + 3;
  // dscc - cdc uncertainty
  Delay = Delay + 2;
  // dscc - output serializer
  Delay = Delay + 1;
  // sft
  Delay = Delay + 1;
 } else if (pixelFormat == dml_n422) {
 // sfr
 Delay = Delay + 2;
 // dsccif
 Delay = Delay + 1;
 // dscc - input deserializer
 Delay = Delay + 5;
 // dscc - input cdc fifo
 Delay = Delay + 25;
 // dscc - cdc uncertainty
 Delay = Delay + 2;
 // dscc - output cdc fifo
 Delay = Delay + 10;
 // dscc - cdc uncertainty
 Delay = Delay + 2;
 // dscc - output serializer
 Delay = Delay + 1;
 // sft
 Delay = Delay + 1;
 } else {
 // sfr
 Delay = Delay + 2;
 // dsccif
 Delay = Delay + 0;
 // dscc - input deserializer
 Delay = Delay + 3;
 // dscc - input cdc fifo
 Delay = Delay + 12;
 // dscc - cdc uncertainty
 Delay = Delay + 2;
 // dscc - output cdc fifo
 Delay = Delay + 7;
 // dscc - output serializer
 Delay = Delay + 1;
 // dscc - cdc uncertainty
 Delay = Delay + 2;
 // sft
 Delay = Delay + 1;
 }
#ifdef __DML_VBA_DEBUG__
 dml_print("DML::%s: pixelFormat = %u\n", __func__, pixelFormat);
 dml_print("DML::%s: Delay = %u\n", __func__, Delay);
#endif

 return Delay;
}

static dml_bool_t CalculatePrefetchSchedule(struct display_mode_lib_scratch_st *scratch,
 struct CalculatePrefetchSchedule_params_st *p)
{
 struct CalculatePrefetchSchedule_locals_st *s = &scratch->CalculatePrefetchSchedule_locals;

 s->MyError = false;
 s->DPPCycles = 0;
 s->DISPCLKCycles = 0;
 s->DSTTotalPixelsAfterScaler = 0.0;
 s->LineTime = 0.0;
 s->dst_y_prefetch_equ = 0.0;
 s->prefetch_bw_oto = 0.0;
 s->Tvm_oto = 0.0;
 s->Tr0_oto = 0.0;
 s->Tvm_oto_lines = 0.0;
 s->Tr0_oto_lines = 0.0;
 s->dst_y_prefetch_oto = 0.0;
 s->TimeForFetchingMetaPTE = 0.0;
 s->TimeForFetchingRowInVBlank = 0.0;
 s->LinesToRequestPrefetchPixelData = 0.0;
 s->HostVMDynamicLevelsTrips = 0;
 s->trip_to_mem = 0.0;
 s->Tvm_trips = 0.0;
 s->Tr0_trips = 0.0;
 s->Tvm_trips_rounded = 0.0;
 s->Tr0_trips_rounded = 0.0;
 s->max_Tsw = 0.0;
 s->Lsw_oto = 0.0;
 s->Tpre_rounded = 0.0;
 s->prefetch_bw_equ = 0.0;
 s->Tvm_equ = 0.0;
 s->Tr0_equ = 0.0;
 s->Tdmbf = 0.0;
 s->Tdmec = 0.0;
 s->Tdmsks = 0.0;
 s->prefetch_sw_bytes = 0.0;
 s->prefetch_bw_pr = 0.0;
 s->bytes_pp = 0.0;
 s->dep_bytes = 0.0;
 s->min_Lsw_oto = 0.0;
 s->Tsw_est1 = 0.0;
 s->Tsw_est3 = 0.0;

 if (p->GPUVMEnable == true && p->HostVMEnable == true) {
  s->HostVMDynamicLevelsTrips = p->HostVMMaxNonCachedPageTableLevels;
 } else {
  s->HostVMDynamicLevelsTrips = 0;
 }
#ifdef __DML_VBA_DEBUG__
 dml_print("DML::%s: GPUVMEnable = %u\n", __func__, p->GPUVMEnable);
 dml_print("DML::%s: GPUVMPageTableLevels = %u\n", __func__, p->GPUVMPageTableLevels);
 dml_print("DML::%s: DCCEnable = %u\n", __func__, p->myPipe->DCCEnable);
 dml_print("DML::%s: VStartup = %u\n", __func__, p->VStartup);
 dml_print("DML::%s: MaxVStartup = %u\n", __func__, p->MaxVStartup);
 dml_print("DML::%s: HostVMEnable = %u\n", __func__, p->HostVMEnable);
 dml_print("DML::%s: HostVMInefficiencyFactor= %f\n", __func__, p->HostVMInefficiencyFactor);
 dml_print("DML::%s: myPipe->Dppclk = %f\n", __func__, p->myPipe->Dppclk);
#endif
 CalculateVUpdateAndDynamicMetadataParameters(
 p->MaxInterDCNTileRepeaters,
 p->myPipe->Dppclk,
 p->myPipe->Dispclk,
 p->myPipe->DCFClkDeepSleep,
 p->myPipe->PixelClock,
 p->myPipe->HTotal,
 p->myPipe->VBlank,
 p->DynamicMetadataTransmittedBytes,
 p->DynamicMetadataLinesBeforeActiveRequired,
 p->myPipe->InterlaceEnable,
 p->myPipe->ProgressiveToInterlaceUnitInOPP,
 p->TSetup,

 // Output
 &s->Tdmbf,
 &s->Tdmec,
 &s->Tdmsks,
 p->VUpdateOffsetPix,
 p->VUpdateWidthPix,
 p->VReadyOffsetPix);

 s->LineTime = p->myPipe->HTotal / p->myPipe->PixelClock;
 s->trip_to_mem = p->UrgentLatency;
 s->Tvm_trips = p->UrgentExtraLatency + s->trip_to_mem * (p->GPUVMPageTableLevels * (s->HostVMDynamicLevelsTrips + 1) - 1);

 if (p->DynamicMetadataVMEnabled == true) {
  *p->Tdmdl = p->TWait + s->Tvm_trips + s->trip_to_mem;
 } else {
  *p->Tdmdl = p->TWait + p->UrgentExtraLatency;
 }

#ifdef __DML_VBA_ALLOW_DELTA__
 if (DynamicMetadataEnable == false) {
  *Tdmdl = 0.0;
 }
#endif

 if (p->DynamicMetadataEnable == true) {
  if (p->VStartup * s->LineTime < *p->TSetup + *p->Tdmdl + s->Tdmbf + s->Tdmec + s->Tdmsks) {
   *p->NotEnoughTimeForDynamicMetadata = true;
   dml_print("DML::%s: Not Enough Time for Dynamic Meta!\n", __func__);
   dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
   dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
   dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
   dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
  } else {
   *p->NotEnoughTimeForDynamicMetadata = false;
  }
 } else {
  *p->NotEnoughTimeForDynamicMetadata = false;
 }

 *p->Tdmdl_vm = (p->DynamicMetadataEnable == true && p->DynamicMetadataVMEnabled == true && p->GPUVMEnable == true ? p->TWait + s->Tvm_trips : 0);

 if (p->myPipe->ScalerEnabled)
  s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCL);
 else
  s->DPPCycles = (dml_uint_t)(p->DPPCLKDelaySubtotalPlusCNVCFormater + p->DPPCLKDelaySCLLBOnly);

 s->DPPCycles = (dml_uint_t)(s->DPPCycles + p->myPipe->NumberOfCursors * p->DPPCLKDelayCNVCCursor);

 s->DISPCLKCycles = (dml_uint_t)p->DISPCLKDelaySubtotal;

 if (p->myPipe->Dppclk == 0.0 || p->myPipe->Dispclk == 0.0)
  return true;

 *p->DSTXAfterScaler = (dml_uint_t) dml_round(s->DPPCycles * p->myPipe->PixelClock / p->myPipe->Dppclk + s->DISPCLKCycles * p->myPipe->PixelClock / p->myPipe->Dispclk + p->DSCDelay, 1.0);
 *p->DSTXAfterScaler = (dml_uint_t) dml_round(*p->DSTXAfterScaler + (p->myPipe->ODMMode != dml_odm_mode_bypass ? 18 : 0) + (p->myPipe->DPPPerSurface - 1) * p->DPP_RECOUT_WIDTH +
      ((p->myPipe->ODMMode == dml_odm_mode_split_1to2 || p->myPipe->ODMMode == dml_odm_mode_mso_1to2) ? (dml_float_t)p->myPipe->HActive / 2.0 : 0) +
      ((p->myPipe->ODMMode == dml_odm_mode_mso_1to4) ? (dml_float_t)p->myPipe->HActive * 3.0 / 4.0 : 0), 1.0);

#ifdef __DML_VBA_DEBUG__
 dml_print("DML::%s: DPPCycles = %u\n", __func__, s->DPPCycles);
 dml_print("DML::%s: PixelClock = %f\n", __func__, p->myPipe->PixelClock);
 dml_print("DML::%s: Dppclk = %f\n", __func__, p->myPipe->Dppclk);
 dml_print("DML::%s: DISPCLKCycles = %u\n", __func__, s->DISPCLKCycles);
 dml_print("DML::%s: DISPCLK = %f\n", __func__, p->myPipe->Dispclk);
 dml_print("DML::%s: DSCDelay = %u\n", __func__, p->DSCDelay);
 dml_print("DML::%s: ODMMode = %u\n", __func__, p->myPipe->ODMMode);
 dml_print("DML::%s: DPP_RECOUT_WIDTH = %u\n", __func__, p->DPP_RECOUT_WIDTH);
 dml_print("DML::%s: DSTXAfterScaler = %u\n", __func__, *p->DSTXAfterScaler);
#endif

 if (p->OutputFormat == dml_420 || (p->myPipe->InterlaceEnable && p->myPipe->ProgressiveToInterlaceUnitInOPP))
  *p->DSTYAfterScaler = 1;
 else
  *p->DSTYAfterScaler = 0;

 s->DSTTotalPixelsAfterScaler = *p->DSTYAfterScaler * p->myPipe->HTotal + *p->DSTXAfterScaler;
 *p->DSTYAfterScaler = (dml_uint_t)(dml_floor(s->DSTTotalPixelsAfterScaler / p->myPipe->HTotal, 1));
 *p->DSTXAfterScaler = (dml_uint_t)(s->DSTTotalPixelsAfterScaler - ((dml_float_t) (*p->DSTYAfterScaler * p->myPipe->HTotal)));
#ifdef __DML_VBA_DEBUG__
 dml_print("DML::%s: DSTXAfterScaler = %u (final)\n", __func__,  *p->DSTXAfterScaler);
 dml_print("DML::%s: DSTYAfterScaler = %u (final)\n", __func__, *p->DSTYAfterScaler);
#endif

 s->MyError = false;

 s->Tr0_trips = s->trip_to_mem * (s->HostVMDynamicLevelsTrips + 1);

 if (p->GPUVMEnable == true) {
  s->Tvm_trips_rounded = dml_ceil(4.0 * s->Tvm_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
  s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
  if (p->GPUVMPageTableLevels >= 3) {
   *p->Tno_bw = p->UrgentExtraLatency + s->trip_to_mem * (dml_float_t) ((p->GPUVMPageTableLevels - 2) * (s->HostVMDynamicLevelsTrips + 1) - 1);
 } else if (p->GPUVMPageTableLevels == 1 && p->myPipe->DCCEnable != true) {
   s->Tr0_trips_rounded = dml_ceil(4.0 * p->UrgentExtraLatency / s->LineTime, 1.0) / 4.0 * s->LineTime;
   *p->Tno_bw = p->UrgentExtraLatency;
  } else {
   *p->Tno_bw = 0;
  }
 } else if (p->myPipe->DCCEnable == true) {
  s->Tvm_trips_rounded = s->LineTime / 4.0;
  s->Tr0_trips_rounded = dml_ceil(4.0 * s->Tr0_trips / s->LineTime, 1.0) / 4.0 * s->LineTime;
  *p->Tno_bw = 0;
 } else {
  s->Tvm_trips_rounded = s->LineTime / 4.0;
  s->Tr0_trips_rounded = s->LineTime / 2.0;
  *p->Tno_bw = 0;
 }
 s->Tvm_trips_rounded = dml_max(s->Tvm_trips_rounded, s->LineTime / 4.0);
 s->Tr0_trips_rounded = dml_max(s->Tr0_trips_rounded, s->LineTime / 4.0);

 if (p->myPipe->SourcePixelFormat == dml_420_8 || p->myPipe->SourcePixelFormat == dml_420_10 || p->myPipe->SourcePixelFormat == dml_420_12) {
  s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC / 4;
 } else {
  s->bytes_pp = p->myPipe->BytePerPixelY + p->myPipe->BytePerPixelC;
 }

 s->prefetch_bw_pr = s->bytes_pp * p->myPipe->PixelClock / (dml_float_t)p->myPipe->DPPPerSurface;
 if (p->myPipe->VRatio < 1.0)
  s->prefetch_bw_pr = p->myPipe->VRatio * s->prefetch_bw_pr;

 s->max_Tsw = (dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) * s->LineTime);

 s->prefetch_sw_bytes = p->PrefetchSourceLinesY * p->swath_width_luma_ub * p->myPipe->BytePerPixelY + p->PrefetchSourceLinesC * p->swath_width_chroma_ub * p->myPipe->BytePerPixelC;
 s->prefetch_bw_oto = dml_max(s->prefetch_bw_pr, s->prefetch_sw_bytes / s->max_Tsw);

 s->min_Lsw_oto = dml_max(p->PrefetchSourceLinesY, p->PrefetchSourceLinesC) / __DML_MAX_VRATIO_PRE_OTO__;
 s->min_Lsw_oto = dml_max(s->min_Lsw_oto, 1.0);
 s->Lsw_oto = dml_ceil(4.0 * dml_max(s->prefetch_sw_bytes / s->prefetch_bw_oto / s->LineTime, s->min_Lsw_oto), 1.0) / 4.0;

 if (p->GPUVMEnable == true) {
  s->Tvm_oto = dml_max3(
   s->Tvm_trips,
   *p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_oto,
   s->LineTime / 4.0);
 } else
  s->Tvm_oto = s->LineTime / 4.0;

 if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
  s->Tr0_oto = dml_max4(
   s->Tr0_trips,
   (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto,
   (s->LineTime - s->Tvm_oto)/2.0,
   s->LineTime / 4.0);
#ifdef __DML_VBA_DEBUG__
 dml_print("DML::%s: Tr0_oto max0 = %f\n", __func__, (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_oto);
 dml_print("DML::%s: Tr0_oto max1 = %f\n", __func__, s->Tr0_trips);
 dml_print("DML::%s: Tr0_oto max2 = %f\n", __func__, s->LineTime - s->Tvm_oto);
 dml_print("DML::%s: Tr0_oto max3 = %f\n", __func__, s->LineTime / 4);
#endif
 } else
  s->Tr0_oto = (s->LineTime - s->Tvm_oto) / 2.0;

 s->Tvm_oto_lines = dml_ceil(4.0 * s->Tvm_oto / s->LineTime, 1) / 4.0;
 s->Tr0_oto_lines = dml_ceil(4.0 * s->Tr0_oto / s->LineTime, 1) / 4.0;
 s->dst_y_prefetch_oto = s->Tvm_oto_lines + 2 * s->Tr0_oto_lines + s->Lsw_oto;

 s->dst_y_prefetch_equ = p->VStartup - (*p->TSetup + dml_max(p->TWait + p->TCalc, *p->Tdmdl)) / s->LineTime - (*p->DSTYAfterScaler + (dml_float_t) *p->DSTXAfterScaler / (dml_float_t)p->myPipe->HTotal);
 s->dst_y_prefetch_equ = dml_min(s->dst_y_prefetch_equ, 63.75); // limit to the reg limit of U6.2 for DST_Y_PREFETCH

#ifdef __DML_VBA_DEBUG__
 dml_print("DML::%s: HTotal = %u\n", __func__, p->myPipe->HTotal);
 dml_print("DML::%s: min_Lsw_oto = %f\n", __func__, s->min_Lsw_oto);
 dml_print("DML::%s: *Tno_bw = %f\n", __func__, *p->Tno_bw);
 dml_print("DML::%s: UrgentExtraLatency = %f\n", __func__, p->UrgentExtraLatency);
 dml_print("DML::%s: trip_to_mem = %f\n", __func__, s->trip_to_mem);
 dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
 dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
 dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
 dml_print("DML::%s: BytePerPixelC = %u\n", __func__, p->myPipe->BytePerPixelC);
 dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
 dml_print("DML::%s: swath_width_chroma_ub = %u\n", __func__, p->swath_width_chroma_ub);
 dml_print("DML::%s: prefetch_sw_bytes = %f\n", __func__, s->prefetch_sw_bytes);
 dml_print("DML::%s: bytes_pp = %f\n", __func__, s->bytes_pp);
 dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
 dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
 dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
 dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
 dml_print("DML::%s: Tvm_trips = %f\n", __func__, s->Tvm_trips);
 dml_print("DML::%s: Tr0_trips = %f\n", __func__, s->Tr0_trips);
 dml_print("DML::%s: prefetch_bw_oto = %f\n", __func__, s->prefetch_bw_oto);
 dml_print("DML::%s: Tr0_oto = %f\n", __func__, s->Tr0_oto);
 dml_print("DML::%s: Tvm_oto = %f\n", __func__, s->Tvm_oto);
 dml_print("DML::%s: Tvm_oto_lines = %f\n", __func__, s->Tvm_oto_lines);
 dml_print("DML::%s: Tr0_oto_lines = %f\n", __func__, s->Tr0_oto_lines);
 dml_print("DML::%s: Lsw_oto = %f\n", __func__, s->Lsw_oto);
 dml_print("DML::%s: dst_y_prefetch_oto = %f\n", __func__, s->dst_y_prefetch_oto);
 dml_print("DML::%s: dst_y_prefetch_equ = %f\n", __func__, s->dst_y_prefetch_equ);
#endif

 s->dst_y_prefetch_equ = dml_floor(4.0 * (s->dst_y_prefetch_equ + 0.125), 1) / 4.0;
 s->Tpre_rounded = s->dst_y_prefetch_equ * s->LineTime;

 dml_print("DML::%s: dst_y_prefetch_equ: %f (after round)\n", __func__, s->dst_y_prefetch_equ);

 dml_print("DML::%s: LineTime: %f\n", __func__, s->LineTime);
 dml_print("DML::%s: VStartup: %u\n", __func__, p->VStartup);
 dml_print("DML::%s: Tvstartup: %fus - time between vstartup and first pixel of active\n", __func__, p->VStartup * s->LineTime);
 dml_print("DML::%s: TSetup: %fus - time from vstartup to vready\n", __func__, *p->TSetup);
 dml_print("DML::%s: TCalc: %fus - time for calculations in dchub starting at vready\n", __func__, p->TCalc);
 dml_print("DML::%s: TWait: %fus - time for fabric to become ready max(pstate exit,cstate enter/exit, urgent latency) after TCalc\n", __func__, p->TWait);
 dml_print("DML::%s: Tdmbf: %fus - time for dmd transfer from dchub to dio output buffer\n", __func__, s->Tdmbf);
 dml_print("DML::%s: Tdmec: %fus - time dio takes to transfer dmd\n", __func__, s->Tdmec);
 dml_print("DML::%s: Tdmsks: %fus - time before active dmd must complete transmission at dio\n", __func__, s->Tdmsks);
 dml_print("DML::%s: Tdmdl_vm: %fus - time for vm stages of dmd \n", __func__, *p->Tdmdl_vm);
 dml_print("DML::%s: Tdmdl: %fus - time for fabric to become ready and fetch dmd \n", __func__, *p->Tdmdl);
 dml_print("DML::%s: DSTXAfterScaler: %u pixels - number of pixel clocks pipeline and buffer delay after scaler \n", __func__, *p->DSTXAfterScaler);
 dml_print("DML::%s: DSTYAfterScaler: %u lines - number of lines of pipeline and buffer delay after scaler \n", __func__, *p->DSTYAfterScaler);

 s->dep_bytes = dml_max(p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor, p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor);

 if (s->prefetch_sw_bytes < s->dep_bytes) {
  s->prefetch_sw_bytes = 2 * s->dep_bytes;
 }

 *p->DestinationLinesToRequestVMInVBlank = 0;
 *p->DestinationLinesToRequestRowInVBlank = 0;
 *p->VRatioPrefetchY = 0;
 *p->VRatioPrefetchC = 0;
 *p->RequiredPrefetchPixDataBWLuma = 0;
 if (s->dst_y_prefetch_equ > 1) {

  if (s->Tpre_rounded - *p->Tno_bw > 0) {
  s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte
     + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor
     + s->prefetch_sw_bytes)
     / (s->Tpre_rounded - *p->Tno_bw);
   s->Tsw_est1 = s->prefetch_sw_bytes / s->PrefetchBandwidth1;
  } else
   s->PrefetchBandwidth1 = 0;

  if (p->VStartup == p->MaxVStartup && (s->Tsw_est1 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw > 0) {
   s->PrefetchBandwidth1 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + 2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) /
        (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.75 * s->LineTime - *p->Tno_bw);
  }

  if (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded > 0)
   s->PrefetchBandwidth2 = (p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
         (s->Tpre_rounded - *p->Tno_bw - 2 * s->Tr0_trips_rounded);
  else
   s->PrefetchBandwidth2 = 0;

  if (s->Tpre_rounded - s->Tvm_trips_rounded > 0) {
   s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor + s->prefetch_sw_bytes) /
         (s->Tpre_rounded - s->Tvm_trips_rounded);
   s->Tsw_est3 = s->prefetch_sw_bytes / s->PrefetchBandwidth3;
 }
  else
   s->PrefetchBandwidth3 = 0;


  if (p->VStartup == p->MaxVStartup && (s->Tsw_est3 / s->LineTime < s->min_Lsw_oto) && s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded > 0) {
   s->PrefetchBandwidth3 = (2 * p->MetaRowByte + 2 * p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (s->Tpre_rounded - s->min_Lsw_oto * s->LineTime - 0.5 * s->LineTime - s->Tvm_trips_rounded);
  }

  if (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded > 0)
   s->PrefetchBandwidth4 = s->prefetch_sw_bytes / (s->Tpre_rounded - s->Tvm_trips_rounded - 2 * s->Tr0_trips_rounded);
  else
   s->PrefetchBandwidth4 = 0;

#ifdef __DML_VBA_DEBUG__
  dml_print("DML::%s: Tpre_rounded: %f\n", __func__, s->Tpre_rounded);
  dml_print("DML::%s: Tno_bw: %f\n", __func__, *p->Tno_bw);
  dml_print("DML::%s: Tvm_trips_rounded: %f\n", __func__, s->Tvm_trips_rounded);
  dml_print("DML::%s: Tsw_est1: %f\n", __func__, s->Tsw_est1);
  dml_print("DML::%s: Tsw_est3: %f\n", __func__, s->Tsw_est3);
  dml_print("DML::%s: PrefetchBandwidth1: %f\n", __func__, s->PrefetchBandwidth1);
  dml_print("DML::%s: PrefetchBandwidth2: %f\n", __func__, s->PrefetchBandwidth2);
  dml_print("DML::%s: PrefetchBandwidth3: %f\n", __func__, s->PrefetchBandwidth3);
  dml_print("DML::%s: PrefetchBandwidth4: %f\n", __func__, s->PrefetchBandwidth4);
#endif
  {
   dml_bool_t Case1OK;
   dml_bool_t Case2OK;
   dml_bool_t Case3OK;

   if (s->PrefetchBandwidth1 > 0) {
    if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth1 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth1 >= s->Tr0_trips_rounded) {
     Case1OK = true;
    } else {
     Case1OK = false;
    }
   } else {
    Case1OK = false;
   }

   if (s->PrefetchBandwidth2 > 0) {
    if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth2 >= s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth2 < s->Tr0_trips_rounded) {
     Case2OK = true;
    } else {
     Case2OK = false;
    }
   } else {
    Case2OK = false;
   }

   if (s->PrefetchBandwidth3 > 0) {
    if (*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->PrefetchBandwidth3 < s->Tvm_trips_rounded && (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->PrefetchBandwidth3 >= s->Tr0_trips_rounded) {
     Case3OK = true;
    } else {
     Case3OK = false;
    }
   } else {
    Case3OK = false;
   }

   if (Case1OK) {
    s->prefetch_bw_equ = s->PrefetchBandwidth1;
   } else if (Case2OK) {
    s->prefetch_bw_equ = s->PrefetchBandwidth2;
   } else if (Case3OK) {
    s->prefetch_bw_equ = s->PrefetchBandwidth3;
   } else {
    s->prefetch_bw_equ = s->PrefetchBandwidth4;
   }

#ifdef __DML_VBA_DEBUG__
   dml_print("DML::%s: Case1OK: %u\n", __func__, Case1OK);
   dml_print("DML::%s: Case2OK: %u\n", __func__, Case2OK);
   dml_print("DML::%s: Case3OK: %u\n", __func__, Case3OK);
   dml_print("DML::%s: prefetch_bw_equ: %f\n", __func__, s->prefetch_bw_equ);
#endif

   if (s->prefetch_bw_equ > 0) {
    if (p->GPUVMEnable == true) {
     s->Tvm_equ = dml_max3(*p->Tno_bw + p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / s->prefetch_bw_equ, s->Tvm_trips, s->LineTime / 4);
    } else {
     s->Tvm_equ = s->LineTime / 4;
    }

    if ((p->GPUVMEnable == true || p->myPipe->DCCEnable == true)) {
     s->Tr0_equ = dml_max4((p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / s->prefetch_bw_equ, s->Tr0_trips, (s->LineTime - s->Tvm_equ) / 2, s->LineTime / 4);
    } else {
     s->Tr0_equ = (s->LineTime - s->Tvm_equ) / 2;
    }
   } else {
    s->Tvm_equ = 0;
    s->Tr0_equ = 0;
    dml_print("DML::%s: prefetch_bw_equ equals 0!\n", __func__);
   }
  }


  if (s->dst_y_prefetch_oto < s->dst_y_prefetch_equ) {
   *p->DestinationLinesForPrefetch = s->dst_y_prefetch_oto;
   s->TimeForFetchingMetaPTE = s->Tvm_oto;
   s->TimeForFetchingRowInVBlank = s->Tr0_oto;

   *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
   *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
  } else {
   *p->DestinationLinesForPrefetch = s->dst_y_prefetch_equ;
   s->TimeForFetchingMetaPTE = s->Tvm_equ;
   s->TimeForFetchingRowInVBlank = s->Tr0_equ;

   if (p->VStartup == p->MaxVStartup && p->EnhancedPrefetchScheduleAccelerationFinal != 0) {
    *p->DestinationLinesToRequestVMInVBlank = dml_floor(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
    *p->DestinationLinesToRequestRowInVBlank = dml_floor(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
   } else {
    *p->DestinationLinesToRequestVMInVBlank = dml_ceil(4.0 * s->TimeForFetchingMetaPTE / s->LineTime, 1.0) / 4.0;
    *p->DestinationLinesToRequestRowInVBlank = dml_ceil(4.0 * s->TimeForFetchingRowInVBlank / s->LineTime, 1.0) / 4.0;
   }
  }

  s->LinesToRequestPrefetchPixelData = *p->DestinationLinesForPrefetch - *p->DestinationLinesToRequestVMInVBlank - 2 * *p->DestinationLinesToRequestRowInVBlank;

#ifdef __DML_VBA_DEBUG__
  dml_print("DML::%s: DestinationLinesForPrefetch = %f\n", __func__, *p->DestinationLinesForPrefetch);
  dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
  dml_print("DML::%s: TimeForFetchingRowInVBlank = %f\n", __func__, s->TimeForFetchingRowInVBlank);
  dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
  dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
  dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
  dml_print("DML::%s: LinesToRequestPrefetchPixelData = %f\n", __func__, s->LinesToRequestPrefetchPixelData);
#endif

  if (s->LinesToRequestPrefetchPixelData >= 1 && s->prefetch_bw_equ > 0) {
   *p->VRatioPrefetchY = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData;
   *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY, 1.0);
#ifdef __DML_VBA_DEBUG__
   dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
   dml_print("DML::%s: SwathHeightY = %u\n", __func__, p->SwathHeightY);
   dml_print("DML::%s: VInitPreFillY = %u\n", __func__, p->VInitPreFillY);
#endif
   if ((p->SwathHeightY > 4) && (p->VInitPreFillY > 3)) {
    if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillY - 3.0) / 2.0) {
     *p->VRatioPrefetchY = dml_max(*p->VRatioPrefetchY,
          (dml_float_t)p->MaxNumSwathY * p->SwathHeightY / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillY - 3.0) / 2.0));
   } else {
    s->MyError = true;
    dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VinitPreFillY=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillY);
    *p->VRatioPrefetchY = 0;
   }
#ifdef __DML_VBA_DEBUG__
   dml_print("DML::%s: VRatioPrefetchY = %f\n", __func__, *p->VRatioPrefetchY);
   dml_print("DML::%s: PrefetchSourceLinesY = %f\n", __func__, p->PrefetchSourceLinesY);
   dml_print("DML::%s: MaxNumSwathY = %u\n", __func__, p->MaxNumSwathY);
#endif
   }

   *p->VRatioPrefetchC = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData;
   *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, 1.0);

#ifdef __DML_VBA_DEBUG__
   dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
   dml_print("DML::%s: SwathHeightC = %u\n", __func__, p->SwathHeightC);
   dml_print("DML::%s: VInitPreFillC = %u\n", __func__, p->VInitPreFillC);
#endif
   if ((p->SwathHeightC > 4) && (p->VInitPreFillC > 3)) {
    if (s->LinesToRequestPrefetchPixelData > (p->VInitPreFillC - 3.0) / 2.0) {
     *p->VRatioPrefetchC = dml_max(*p->VRatioPrefetchC, (dml_float_t)p->MaxNumSwathC * p->SwathHeightC / (s->LinesToRequestPrefetchPixelData - (p->VInitPreFillC - 3.0) / 2.0));
    } else {
     s->MyError = true;
     dml_print("DML::%s: MyErr set. LinesToRequestPrefetchPixelData=%f VInitPreFillC=%u\n", __func__, s->LinesToRequestPrefetchPixelData, p->VInitPreFillC);
     *p->VRatioPrefetchC = 0;
    }
#ifdef __DML_VBA_DEBUG__
    dml_print("DML::%s: VRatioPrefetchC = %f\n", __func__, *p->VRatioPrefetchC);
    dml_print("DML::%s: PrefetchSourceLinesC = %f\n", __func__, p->PrefetchSourceLinesC);
    dml_print("DML::%s: MaxNumSwathC = %u\n", __func__, p->MaxNumSwathC);
#endif
   }

   *p->RequiredPrefetchPixDataBWLuma = (dml_float_t)p->PrefetchSourceLinesY / s->LinesToRequestPrefetchPixelData
    * p->myPipe->BytePerPixelY
    * p->swath_width_luma_ub / s->LineTime;

#ifdef __DML_VBA_DEBUG__
   dml_print("DML::%s: BytePerPixelY = %u\n", __func__, p->myPipe->BytePerPixelY);
   dml_print("DML::%s: swath_width_luma_ub = %u\n", __func__, p->swath_width_luma_ub);
   dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
   dml_print("DML::%s: RequiredPrefetchPixDataBWLuma = %f\n", __func__, *p->RequiredPrefetchPixDataBWLuma);
#endif
   *p->RequiredPrefetchPixDataBWChroma = (dml_float_t)p->PrefetchSourceLinesC / s->LinesToRequestPrefetchPixelData
    *p->myPipe->BytePerPixelC
    *p->swath_width_chroma_ub / s->LineTime;
  } else {
   s->MyError = true;
   dml_print("DML:%s: MyErr set. LinesToRequestPrefetchPixelData: %f, should be > 0\n", __func__, s->LinesToRequestPrefetchPixelData);
   *p->VRatioPrefetchY = 0;
   *p->VRatioPrefetchC = 0;
   *p->RequiredPrefetchPixDataBWLuma = 0;
   *p->RequiredPrefetchPixDataBWChroma = 0;
  }

  dml_print("DML: Tpre: %fus - sum of time to request meta pte, 2 x data pte + meta data, swaths\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime + 2.0 * s->TimeForFetchingRowInVBlank + s->TimeForFetchingMetaPTE);
  dml_print("DML: Tvm: %fus - time to fetch page tables for meta surface\n", s->TimeForFetchingMetaPTE);
  dml_print("DML: Tr0: %fus - time to fetch first row of data pagetables and first row of meta data (done in parallel)\n", s->TimeForFetchingRowInVBlank);
  dml_print("DML: Tsw: %fus = time to fetch enough pixel data and cursor data to feed the scalers init position and detile\n", (dml_float_t)s->LinesToRequestPrefetchPixelData * s->LineTime);
  dml_print("DML: To: %fus - time for propagation from scaler to optc\n", (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime);
  dml_print("DML: Tvstartup - TSetup - Tcalc - Twait - Tpre - To > 0\n");
  dml_print("DML: Tslack(pre): %fus - time left over in schedule\n", p->VStartup * s->LineTime - s->TimeForFetchingMetaPTE - 2 * s->TimeForFetchingRowInVBlank - (*p->DSTYAfterScaler + ((dml_float_t) (*p->DSTXAfterScaler) / (dml_float_t)p->myPipe->HTotal)) * s->LineTime - p->TWait - p->TCalc - *p->TSetup);
  dml_print("DML: row_bytes = dpte_row_bytes (per_pipe) = PixelPTEBytesPerRow = : %u\n", p->PixelPTEBytesPerRow);

 } else {
  s->MyError = true;
  dml_print("DML::%s: MyErr set, dst_y_prefetch_equ = %f (should be > 1)\n", __func__, s->dst_y_prefetch_equ);
  s->TimeForFetchingMetaPTE = 0;
  s->TimeForFetchingRowInVBlank = 0;
  *p->DestinationLinesToRequestVMInVBlank = 0;
  *p->DestinationLinesToRequestRowInVBlank = 0;
  s->LinesToRequestPrefetchPixelData = 0;
  *p->VRatioPrefetchY = 0;
  *p->VRatioPrefetchC = 0;
  *p->RequiredPrefetchPixDataBWLuma = 0;
  *p->RequiredPrefetchPixDataBWChroma = 0;
 }

 {
  dml_float_t prefetch_vm_bw;
  dml_float_t prefetch_row_bw;

  if (p->PDEAndMetaPTEBytesFrame == 0) {
   prefetch_vm_bw = 0;
  } else if (*p->DestinationLinesToRequestVMInVBlank > 0) {
#ifdef __DML_VBA_DEBUG__
   dml_print("DML::%s: PDEAndMetaPTEBytesFrame = %u\n", __func__, p->PDEAndMetaPTEBytesFrame);
   dml_print("DML::%s: HostVMInefficiencyFactor = %f\n", __func__, p->HostVMInefficiencyFactor);
   dml_print("DML::%s: DestinationLinesToRequestVMInVBlank = %f\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
   dml_print("DML::%s: LineTime = %f\n", __func__, s->LineTime);
#endif
  prefetch_vm_bw = p->PDEAndMetaPTEBytesFrame * p->HostVMInefficiencyFactor / (*p->DestinationLinesToRequestVMInVBlank * s->LineTime);
#ifdef __DML_VBA_DEBUG__
   dml_print("DML::%s: prefetch_vm_bw = %f\n", __func__, prefetch_vm_bw);
#endif
  } else {
   prefetch_vm_bw = 0;
   s->MyError = true;
   dml_print("DML::%s: MyErr set. DestinationLinesToRequestVMInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestVMInVBlank);
  }

  if (p->MetaRowByte + p->PixelPTEBytesPerRow == 0) {
   prefetch_row_bw = 0;
  } else if (*p->DestinationLinesToRequestRowInVBlank > 0) {
   prefetch_row_bw = (p->MetaRowByte + p->PixelPTEBytesPerRow * p->HostVMInefficiencyFactor) / (*p->DestinationLinesToRequestRowInVBlank * s->LineTime);

#ifdef __DML_VBA_DEBUG__
  dml_print("DML::%s: MetaRowByte = %u\n", __func__, p->MetaRowByte);
  dml_print("DML::%s: PixelPTEBytesPerRow = %u\n", __func__, p->PixelPTEBytesPerRow);
  dml_print("DML::%s: DestinationLinesToRequestRowInVBlank = %f\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
  dml_print("DML::%s: prefetch_row_bw = %f\n", __func__, prefetch_row_bw);
#endif
  } else {
   prefetch_row_bw = 0;
   s->MyError = true;
   dml_print("DML::%s: MyErr set. DestinationLinesToRequestRowInVBlank=%f (should be > 0)\n", __func__, *p->DestinationLinesToRequestRowInVBlank);
  }

  *p->prefetch_vmrow_bw = dml_max(prefetch_vm_bw, prefetch_row_bw);
 }

 if (s->MyError) {
  s->TimeForFetchingMetaPTE = 0;
  s->TimeForFetchingRowInVBlank = 0;
  *p->DestinationLinesToRequestVMInVBlank = 0;
  *p->DestinationLinesToRequestRowInVBlank = 0;
  *p->DestinationLinesForPrefetch = 0;
  s->LinesToRequestPrefetchPixelData = 0;
  *p->VRatioPrefetchY = 0;
  *p->VRatioPrefetchC = 0;
  *p->RequiredPrefetchPixDataBWLuma = 0;
  *p->RequiredPrefetchPixDataBWChroma = 0;
 }

 return s->MyError;
// CalculatePrefetchSchedule

static void CalculateBytePerPixelAndBlockSizes(
 enum dml_source_format_class SourcePixelFormat,
 enum dml_swizzle_mode SurfaceTiling,

 // Output
 dml_uint_t *BytePerPixelY,
 dml_uint_t *BytePerPixelC,
 dml_float_t *BytePerPixelDETY,
 dml_float_t *BytePerPixelDETC,
 dml_uint_t *BlockHeight256BytesY,
 dml_uint_t *BlockHeight256BytesC,
 dml_uint_t *BlockWidth256BytesY,
 dml_uint_t *BlockWidth256BytesC,
 dml_uint_t *MacroTileHeightY,
 dml_uint_t *MacroTileHeightC,
 dml_uint_t *MacroTileWidthY,
 dml_uint_t *MacroTileWidthC)
{
 if (SourcePixelFormat == dml_444_64) {
  *BytePerPixelDETY = 8;
  *BytePerPixelDETC = 0;
  *BytePerPixelY = 8;
  *BytePerPixelC = 0;
 } else if (SourcePixelFormat == dml_444_32 || SourcePixelFormat == dml_rgbe) {
  *BytePerPixelDETY = 4;
  *BytePerPixelDETC = 0;
  *BytePerPixelY = 4;
  *BytePerPixelC = 0;
 } else if (SourcePixelFormat == dml_444_16 || SourcePixelFormat == dml_mono_16) {
  *BytePerPixelDETY = 2;
  *BytePerPixelDETC = 0;
  *BytePerPixelY = 2;
  *BytePerPixelC = 0;
 } else if (SourcePixelFormat == dml_444_8 || SourcePixelFormat == dml_mono_8) {
  *BytePerPixelDETY = 1;
  *BytePerPixelDETC = 0;
  *BytePerPixelY = 1;
  *BytePerPixelC = 0;
 } else if (SourcePixelFormat == dml_rgbe_alpha) {
  *BytePerPixelDETY = 4;
  *BytePerPixelDETC = 1;
  *BytePerPixelY = 4;
  *BytePerPixelC = 1;
 } else if (SourcePixelFormat == dml_420_8) {
  *BytePerPixelDETY = 1;
  *BytePerPixelDETC = 2;
  *BytePerPixelY = 1;
  *BytePerPixelC = 2;
 } else if (SourcePixelFormat == dml_420_12) {
  *BytePerPixelDETY = 2;
  *BytePerPixelDETC = 4;
  *BytePerPixelY = 2;
  *BytePerPixelC = 4;
 } else {
  *BytePerPixelDETY = (dml_float_t) (4.0 / 3);
  *BytePerPixelDETC = (dml_float_t) (8.0 / 3);
  *BytePerPixelY = 2;
  *BytePerPixelC = 4;
 }
#ifdef __DML_VBA_DEBUG__
 dml_print("DML::%s: SourcePixelFormat = %u\n", __func__, SourcePixelFormat);
 dml_print("DML::%s: BytePerPixelDETY = %f\n", __func__, *BytePerPixelDETY);
 dml_print("DML::%s: BytePerPixelDETC = %f\n", __func__, *BytePerPixelDETC);
 dml_print("DML::%s: BytePerPixelY = %u\n", __func__, *BytePerPixelY);
 dml_print("DML::%s: BytePerPixelC = %u\n", __func__, *BytePerPixelC);
#endif
 if ((SourcePixelFormat == dml_444_64 || SourcePixelFormat == dml_444_32
  || SourcePixelFormat == dml_444_16
  || SourcePixelFormat == dml_444_8
  || SourcePixelFormat == dml_mono_16
  || SourcePixelFormat == dml_mono_8
  || SourcePixelFormat == dml_rgbe)) {
  if (SurfaceTiling == dml_sw_linear) {
   *BlockHeight256BytesY = 1;
  } else if (SourcePixelFormat == dml_444_64) {
   *BlockHeight256BytesY = 4;
  } else if (SourcePixelFormat == dml_444_8) {
   *BlockHeight256BytesY = 16;
  } else {
   *BlockHeight256BytesY = 8;
  }
  *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
  *BlockHeight256BytesC = 0;
  *BlockWidth256BytesC = 0;
 } else {
  if (SurfaceTiling == dml_sw_linear) {
   *BlockHeight256BytesY = 1;
   *BlockHeight256BytesC = 1;
  } else if (SourcePixelFormat == dml_rgbe_alpha) {
   *BlockHeight256BytesY = 8;
   *BlockHeight256BytesC = 16;
  } else if (SourcePixelFormat == dml_420_8) {
   *BlockHeight256BytesY = 16;
   *BlockHeight256BytesC = 8;
  } else {
   *BlockHeight256BytesY = 8;
   *BlockHeight256BytesC = 8;
  }
  *BlockWidth256BytesY = 256U / *BytePerPixelY / *BlockHeight256BytesY;
  *BlockWidth256BytesC = 256U / *BytePerPixelC / *BlockHeight256BytesC;
 }
#ifdef __DML_VBA_DEBUG__
 dml_print("DML::%s: BlockWidth256BytesY = %u\n", __func__, *BlockWidth256BytesY);
 dml_print("DML::%s: BlockHeight256BytesY = %u\n", __func__, *BlockHeight256BytesY);
 dml_print("DML::%s: BlockWidth256BytesC = %u\n", __func__, *BlockWidth256BytesC);
 dml_print("DML::%s: BlockHeight256BytesC = %u\n", __func__, *BlockHeight256BytesC);
#endif

 if (SurfaceTiling == dml_sw_linear) {
  *MacroTileHeightY = *BlockHeight256BytesY;
  *MacroTileWidthY = 256 / *BytePerPixelY / *MacroTileHeightY;
  *MacroTileHeightC = *BlockHeight256BytesC;
  if (*MacroTileHeightC == 0) {
   *MacroTileWidthC = 0;
  } else {
   *MacroTileWidthC = 256 / *BytePerPixelC / *MacroTileHeightC;
 }
 } else if (SurfaceTiling == dml_sw_64kb_d || SurfaceTiling == dml_sw_64kb_d_t || SurfaceTiling == dml_sw_64kb_d_x || SurfaceTiling == dml_sw_64kb_r_x) {
  *MacroTileHeightY = 16 * *BlockHeight256BytesY;
  *MacroTileWidthY = 65536 / *BytePerPixelY / *MacroTileHeightY;
  *MacroTileHeightC = 16 * *BlockHeight256BytesC;
  if (*MacroTileHeightC == 0) {
   *MacroTileWidthC = 0;
  } else {
   *MacroTileWidthC = 65536 / *BytePerPixelC / *MacroTileHeightC;
  }
 } else {
  *MacroTileHeightY = 32 * *BlockHeight256BytesY;
  *MacroTileWidthY = 65536 * 4 / *BytePerPixelY / *MacroTileHeightY;
--> --------------------

--> maximum size reached

--> --------------------

Messung V0.5
C=95 H=95 G=94

¤ Dauer der Verarbeitung: 0.7 Sekunden  (vorverarbeitet)  ¤

*© Formatika GbR, Deutschland






Wurzel

Suchen

Beweissystem der NASA

Beweissystem Isabelle

NIST Cobol Testsuite

Cephes Mathematical Library

Wiener Entwicklungsmethode

Haftungshinweis

Die Informationen auf dieser Webseite wurden nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit, noch Qualität der bereitgestellten Informationen zugesichert.

Bemerkung:

Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.






                                                                                                                                                                                                                                                                                                                                                                                                     


Neuigkeiten

     Aktuelles
     Motto des Tages

Software

     Produkte
     Quellcodebibliothek

Aktivitäten

     Artikel über Sicherheit
     Anleitung zur Aktivierung von SSL

Muße

     Gedichte
     Musik
     Bilder

Jenseits des Üblichen ....

Besucherstatistik

Besucherstatistik

Monitoring

Montastic status badge