/*
 * Pending-reset related constants. Each preprocessor directive must sit on its
 * own line, otherwise everything after the first macro name becomes part of
 * that macro's replacement list.
 */
#define HL_PENDING_RESET_PER_SEC		10
#define HL_PENDING_RESET_MAX_TRIALS		60 /* 10 minutes */
#define HL_PENDING_RESET_LONG_SEC		60
/*
 * In device fini, wait 10 minutes for user processes to be terminated after we kill them.
 * This is needed to prevent situation of clearing resources while user processes are still alive.
 */
#define HL_WAIT_PROCESS_KILL_ON_DEVICE_FINI	600
/*
 * Reset Flags
 *
 * - HL_DRV_RESET_HARD
 *       If set, do a hard reset to all engines. If not set, reset just the
 *       compute/DMA engines.
 *
 * - HL_DRV_RESET_FROM_RESET_THR
 *       Set if the caller is the hard-reset thread.
 *
 * - HL_DRV_RESET_HEARTBEAT
 *       Set if the reset is due to heartbeat.
 *
 * - HL_DRV_RESET_TDR
 *       Set if the reset is due to TDR.
 *
 * - HL_DRV_RESET_DEV_RELEASE
 *       Set if the reset is due to device release.
 *
 * - HL_DRV_RESET_BYPASS_REQ_TO_FW
 *       F/W will perform the reset. No need to ask it to reset the device.
 *       This is relevant only when running with secured F/W.
 *
 * - HL_DRV_RESET_FW_FATAL_ERR
 *       Set if the reset is due to a fatal error from F/W.
 *
 * - HL_DRV_RESET_DELAY
 *       Set if a delay should be added before the reset.
 *
 * - HL_DRV_RESET_FROM_WD_THR
 *       Set if the caller is the device release watchdog thread.
 */
/**
 * struct iterate_module_ctx - context handed to the HW module iterator
 * @fn: callback applied to every HW module instance
 * @data: optional private data for the callback
 * @rc: return code, for optional use by the iterator and/or its caller
 */
struct iterate_module_ctx {
	/*
	 * Per-instance callback of the HW module iterator.
	 * @hdev: pointer to the habanalabs device structure
	 * @block: block (ASIC specific definition, can be dcore/hdcore)
	 * @inst: HW module instance within the block
	 * @offset: offset of the current HW module instance from the first
	 *          HW module instance in the first block
	 * @ctx: the iterator context
	 */
	void (*fn)(struct hl_device *hdev, int block, int inst, u32 offset,
			struct iterate_module_ctx *ctx);
	void *data;
	int rc;
};
/**
 * struct hl_gen_wait_properties - inputs for generating a wait CB
 * @data: the command buffer
 * @q_idx: queue id, used to extract the fence register address
 * @size: offset inside the command buffer
 * @sob_base: base SOB to use in this wait CB
 * @sob_val: SOB value to wait for
 * @mon_id: monitor to use in this wait CB
 * @sob_mask: bitmask, each bit represents a SOB offset from @sob_base to be used
 */
struct hl_gen_wait_properties {
	void	*data;
	u32	q_idx;
	u32	size;
	u16	sob_base;
	u16	sob_val;
	u16	mon_id;
	u8	sob_mask;
};
/**
 * struct pgt_info - descriptor of a single MMU hop page table.
 * @node: hash linked-list node for the pgts kept on the host (shadow pgts for a
 *        device resident MMU and actual pgts for a host resident MMU).
 * @phys_addr: physical address of the pgt.
 * @virt_addr: host virtual address of the pgt (see device/host resident above).
 * @shadow_addr: shadow hop in the host, for a device resident MMU.
 * @ctx: pointer to the owner ctx.
 * @num_of_ptes: how many PTEs are used in the pgt. Used only for dynamically
 *               allocated HOPs (all HOPs but HOP0).
 *
 * The MMU page tables hierarchy can be placed either on the device's DRAM (in
 * which case shadow pgts are stored in host memory) or in host memory (in which
 * case no shadow is required).
 *
 * When a new level (hop) is needed during mapping, this structure describes the
 * newly allocated hop and tracks the number of PTEs in it.
 * During unmapping, if no valid PTEs remain in the page of a newly allocated
 * hop, the hop is freed together with its pgt_info structure.
 */
struct pgt_info {
	struct hlist_node	node;
	u64			phys_addr;
	u64			virt_addr;
	u64			shadow_addr;
	struct hl_ctx		*ctx;
	int			num_of_ptes;
};
/**
 * enum hl_pci_match_mode - per-region PCI match mode
 * @PCI_ADDRESS_MATCH_MODE: the region is matched by address
 * @PCI_BAR_MATCH_MODE: the region is matched by BAR
 */
enum hl_pci_match_mode {
	PCI_ADDRESS_MATCH_MODE = 0,
	PCI_BAR_MATCH_MODE = 1,
};
/**
 * enum hl_fw_component - F/W components whose version is read through registers.
 * @FW_COMP_BOOT_FIT: boot fit.
 * @FW_COMP_PREBOOT: preboot.
 * @FW_COMP_LINUX: linux.
 */
enum hl_fw_component {
	FW_COMP_BOOT_FIT = 0,
	FW_COMP_PREBOOT = 1,
	FW_COMP_LINUX = 2,
};
/**
 * enum hl_fw_types - F/W types present in the system (bit flags)
 * @FW_TYPE_NONE: no FW component indication
 * @FW_TYPE_LINUX: Linux image for the device CPU
 * @FW_TYPE_BOOT_CPU: boot image for the device CPU
 * @FW_TYPE_PREBOOT_CPU: indicates pre-loaded CPUs are present in the system
 *                       (preboot, ppboot etc...)
 * @FW_TYPE_ALL_TYPES: mask covering all of the types above
 */
enum hl_fw_types {
	FW_TYPE_NONE		= 0,
	FW_TYPE_LINUX		= 1 << 0,
	FW_TYPE_BOOT_CPU	= 1 << 1,
	FW_TYPE_PREBOOT_CPU	= 1 << 2,
	FW_TYPE_ALL_TYPES	= FW_TYPE_LINUX | FW_TYPE_BOOT_CPU | FW_TYPE_PREBOOT_CPU,
};
/**
 * enum hl_queue_type - the supported queue types.
 * @QUEUE_TYPE_NA: the queue is not available.
 * @QUEUE_TYPE_EXT: external queue, a DMA channel that may access the host.
 * @QUEUE_TYPE_INT: internal queue, performs DMA inside the device's memories
 *                  and/or operates the compute engines.
 * @QUEUE_TYPE_CPU: S/W queue for communication with the device's CPU.
 * @QUEUE_TYPE_HW: queue of DMA and compute engines jobs, for which completion
 *                 notifications are sent by H/W.
 */
enum hl_queue_type {
	QUEUE_TYPE_NA = 0,
	QUEUE_TYPE_EXT = 1,
	QUEUE_TYPE_INT = 2,
	QUEUE_TYPE_CPU = 3,
	QUEUE_TYPE_HW = 4,
};
/**
 * struct hl_inbound_pci_region - descriptor of an inbound PCI region
 * @mode: PCI match mode used for this region
 * @addr: target address of the region
 * @size: size of the region, in bytes
 * @offset_in_bar: offset within the BAR (address match mode)
 * @bar: BAR id
 */
struct hl_inbound_pci_region {
	enum hl_pci_match_mode	mode;
	u64			addr;
	u64			size;
	u64			offset_in_bar;
	u8			bar;
};
/**
 * struct hl_outbound_pci_region - descriptor of an outbound PCI region
 * @addr: target address of the region
 * @size: size of the region, in bytes
 */
struct hl_outbound_pci_region {
	u64	addr;
	u64	size;
};
/**
 * enum queue_cb_alloc_flags - indicates whether a queue supports CBs that were
 *                             allocated by the kernel or by the user
 * @CB_ALLOC_KERNEL: supports only CBs that were allocated by the kernel
 * @CB_ALLOC_USER: supports only CBs that were allocated by the user
 */
enum queue_cb_alloc_flags {
	CB_ALLOC_KERNEL	= 1 << 0,
	CB_ALLOC_USER	= 1 << 1,
};
/**
 * struct hl_hw_sob - H/W SOB information.
 * @hdev: the habanalabs device structure.
 * @kref: refcount of this SOB. The SOB is reset once the refcount drops to zero.
 * @sob_id: id of this SOB.
 * @sob_addr: offset of the SOB from the base address.
 * @q_idx: the H/W queue that uses this SOB.
 * @need_reset: reset indication, set when switching to the other SOB.
 */
struct hl_hw_sob {
	struct hl_device	*hdev;
	struct kref		kref;
	u32			sob_id;
	u32			sob_addr;
	u32			q_idx;
	bool			need_reset;
};
/**
 * struct hw_queue_properties - per-queue information.
 * @type: the queue type.
 * @cb_alloc_flags: bitmap indicating whether the hw queue supports CBs that
 *                  were allocated by the kernel driver, in which case a CB
 *                  handle can be provided for jobs on this queue.
 *                  Otherwise, a CB address must be provided.
 * @collective_mode: collective mode of this queue.
 * @q_dram_bd_address: PQ DRAM address, used when the PQ needs to reside in DRAM.
 * @driver_only: true if only the driver is allowed to send a job to this queue,
 *               false otherwise.
 * @binned: true if the queue is binned out and should not be used.
 * @supports_sync_stream: true if the queue supports sync stream.
 * @dram_bd: true if the bd should be copied to DRAM, needed for a PQ which has
 *           been allocated on DRAM.
 */
struct hw_queue_properties {
	enum hl_queue_type		type;
	enum queue_cb_alloc_flags	cb_alloc_flags;
	enum hl_collective_mode		collective_mode;
	u64				q_dram_bd_address;
	u8				driver_only;
	u8				binned;
	u8				supports_sync_stream;
	u8				dram_bd;
};
/**
 * enum vm_type - kind of a virtual memory mapping request.
 * @VM_TYPE_USERPTR: user memory mapped to a device virtual address.
 * @VM_TYPE_PHYS_PACK: DRAM memory mapped to a device virtual address.
 */
enum vm_type {
	VM_TYPE_USERPTR		= 1 << 0,
	VM_TYPE_PHYS_PACK	= 1 << 1,
};
/**
 * enum mmu_op_flags - information relevant to an MMU operation (bit flags).
 * @MMU_OP_USERPTR: the operation is on user memory (host resident).
 * @MMU_OP_PHYS_PACK: the operation is on DRAM (device resident).
 * @MMU_OP_CLEAR_MEMCACHE: the operation has to clear the memcache.
 * @MMU_OP_SKIP_LOW_CACHE_INV: the operation is allowed to skip parts of the
 *                             cache invalidation.
 */
enum mmu_op_flags {
	MMU_OP_USERPTR			= 1 << 0,
	MMU_OP_PHYS_PACK		= 1 << 1,
	MMU_OP_CLEAR_MEMCACHE		= 1 << 2,
	MMU_OP_SKIP_LOW_CACHE_INV	= 1 << 3,
};
/**
 * enum hl_device_hw_state - H/W device state. Used to decide whether a reset
 *                           is needed before hw_init or not.
 * @HL_DEVICE_HW_STATE_CLEAN: the H/W state is clean, i.e. after a hard reset.
 * @HL_DEVICE_HW_STATE_DIRTY: the H/W state is dirty, i.e. hw_init has started
 *                            executing.
 */
enum hl_device_hw_state {
	HL_DEVICE_HW_STATE_CLEAN = 0,
	HL_DEVICE_HW_STATE_DIRTY = 1,
};
/*
 * NOTE(review): presumably the value passed as a VA alignment when the caller
 * has no alignment requirement - confirm against the users of this macro.
 */
#define HL_MMU_VA_ALIGNMENT_NOT_NEEDED 0
/**
 * struct hl_mmu_properties - ASIC specific MMU address translation properties.
 * @start_addr: virtual start address of the memory region.
 * @end_addr: virtual end address of the memory region.
 * @hop_shifts: array holding the HOPs shifts.
 * @hop_masks: array holding the HOPs masks.
 * @last_mask: mask to get the bit indicating this is the last hop.
 * @pgt_size: size for the page tables.
 * @supported_pages_mask: bitmask of the supported page sizes (relevant only
 *                        for MMUs supporting multiple page sizes).
 * @page_size: default page size used to allocate memory.
 * @num_hops: the amount of hops supported by the translation table.
 * @hop_table_size: HOP table size.
 * @hop0_tables_total_size: total size of all HOP0 tables.
 * @host_resident: should the MMU page table reside in host memory or in the
 *                 device DRAM.
 */
struct hl_mmu_properties {
	u64	start_addr;
	u64	end_addr;
	u64	hop_shifts[MMU_HOP_MAX];
	u64	hop_masks[MMU_HOP_MAX];
	u64	last_mask;
	u64	pgt_size;
	u64	supported_pages_mask;
	u32	page_size;
	u32	num_hops;
	u32	hop_table_size;
	u32	hop0_tables_total_size;
	u8	host_resident;
};
/**
 * struct hl_hints_range - VA range reserved for hint addresses.
 * @start_addr: first address of the VA range.
 * @end_addr: end address of the VA range.
 */
struct hl_hints_range {
	u64	start_addr;
	u64	end_addr;
};
/**
 * struct asic_fixed_properties - ASIC specific immutable properties.
 * @hw_queues_props: H/W queues properties.
 * @special_blocks: points to an array containing special blocks info.
 * @skip_special_blocks_cfg: special blocks skip configs.
 * @cpucp_info: received various information from CPU-CP regarding the H/W, e.g.
 *		available sensors.
 * @uboot_ver: F/W U-boot version.
 * @preboot_ver: F/W Preboot version.
 * @dmmu: DRAM MMU address translation properties.
 * @pmmu: PCI (host) MMU address translation properties.
 * @pmmu_huge: PCI (host) MMU address translation properties for memory
 *              allocated with huge pages.
 * @hints_dram_reserved_va_range: dram hint addresses reserved range.
 * @hints_host_reserved_va_range: host hint addresses reserved range.
 * @hints_host_hpage_reserved_va_range: host huge page hint addresses reserved range.
 * @sram_base_address: SRAM physical start address.
 * @sram_end_address: SRAM physical end address.
 * @sram_user_base_address: SRAM physical start address for user access.
 * @dram_base_address: DRAM physical start address.
 * @dram_end_address: DRAM physical end address.
 * @dram_user_base_address: DRAM physical start address for user access.
 * @dram_size: DRAM total size.
 * @dram_pci_bar_size: size of PCI bar towards DRAM.
 * @max_power_default: max power of the device after reset.
 * @dc_power_default: power consumed by the device in mode idle.
 * @dram_size_for_default_page_mapping: DRAM size needed to map to avoid page
 *                                      fault.
 * @pcie_dbi_base_address: Base address of the PCIE_DBI block.
 * @pcie_aux_dbi_reg_addr: Address of the PCIE_AUX DBI register.
 * @mmu_pgt_addr: base physical address in DRAM of MMU page tables.
 * @mmu_dram_default_page_addr: DRAM default page physical address.
 * @tpc_enabled_mask: which TPCs are enabled.
 * @tpc_binning_mask: which TPCs are binned. 0 means usable and 1 means binned.
 * @dram_enabled_mask: which DRAMs are enabled.
 * @dram_binning_mask: which DRAMs are binned. 0 means usable, 1 means binned.
 * @dram_hints_align_mask: dram va hint addresses alignment mask which is used
 *                  for hints validity check.
 * @cfg_base_address: config space base address.
 * @mmu_cache_mng_addr: address of the MMU cache.
 * @mmu_cache_mng_size: size of the MMU cache.
 * @device_dma_offset_for_host_access: the offset to add to host DMA addresses
 *                                     to enable the device to access them.
 * @host_base_address: host physical start address for host DMA from device
 * @host_end_address: host physical end address for host DMA from device
 * @max_freq_value: current max clk frequency.
 * @engine_core_interrupt_reg_addr: interrupt register address for engine core to use
 *                                  in order to raise events toward FW.
 * @clk_pll_index: clock PLL index that specify which PLL determines the clock
 *                 we display to the user
 * @mmu_pgt_size: MMU page tables total size.
 * @mmu_pte_size: PTE size in MMU page tables.
 * @dram_page_size: The DRAM physical page size.
 * @cfg_size: configuration space size on SRAM.
 * @sram_size: total size of SRAM.
 * @max_asid: maximum number of open contexts (ASIDs).
 * @num_of_events: number of possible internal H/W IRQs.
 * @psoc_pci_pll_nr: PCI PLL NR value.
 * @psoc_pci_pll_nf: PCI PLL NF value.
 * @psoc_pci_pll_od: PCI PLL OD value.
 * @psoc_pci_pll_div_factor: PCI PLL DIV FACTOR 1 value.
 * @psoc_timestamp_frequency: frequency of the psoc timestamp clock.
 * @high_pll: high PLL frequency used by the device.
 * @cb_pool_cb_cnt: number of CBs in the CB pool.
 * @cb_pool_cb_size: size of each CB in the CB pool.
 * @decoder_enabled_mask: which decoders are enabled.
 * @decoder_binning_mask: which decoders are binned, 0 means usable and 1 means binned.
 * @rotator_enabled_mask: which rotators are enabled.
 * @edma_enabled_mask: which EDMAs are enabled.
 * @edma_binning_mask: which EDMAs are binned, 0 means usable and 1 means
 *                     binned (at most one binned DMA).
 * @max_pending_cs: maximum of concurrent pending command submissions
 * @max_queues: maximum amount of queues in the system
 * @fw_preboot_cpu_boot_dev_sts0: bitmap representation of preboot cpu
 *                                capabilities reported by FW, bit description
 *                                can be found in CPU_BOOT_DEV_STS0
 * @fw_preboot_cpu_boot_dev_sts1: bitmap representation of preboot cpu
 *                                capabilities reported by FW, bit description
 *                                can be found in CPU_BOOT_DEV_STS1
 * @fw_bootfit_cpu_boot_dev_sts0: bitmap representation of boot cpu security
 *                                status reported by FW, bit description can be
 *                                found in CPU_BOOT_DEV_STS0
 * @fw_bootfit_cpu_boot_dev_sts1: bitmap representation of boot cpu security
 *                                status reported by FW, bit description can be
 *                                found in CPU_BOOT_DEV_STS1
 * @fw_app_cpu_boot_dev_sts0: bitmap representation of application security
 *                            status reported by FW, bit description can be
 *                            found in CPU_BOOT_DEV_STS0
 * @fw_app_cpu_boot_dev_sts1: bitmap representation of application security
 *                            status reported by FW, bit description can be
 *                            found in CPU_BOOT_DEV_STS1
 * @max_dec: maximum number of decoders
 * @hmmu_hif_enabled_mask: mask of HMMUs/HIFs that are not isolated (enabled)
 *                         1- enabled, 0- isolated.
 * @faulty_dram_cluster_map: mask of faulty DRAM cluster.
 *                         1- faulty cluster, 0- good cluster.
 * @xbar_edge_enabled_mask: mask of XBAR_EDGEs that are not isolated (enabled)
 *                          1- enabled, 0- isolated.
 * @device_mem_alloc_default_page_size: may be different than dram_page_size only for ASICs for
 *                                      which the property supports_user_set_page_size is true
 *                                      (i.e. the DRAM supports multiple page sizes), otherwise
 *                                      it shall be equal to dram_page_size.
 * @num_engine_cores: number of engine cpu cores.
 * @max_num_of_engines: maximum number of all engines in the ASIC.
 * @num_of_special_blocks: special_blocks array size.
 * @glbl_err_max_cause_num: global err max cause number.
 * @hbw_flush_reg: register to read to generate HBW flush. value of 0 means HBW flush is
 *                 not supported.
 * @reserved_fw_mem_size: size of dram memory reserved for FW.
 * @fw_event_queue_size: queue size for events from CPU-CP.
 *                       A value of 0 means using the default HL_EQ_SIZE_IN_BYTES value.
 * @collective_first_sob: first sync object available for collective use
 * @collective_first_mon: first monitor available for collective use
 * @sync_stream_first_sob: first sync object available for sync stream use
 * @sync_stream_first_mon: first monitor available for sync stream use
 * @first_available_user_sob: first sob available for the user
 * @first_available_user_mon: first monitor available for the user
 * @first_available_user_interrupt: first available interrupt reserved for the user
 * @first_available_cq: first available CQ for the user.
 * @user_interrupt_count: number of user interrupts.
 * @user_dec_intr_count: number of decoder interrupts exposed to user.
 * @tpc_interrupt_id: interrupt id for TPC to use in order to raise events towards the host.
 * @eq_interrupt_id: interrupt id for EQ, used to synchronize EQ interrupts in hard-reset.
 * @cache_line_size: device cache line size.
 * @server_type: Server type that the ASIC is currently installed in.
 *               The value is according to enum hl_server_type in uapi file.
 * @completion_queues_count: number of completion queues.
 * @completion_mode: 0 - job based completion, 1 - cs based completion
 * @mme_master_slave_mode: 0 - Each MME works independently, 1 - MME works
 *                         in Master/Slave mode
 * @fw_security_enabled: true if security measures are enabled in firmware,
 *                       false otherwise
 * @fw_cpu_boot_dev_sts0_valid: status bits are valid and can be fetched from
 *                              BOOT_DEV_STS0
 * @fw_cpu_boot_dev_sts1_valid: status bits are valid and can be fetched from
 *                              BOOT_DEV_STS1
 * @dram_supports_virtual_memory: is there an MMU towards the DRAM
 * @hard_reset_done_by_fw: true if firmware is handling hard reset flow
 * @num_functional_hbms: number of functional HBMs in each DCORE.
 * @hints_range_reservation: device support hint addresses range reservation.
 * @iatu_done_by_fw: true if iATU configuration is being done by FW.
 * @dynamic_fw_load: is dynamic FW load supported.
 * @gic_interrupts_enable: true if FW is not blocking GIC controller,
 *                         false otherwise.
 * @use_get_power_for_reset_history: To support backward compatibility for Goya
 *                                   and Gaudi
 * @supports_compute_reset: is a reset which is not a hard-reset supported by this asic.
 * @allow_inference_soft_reset: true if the ASIC supports soft reset that is
 *                              initiated by user or TDR. This is only true
 *                              in inference ASICs, as there is no real-world
 *                              use-case of doing soft-reset in training (due
 *                              to the fact that training runs on multiple
 *                              devices)
 * @configurable_stop_on_err: is stop-on-error option configurable via debugfs.
 * @set_max_power_on_device_init: true if need to set max power in F/W on device init.
 * @supports_user_set_page_size: true if user can set the allocation page size.
 * @dma_mask: the dma mask to be set for this device.
 * @supports_advanced_cpucp_rc: true if new cpucp opcodes are supported.
 * @supports_engine_modes: true if changing engines/engine_cores modes is supported.
 * @support_dynamic_resereved_fw_size: true if we support dynamic reserved size for fw.
 */
struct asic_fixed_properties {
	struct hw_queue_properties *hw_queues_props;
	struct hl_special_block_info *special_blocks;
	struct hl_skip_blocks_cfg skip_special_blocks_cfg;
	struct cpucp_info cpucp_info;
	char uboot_ver[VERSION_MAX_LEN];
	char preboot_ver[VERSION_MAX_LEN];
	struct hl_mmu_properties dmmu;
	struct hl_mmu_properties pmmu;
	struct hl_mmu_properties pmmu_huge;
	struct hl_hints_range hints_dram_reserved_va_range;
	struct hl_hints_range hints_host_reserved_va_range;
	struct hl_hints_range hints_host_hpage_reserved_va_range;
	u64 sram_base_address;
	u64 sram_end_address;
	u64 sram_user_base_address;
	u64 dram_base_address;
	u64 dram_end_address;
	u64 dram_user_base_address;
	u64 dram_size;
	u64 dram_pci_bar_size;
	u64 max_power_default;
	u64 dc_power_default;
	u64 dram_size_for_default_page_mapping;
	u64 pcie_dbi_base_address;
	u64 pcie_aux_dbi_reg_addr;
	u64 mmu_pgt_addr;
	u64 mmu_dram_default_page_addr;
	u64 tpc_enabled_mask;
	u64 tpc_binning_mask;
	u64 dram_enabled_mask;
	u64 dram_binning_mask;
	u64 dram_hints_align_mask;
	u64 cfg_base_address;
	u64 mmu_cache_mng_addr;
	u64 mmu_cache_mng_size;
	u64 device_dma_offset_for_host_access;
	u64 host_base_address;
	u64 host_end_address;
	u64 max_freq_value;
	u64 engine_core_interrupt_reg_addr;
	u32 clk_pll_index;
	u32 mmu_pgt_size;
	u32 mmu_pte_size;
	u32 dram_page_size;
	u32 cfg_size;
	u32 sram_size;
	u32 max_asid;
	u32 num_of_events;
	u32 psoc_pci_pll_nr;
	u32 psoc_pci_pll_nf;
	u32 psoc_pci_pll_od;
	u32 psoc_pci_pll_div_factor;
	u32 psoc_timestamp_frequency;
	u32 high_pll;
	u32 cb_pool_cb_cnt;
	u32 cb_pool_cb_size;
	u32 decoder_enabled_mask;
	u32 decoder_binning_mask;
	u32 rotator_enabled_mask;
	u32 edma_enabled_mask;
	u32 edma_binning_mask;
	u32 max_pending_cs;
	u32 max_queues;
	u32 fw_preboot_cpu_boot_dev_sts0;
	u32 fw_preboot_cpu_boot_dev_sts1;
	u32 fw_bootfit_cpu_boot_dev_sts0;
	u32 fw_bootfit_cpu_boot_dev_sts1;
	u32 fw_app_cpu_boot_dev_sts0;
	u32 fw_app_cpu_boot_dev_sts1;
	u32 max_dec;
	u32 hmmu_hif_enabled_mask;
	u32 faulty_dram_cluster_map;
	u32 xbar_edge_enabled_mask;
	u32 device_mem_alloc_default_page_size;
	u32 num_engine_cores;
	u32 max_num_of_engines;
	u32 num_of_special_blocks;
	u32 glbl_err_max_cause_num;
	u32 hbw_flush_reg;
	u32 reserved_fw_mem_size;
	u32 fw_event_queue_size;
	u16 collective_first_sob;
	u16 collective_first_mon;
	u16 sync_stream_first_sob;
	u16 sync_stream_first_mon;
	u16 first_available_user_sob[HL_MAX_DCORES];
	u16 first_available_user_mon[HL_MAX_DCORES];
	u16 first_available_user_interrupt;
	u16 first_available_cq[HL_MAX_DCORES];
	u16 user_interrupt_count;
	u16 user_dec_intr_count;
	u16 tpc_interrupt_id;
	u16 eq_interrupt_id;
	u16 cache_line_size;
	u16 server_type;
	u8 completion_queues_count;
	u8 completion_mode;
	u8 mme_master_slave_mode;
	u8 fw_security_enabled;
	u8 fw_cpu_boot_dev_sts0_valid;
	u8 fw_cpu_boot_dev_sts1_valid;
	u8 dram_supports_virtual_memory;
	u8 hard_reset_done_by_fw;
	u8 num_functional_hbms;
	u8 hints_range_reservation;
	u8 iatu_done_by_fw;
	u8 dynamic_fw_load;
	u8 gic_interrupts_enable;
	u8 use_get_power_for_reset_history;
	u8 supports_compute_reset;
	u8 allow_inference_soft_reset;
	u8 configurable_stop_on_err;
	u8 set_max_power_on_device_init;
	u8 supports_user_set_page_size;
	u8 dma_mask;
	u8 supports_advanced_cpucp_rc;
	u8 supports_engine_modes;
	u8 support_dynamic_resereved_fw_size;
};
/**
 * struct hl_fence - software synchronization primitive
 * @completion: the completion this fence is implemented with
 * @refcount: reference count of this fence
 * @cs_sequence: sequence of the corresponding command submission
 * @stream_master_qid_map: bitmap of stream master QIDs, representing all the
 *                         stream masters QIDs that a multi-CS is waiting on
 * @error: marks this fence with an error
 * @timestamp: timestamp taken upon completion
 * @mcs_handling_done: indicates that the corresponding command submission has
 *                     finished mcs handling; this does not mean it was part
 *                     of the mcs
 */
struct hl_fence {
	struct completion	completion;
	struct kref		refcount;
	u64			cs_sequence;
	u32			stream_master_qid_map;
	int			error;
	ktime_t			timestamp;
	u8			mcs_handling_done;
};
/**
 * struct hl_cs_compl - completion object of a command submission.
 * @base_fence: the hl fence object.
 * @lock: spinlock protecting the fence.
 * @hdev: the habanalabs device structure.
 * @hw_sob: the H/W SOB used in this signal/wait CS.
 * @encaps_sig_hdl: encaps signals handler.
 * @cs_seq: sequence number of the command submission.
 * @type: type of the CS - signal/wait.
 * @sob_val: the SOB value used in this signal/wait CS.
 * @sob_group: the SOB group used in this collective wait CS.
 * @encaps_signals: indicates whether this is a completion object of a CS with
 *                  encaps signals or not.
 */
struct hl_cs_compl {
	struct hl_fence			base_fence;
	spinlock_t			lock;
	struct hl_device		*hdev;
	struct hl_hw_sob		*hw_sob;
	struct hl_cs_encaps_sig_handle	*encaps_sig_hdl;
	u64				cs_seq;
	enum hl_cs_type			type;
	u16				sob_val;
	u16				sob_group;
	bool				encaps_signals;
};
/* * Command Buffers
*/
/**
 * struct hl_ts_buff - a timestamp buffer descriptor.
 * @kernel_buff_address: kernel virtual address of the internal buffer.
 * @user_buff_address: kernel virtual address of the user buffer.
 * @kernel_buff_size: size of the internal kernel buffer.
 */
struct hl_ts_buff {
	void	*kernel_buff_address;
	void	*user_buff_address;
	u32	kernel_buff_size;
};
struct hl_mmap_mem_buf;

/**
 * struct hl_mem_mgr - unified memory manager for mappable memory chunks.
 * @dev: back pointer to the owning device
 * @lock: protects @handles
 * @handles: an idr holding all active handles to the memory buffers in the system.
 */
struct hl_mem_mgr {
	struct device	*dev;
	spinlock_t	lock;
	struct idr	handles;
};
/**
 * struct hl_mem_mgr_fini_stats - statistics returned during memory manager teardown.
 * @n_busy_cb: number of CB handles that could not be removed
 * @n_busy_ts: number of TS handles that could not be removed
 * @n_busy_other: number of handles of any other type that could not be removed
 */
struct hl_mem_mgr_fini_stats {
	u32	n_busy_cb;
	u32	n_busy_ts;
	u32	n_busy_other;
};
/** * struct hl_mmap_mem_buf_behavior - describes unified memory manager buffer behavior * @topic: string identifier used for logging * @mem_id: memory type identifier, embedded in the handle and used to identify * the memory type by handle. * @alloc: callback executed on buffer allocation, shall allocate the memory, * set it under buffer private, and set mappable size. * @mmap: callback executed on mmap, must map the buffer to vma * @release: callback executed on release, must free the resources used by the buffer
*/ struct hl_mmap_mem_buf_behavior { constchar *topic;
u64 mem_id;
/**
 * struct hl_mmap_mem_buf - a single unified memory buffer
 * @behavior: the buffer behavior
 * @mmg: back pointer to the unified memory manager
 * @refcount: reference counter for the buffer users
 * @private: pointer to the private data of the buffer behavior
 * @mmap: atomic boolean indicating whether or not the buffer is mapped right now
 * @real_mapped_size: the actual size of the mapped buffer, after part of it may
 *                    have been released; may change at runtime.
 * @mappable_size: the original mappable size of the buffer; does not change
 *                 after the allocation.
 * @handle: the buffer id in the mmg handles store
 */
struct hl_mmap_mem_buf {
	struct hl_mmap_mem_buf_behavior	*behavior;
	struct hl_mem_mgr		*mmg;
	struct kref			refcount;
	void				*private;
	atomic_t			mmap;
	u64				real_mapped_size;
	u64				mappable_size;
	u64				handle;
};
/**
 * struct hl_cb - a Command Buffer descriptor.
 * @hdev: pointer to the device this CB belongs to.
 * @ctx: pointer to the context of the CB owner.
 * @buf: back pointer to the parent mappable memory buffer.
 * @debugfs_list: node in the debugfs list of command buffers.
 * @pool_list: node in the pool list of command buffers.
 * @kernel_address: the CB's kernel virtual address.
 * @virtual_addr: the CB's virtual address.
 * @bus_address: the CB's DMA address.
 * @size: the CB's size.
 * @roundup_size: the CB's size after rounding up to page size.
 * @cs_cnt: number of CSs that this CB participates in.
 * @is_handle_destroyed: atomic boolean indicating whether or not the CB handle
 *                       was destroyed.
 * @is_pool: true if the CB was acquired from the pool, false otherwise.
 * @is_internal: internally allocated.
 * @is_mmu_mapped: true if the CB is mapped to the device's MMU.
 */
struct hl_cb {
	struct hl_device	*hdev;
	struct hl_ctx		*ctx;
	struct hl_mmap_mem_buf	*buf;
	struct list_head	debugfs_list;
	struct list_head	pool_list;
	void			*kernel_address;
	u64			virtual_addr;
	dma_addr_t		bus_address;
	u32			size;
	u32			roundup_size;
	atomic_t		cs_cnt;
	atomic_t		is_handle_destroyed;
	u8			is_pool;
	u8			is_internal;
	u8			is_mmu_mapped;
};
/* * QUEUES
*/
struct hl_cs_job;

/*
 * Queue geometry constants. Every preprocessor directive is kept on its own
 * line: a directive extends to the end of the line, so sharing a line would
 * fold the following directives into the first one.
 */

/* Queue length of external and HW queues */
#define HL_QUEUE_LENGTH			4096
#define HL_QUEUE_SIZE_IN_BYTES		(HL_QUEUE_LENGTH * HL_BD_SIZE)

/* Build-time guard: a single CS must not carry more jobs than a queue can hold */
#if (HL_MAX_JOBS_PER_CS > HL_QUEUE_LENGTH)
#error "HL_QUEUE_LENGTH must be greater than HL_MAX_JOBS_PER_CS"
#endif

/* HL_CQ_LENGTH is in units of struct hl_cq_entry */
#define HL_CQ_LENGTH			HL_QUEUE_LENGTH
#define HL_CQ_SIZE_IN_BYTES		(HL_CQ_LENGTH * HL_CQ_ENTRY_SIZE)

/* Must be power of 2 */
#define HL_EQ_LENGTH			64
#define HL_EQ_SIZE_IN_BYTES		(HL_EQ_LENGTH * HL_EQ_ENTRY_SIZE)
/**
 * struct hl_sync_stream_properties - sync stream properties of a H/W queue.
 * @hw_sob: array of the H/W SOBs used by this H/W queue.
 * @next_sob_val: next value to use for the currently used SOB.
 * @base_sob_id: base SOB id of the SOBs used by this queue.
 * @base_mon_id: base MON id of the MONs used by this queue.
 * @collective_mstr_mon_id: MON ids of the MONs used by this master queue in
 *                          order to sync with all slave queues.
 * @collective_slave_mon_id: MON id used by this slave queue in order to sync
 *                           with its master queue.
 * @collective_sob_id: current SOB id used by this collective slave queue to
 *                     signal its collective master queue upon completion.
 * @curr_sob_offset: id offset to the currently used SOB, out of the
 *                   HL_RSVD_SOBS that are being used by this queue.
 */
struct hl_sync_stream_properties {
	struct hl_hw_sob hw_sob[HL_RSVD_SOBS];
	u16 next_sob_val;
	u16 base_sob_id;
	u16 base_mon_id;
	u16 collective_mstr_mon_id[HL_COLLECTIVE_RSVD_MSTR_MONS];
	u16 collective_slave_mon_id;
	u16 collective_sob_id;
	u8 curr_sob_offset;
};
/**
 * struct hl_encaps_signals_mgr - manager of the sync stream encapsulated
 *                                signals handles.
 * @lock: protects @handles.
 * @handles: idr that holds all encapsulated signals handles.
 */
struct hl_encaps_signals_mgr {
	spinlock_t lock;
	struct idr handles;
};
/**
 * struct hl_hw_queue - a H/W transport queue.
 * @shadow_queue: shadow queue that holds pointers to jobs.
 * @sync_stream_prop: sync stream properties of the queue.
 * @queue_type: type of the queue.
 * @collective_mode: collective mode of the queue.
 * @kernel_address: kernel virtual address of the queue.
 * @bus_address: DMA address of the queue.
 * @pq_dram_address: DRAM address when the PQ is allocated; used when dram_bd
 *                   is true in the queue properties.
 * @pi: pi value of the queue.
 * @ci: ci value of the queue, AS CALCULATED BY THE DRIVER (not real ci).
 * @hw_queue_id: id of the H/W queue.
 * @cq_id: id of the CQ that corresponds to this H/W queue.
 * @msi_vec: IRQ number of the H/W queue.
 * @int_queue_len: length of internal queue (number of entries).
 * @valid: is the queue valid (we have array of 32 queues, not all of them
 *         exist).
 * @supports_sync_stream: true if the queue supports sync stream.
 * @dram_bd: true if the bd should be copied to dram; needed for a PQ which
 *           has been allocated on dram.
 */
struct hl_hw_queue {
	struct hl_cs_job **shadow_queue;
	struct hl_sync_stream_properties sync_stream_prop;
	enum hl_queue_type queue_type;
	enum hl_collective_mode collective_mode;
	void *kernel_address;
	dma_addr_t bus_address;
	u64 pq_dram_address;
	u32 pi;
	atomic_t ci;
	u32 hw_queue_id;
	u32 cq_id;
	u32 msi_vec;
	u16 int_queue_len;
	u8 valid;
	u8 supports_sync_stream;
	u8 dram_bd;
};
/**
 * struct hl_cq - a completion queue.
 * @hdev: the device structure.
 * @kernel_address: kernel virtual address of the queue.
 * @bus_address: DMA address of the queue.
 * @cq_idx: index of the completion queue in the array.
 * @hw_queue_id: id of the matching H/W queue.
 * @ci: ci inside the queue.
 * @pi: pi inside the queue.
 * @free_slots_cnt: counter of free slots in the queue.
 */
struct hl_cq {
	struct hl_device *hdev;
	void *kernel_address;
	dma_addr_t bus_address;
	u32 cq_idx;
	u32 hw_queue_id;
	u32 ci;
	u32 pi;
	atomic_t free_slots_cnt;
};
/**
 * struct hl_ts_free_jobs - user interrupt timestamp free nodes data.
 * @free_nodes_pool: pool of nodes to be used for free timestamp jobs.
 * @free_nodes_length: number of nodes in @free_nodes_pool.
 * @next_avail_free_node_idx: index of the next free node in the pool.
 *
 * The free nodes pool must be protected by the user interrupt lock, to avoid
 * a race between different interrupts which are using the same ts buffer with
 * different offsets.
 */
struct hl_ts_free_jobs {
	struct timestamp_reg_free_node *free_nodes_pool;
	u32 free_nodes_length;
	u32 next_avail_free_node_idx;
};
/**
 * struct hl_user_interrupt - user interrupt information.
 * @hdev: the device structure.
 * @ts_free_jobs_data: data related to timestamp free jobs.
 * @type: type of the user interrupt.
 * @wait_list_head: head of the list of user threads pending on this interrupt.
 * @ts_list_head: head of the list of timestamp records.
 * @wait_list_lock: protects @wait_list_head.
 * @ts_list_lock: protects @ts_list_head.
 * @timestamp: last timestamp taken upon interrupt.
 * @interrupt_id: msix interrupt id.
 */
struct hl_user_interrupt {
	struct hl_device *hdev;
	struct hl_ts_free_jobs ts_free_jobs_data;
	enum hl_user_interrupt_type type;
	struct list_head wait_list_head;
	struct list_head ts_list_head;
	spinlock_t wait_list_lock;
	spinlock_t ts_list_lock;
	ktime_t timestamp;
	u32 interrupt_id;
};
/**
 * struct timestamp_reg_free_node - node of timestamp registration free objects.
 * @free_objects_node: node in the free_obj_jobs list.
 * @cq_cb: cq command buffer to be freed.
 * @buf: timestamp buffer to be freed.
 * @in_use: tells whether the node is still in use by the workqueue thread.
 * @dynamic_alloc: tells whether the node was allocated dynamically in the
 *                 interrupt handler.
 */
struct timestamp_reg_free_node {
	struct list_head free_objects_node;
	struct hl_cb *cq_cb;
	struct hl_mmap_mem_buf *buf;
	atomic_t in_use;
	u8 dynamic_alloc;
};
/**
 * struct timestamp_reg_work_obj - job of freeing timestamp registration
 *                                 objects.
 * @free_obj: workqueue object to free timestamp registration node objects.
 * @hdev: the device structure.
 * @free_obj_head: list of free jobs nodes (node type
 *                 timestamp_reg_free_node).
 * @dynamic_alloc_free_obj_head: list of free jobs nodes which were dynamically
 *                               allocated in the interrupt handler.
 *
 * The job passes over the free_obj_jobs list and puts the refcount of the
 * objects in each node of the list.
 */
struct timestamp_reg_work_obj {
	struct work_struct free_obj;
	struct hl_device *hdev;
	struct list_head *free_obj_head;
	struct list_head *dynamic_alloc_free_obj_head;
};
/**
 * struct timestamp_reg_info - timestamp registration related data.
 * @buf: the timestamp buffer, which includes both user/kernel buffers.
 *       Relevant only when doing timestamps records registration.
 * @cq_cb: the CQ counter CB.
 * @interrupt: the interrupt on whose wait list the node is hanged.
 * @timestamp_kernel_addr: timestamp handle address, where to set the
 *                         timestamp. Relevant only when doing timestamps
 *                         records registration.
 * @in_use: tells if the node is already in use. Relevant only when doing
 *          timestamps records registration, since in this case the driver
 *          will have its own buffer which serves as a records pool instead of
 *          allocating records dynamically.
 */
struct timestamp_reg_info {
	struct hl_mmap_mem_buf *buf;
	struct hl_cb *cq_cb;
	struct hl_user_interrupt *interrupt;
	u64 *timestamp_kernel_addr;
	bool in_use;
};
/**
 * struct hl_user_pending_interrupt - context of a user thread that is pending
 *                                    on an interrupt.
 * @ts_reg_info: info of the timestamps registration nodes.
 * @list_node: node in the list of user threads pending on an interrupt or
 *             timestamp.
 * @fence: hl fence object for interrupt completion.
 * @cq_target_value: CQ target value.
 * @cq_kernel_addr: CQ kernel address, used in the cq interrupt handler for
 *                  the target value comparison.
 */
struct hl_user_pending_interrupt {
	struct timestamp_reg_info ts_reg_info;
	struct list_head list_node;
	struct hl_fence fence;
	u64 cq_target_value;
	u64 *cq_kernel_addr;
};
/**
 * struct hl_eq - the event queue (single one per device).
 * @hdev: the device structure.
 * @kernel_address: kernel virtual address of the queue.
 * @bus_address: DMA address of the queue.
 * @size: size of the event queue.
 * @ci: ci inside the queue.
 * @prev_eqe_index: index of the previous event queue entry. The index of the
 *                  current entry must be +1 of the previous one.
 * @check_eqe_index: whether the index of the current entry needs to be
 *                   checked vs. the previous one. This is for backward
 *                   compatibility with older firmwares.
 */
struct hl_eq {
	struct hl_device *hdev;
	void *kernel_address;
	dma_addr_t bus_address;
	u32 size;
	u32 ci;
	u32 prev_eqe_index;
	bool check_eqe_index;
};
/**
 * struct hl_dec - a decoder sw instance.
 * @hdev: the device structure.
 * @abnrm_intr_work: workqueue work item to run when the decoder generates an
 *                   error interrupt.
 * @core_id: ID of the decoder.
 * @base_addr: base address of the decoder.
 */
struct hl_dec {
	struct hl_device *hdev;
	struct work_struct abnrm_intr_work;
	u32 core_id;
	u32 base_addr;
};
/**
 * enum hl_pm_mng_profile - power management profile.
 * @PM_AUTO: the Linux driver sets the internal clock.
 * @PM_MANUAL: the user sets the internal clock.
 * @PM_LAST: last power management type.
 */
enum hl_pm_mng_profile {
	PM_AUTO = 1,
	PM_MANUAL,
	PM_LAST,
};
/**
 * enum hl_pll_frequency - PLL frequency.
 * @PLL_HIGH: high frequency.
 * @PLL_LOW: low frequency.
 * @PLL_LAST: last frequency values that were configured by the user.
 */
enum hl_pll_frequency {
	PLL_HIGH = 1,
	PLL_LOW,
	PLL_LAST,
};
/**
 * struct pci_mem_region - a memory region in a PCI bar.
 * @region_base: base address of the region.
 * @region_size: size of the region.
 * @bar_size: size of the BAR.
 * @offset_in_bar: offset of the region into the bar.
 * @bar_id: bar ID of the region.
 * @used: 1 if the region is used, otherwise 0.
 */
struct pci_mem_region {
	u64 region_base;
	u64 region_size;
	u64 bar_size;
	u64 offset_in_bar;
	u8 bar_id;
	u8 used;
};
/**
 * struct static_fw_load_mgr - static FW load manager.
 * @preboot_version_max_off: max offset to the preboot version.
 * @boot_fit_version_max_off: max offset to the boot fit version.
 * @kmd_msg_to_cpu_reg: register address for KMD->CPU messages.
 * @cpu_cmd_status_to_host_reg: register address for the CPU command status
 *                              response.
 * @cpu_boot_status_reg: boot status register.
 * @cpu_boot_dev_status0_reg: boot device status register 0.
 * @cpu_boot_dev_status1_reg: boot device status register 1.
 * @boot_err0_reg: boot error register 0.
 * @boot_err1_reg: boot error register 1.
 * @preboot_version_offset_reg: SRAM offset to the preboot version register.
 * @boot_fit_version_offset_reg: SRAM offset to the boot fit version register.
 * @sram_offset_mask: mask for getting an offset into the SRAM.
 * @cpu_reset_wait_msec: used when setting WFE via kmd_msg_to_cpu_reg.
 */
struct static_fw_load_mgr {
	u64 preboot_version_max_off;
	u64 boot_fit_version_max_off;
	u32 kmd_msg_to_cpu_reg;
	u32 cpu_cmd_status_to_host_reg;
	u32 cpu_boot_status_reg;
	u32 cpu_boot_dev_status0_reg;
	u32 cpu_boot_dev_status1_reg;
	u32 boot_err0_reg;
	u32 boot_err1_reg;
	u32 preboot_version_offset_reg;
	u32 boot_fit_version_offset_reg;
	u32 sram_offset_mask;
	u32 cpu_reset_wait_msec;
};
/**
 * struct fw_response - response of the FW to an LKD command.
 * @ram_offset: offset of the descriptor into the RAM.
 * @ram_type: type of the RAM that contains the descriptor (SRAM/DRAM).
 * @status: status of the command.
 */
struct fw_response {
	u32 ram_offset;
	u8 ram_type;
	u8 status;
};
/**
 * struct dynamic_fw_load_mgr - dynamic FW load manager.
 * @response: FW to LKD response.
 * @comm_desc: the communication descriptor with the FW.
 * @image_region: region to copy the FW image to.
 * @fw_image_size: size of the FW image to load.
 * @wait_for_bl_timeout: timeout for waiting for the boot loader to respond.
 * @fw_desc_valid: true if the FW descriptor has been validated and hence its
 *                 data can be used.
 */
struct dynamic_fw_load_mgr {
	struct fw_response response;
	struct lkd_fw_comms_desc comm_desc;
	struct pci_mem_region *image_region;
	size_t fw_image_size;
	u32 wait_for_bl_timeout;
	bool fw_desc_valid;
};
/**
 * struct pre_fw_load_props - properties needed for pre-FW load.
 * @cpu_boot_status_reg: address of the cpu_boot_status register.
 * @sts_boot_dev_sts0_reg: address of the sts_boot_dev_sts0 register.
 * @sts_boot_dev_sts1_reg: address of the sts_boot_dev_sts1 register.
 * @boot_err0_reg: address of the boot_err0 register.
 * @boot_err1_reg: address of the boot_err1 register.
 * @wait_for_preboot_timeout: timeout to poll for preboot ready.
 * @wait_for_preboot_extended_timeout: timeout to poll for preboot ready in
 *                                     case where we know preboot needs longer
 *                                     time.
 */
struct pre_fw_load_props {
	u32 cpu_boot_status_reg;
	u32 sts_boot_dev_sts0_reg;
	u32 sts_boot_dev_sts1_reg;
	u32 boot_err0_reg;
	u32 boot_err1_reg;
	u32 wait_for_preboot_timeout;
	u32 wait_for_preboot_extended_timeout;
};
/**
 * struct fw_image_props - properties of a FW image.
 * @image_name: name of the image.
 * @src_off: offset in the src FW to copy from.
 * @copy_size: amount of bytes to copy (0 to copy the whole binary).
 */
struct fw_image_props {
	char *image_name;
	u32 src_off;
	u32 copy_size;
};
/**
 * struct fw_load_mgr - manager of the FW loading process.
 * @dynamic_loader: specific structure for dynamic load.
 * @static_loader: specific structure for static load.
 * @pre_fw_load: parameters for pre FW load.
 * @boot_fit_img: boot fit image properties.
 * @linux_img: linux image properties.
 * @cpu_timeout: CPU response timeout in usec.
 * @boot_fit_timeout: boot fit load timeout in usec.
 * @skip_bmc: should BMC be skipped.
 * @sram_bar_id: SRAM bar ID.
 * @dram_bar_id: DRAM bar ID.
 * @fw_comp_loaded: bitmask of loaded FW components. A set bit means a loaded
 *                  component. Values are set according to enum hl_fw_types.
 */
struct fw_load_mgr {
	/* dynamic_loader and static_loader share the same storage */
	union {
		struct dynamic_fw_load_mgr dynamic_loader;
		struct static_fw_load_mgr static_loader;
	};
	struct pre_fw_load_props pre_fw_load;
	struct fw_image_props boot_fit_img;
	struct fw_image_props linux_img;
	u32 cpu_timeout;
	u32 boot_fit_timeout;
	u8 skip_bmc;
	u8 sram_bar_id;
	u8 dram_bar_id;
	u8 fw_comp_loaded;
};
/* Forward declaration; the ASIC callbacks below take pointers to struct hl_cs */
struct hl_cs;
/**
 * struct engines_data - asic engines data.
 * @buf: buffer for the engines data, in ascii.
 * @actual_size: actual size of the data that the driver wrote to the
 *               allocated buffer.
 * @allocated_buf_size: total size of the allocated buffer.
 */
struct engines_data {
	char *buf;
	int actual_size;
	u32 allocated_buf_size;
};
/** * struct hl_asic_funcs - ASIC specific functions that are can be called from * common code. * @early_init: sets up early driver state (pre sw_init), doesn't configure H/W. * @early_fini: tears down what was done in early_init. * @late_init: sets up late driver/hw state (post hw_init) - Optional. * @late_fini: tears down what was done in late_init (pre hw_fini) - Optional. * @sw_init: sets up driver state, does not configure H/W. * @sw_fini: tears down driver state, does not configure H/W. * @hw_init: sets up the H/W state. * @hw_fini: tears down the H/W state. * @halt_engines: halt engines, needed for reset sequence. This also disables * interrupts from the device. Should be called before * hw_fini and before CS rollback. * @suspend: handles IP specific H/W or SW changes for suspend. * @resume: handles IP specific H/W or SW changes for resume. * @mmap: maps a memory. * @ring_doorbell: increment PI on a given QMAN. * @pqe_write: Write the PQ entry to the PQ. This is ASIC-specific * function because the PQs are located in different memory areas * per ASIC (SRAM, DRAM, Host memory) and therefore, the method of * writing the PQE must match the destination memory area * properties. * @asic_dma_alloc_coherent: Allocate coherent DMA memory by calling * dma_alloc_coherent(). This is ASIC function because * its implementation is not trivial when the driver * is loaded in simulation mode (not upstreamed). * @asic_dma_free_coherent: Free coherent DMA memory by calling * dma_free_coherent(). This is ASIC function because * its implementation is not trivial when the driver * is loaded in simulation mode (not upstreamed). * @scrub_device_mem: Scrub the entire SRAM and DRAM. * @scrub_device_dram: Scrub the dram memory of the device. * @get_int_queue_base: get the internal queue base address. * @test_queues: run simple test on all queues for sanity check. * @asic_dma_pool_zalloc: small DMA allocation of coherent memory from DMA pool. 
* size of allocation is HL_DMA_POOL_BLK_SIZE. * @asic_dma_pool_free: free small DMA allocation from pool. * @cpu_accessible_dma_pool_alloc: allocate CPU PQ packet from DMA pool. * @cpu_accessible_dma_pool_free: free CPU PQ packet from DMA pool. * @dma_unmap_sgtable: DMA unmap scatter-gather table. * @dma_map_sgtable: DMA map scatter-gather table. * @cs_parser: parse Command Submission. * @add_end_of_cb_packets: Add packets to the end of CB, if device requires it. * @update_eq_ci: update event queue CI. * @context_switch: called upon ASID context switch. * @restore_phase_topology: clear all SOBs amd MONs. * @debugfs_read_dma: debug interface for reading up to 2MB from the device's * internal memory via DMA engine. * @add_device_attr: add ASIC specific device attributes. * @handle_eqe: handle event queue entry (IRQ) from CPU-CP. * @get_events_stat: retrieve event queue entries histogram. * @read_pte: read MMU page table entry from DRAM. * @write_pte: write MMU page table entry to DRAM. * @mmu_invalidate_cache: flush MMU STLB host/DRAM cache, either with soft * (L1 only) or hard (L0 & L1) flush. * @mmu_invalidate_cache_range: flush specific MMU STLB cache lines with ASID-VA-size mask. * @mmu_prefetch_cache_range: pre-fetch specific MMU STLB cache lines with ASID-VA-size mask. * @send_heartbeat: send is-alive packet to CPU-CP and verify response. * @debug_coresight: perform certain actions on Coresight for debugging. * @is_device_idle: return true if device is idle, false otherwise. * @compute_reset_late_init: perform certain actions needed after a compute reset * @hw_queues_lock: acquire H/W queues lock. * @hw_queues_unlock: release H/W queues lock. * @get_pci_id: retrieve PCI ID. * @get_eeprom_data: retrieve EEPROM data from F/W. * @get_monitor_dump: retrieve monitor registers dump from F/W. * @send_cpu_message: send message to F/W. If the message is timedout, the * driver will eventually reset the device. 
The timeout can * be determined by the calling function or it can be 0 and * then the timeout is the default timeout for the specific * ASIC * @get_hw_state: retrieve the H/W state * @pci_bars_map: Map PCI BARs. * @init_iatu: Initialize the iATU unit inside the PCI controller. * @rreg: Read a register. Needed for simulator support. * @wreg: Write a register. Needed for simulator support. * @halt_coresight: stop the ETF and ETR traces. * @ctx_init: context dependent initialization. * @ctx_fini: context dependent cleanup. * @pre_schedule_cs: Perform pre-CS-scheduling operations. * @get_queue_id_for_cq: Get the H/W queue id related to the given CQ index. * @load_firmware_to_device: load the firmware to the device's memory * @load_boot_fit_to_device: load boot fit to device's memory * @get_signal_cb_size: Get signal CB size. * @get_wait_cb_size: Get wait CB size. * @gen_signal_cb: Generate a signal CB. * @gen_wait_cb: Generate a wait CB. * @reset_sob: Reset a SOB. * @reset_sob_group: Reset SOB group * @get_device_time: Get the device time. * @pb_print_security_errors: print security errors according block and cause * @collective_wait_init_cs: Generate collective master/slave packets * and place them in the relevant cs jobs * @collective_wait_create_jobs: allocate collective wait cs jobs * @get_dec_base_addr: get the base address of a given decoder. * @scramble_addr: Routine to scramble the address prior of mapping it * in the MMU. * @descramble_addr: Routine to de-scramble the address prior of * showing it to users. * @ack_protection_bits_errors: ack and dump all security violations * @get_hw_block_id: retrieve a HW block id to be used by the user to mmap it. * also returns the size of the block if caller supplies * a valid pointer for it * @hw_block_mmap: mmap a HW block with a given id. * @enable_events_from_fw: send interrupt to firmware to notify them the * driver is ready to receive asynchronous events. 
This * function should be called during the first init and * after every hard-reset of the device * @ack_mmu_errors: check and ack mmu errors, page fault, access violation. * @get_msi_info: Retrieve asic-specific MSI ID of the f/w async event * @map_pll_idx_to_fw_idx: convert driver specific per asic PLL index to * generic f/w compatible PLL Indexes * @init_firmware_preload_params: initialize pre FW-load parameters. * @init_firmware_loader: initialize data for FW loader. * @init_cpu_scrambler_dram: Enable CPU specific DRAM scrambling * @state_dump_init: initialize constants required for state dump * @get_sob_addr: get SOB base address offset. * @set_pci_memory_regions: setting properties of PCI memory regions * @get_stream_master_qid_arr: get pointer to stream masters QID array * @check_if_razwi_happened: check if there was a razwi due to RR violation. * @access_dev_mem: access device memory * @set_dram_bar_base: set the base of the DRAM BAR * @set_engine_cores: set a config command to engine cores * @set_engines: set a config command to user engines * @send_device_activity: indication to FW about device availability * @set_dram_properties: set DRAM related properties. * @set_binning_masks: set binning/enable masks for all relevant components.
*/ struct hl_asic_funcs { int (*early_init)(struct hl_device *hdev); int (*early_fini)(struct hl_device *hdev); int (*late_init)(struct hl_device *hdev); void (*late_fini)(struct hl_device *hdev); int (*sw_init)(struct hl_device *hdev); int (*sw_fini)(struct hl_device *hdev); int (*hw_init)(struct hl_device *hdev); int (*hw_fini)(struct hl_device *hdev, bool hard_reset, bool fw_reset); void (*halt_engines)(struct hl_device *hdev, bool hard_reset, bool fw_reset); int (*suspend)(struct hl_device *hdev); int (*resume)(struct hl_device *hdev); int (*mmap)(struct hl_device *hdev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size); void (*ring_doorbell)(struct hl_device *hdev, u32 hw_queue_id, u32 pi); void (*pqe_write)(struct hl_device *hdev, __le64 *pqe, struct hl_bd *bd); void* (*asic_dma_alloc_coherent)(struct hl_device *hdev, size_t size,
dma_addr_t *dma_handle, gfp_t flag); void (*asic_dma_free_coherent)(struct hl_device *hdev, size_t size, void *cpu_addr, dma_addr_t dma_handle); int (*scrub_device_mem)(struct hl_device *hdev); int (*scrub_device_dram)(struct hl_device *hdev, u64 val); void* (*get_int_queue_base)(struct hl_device *hdev, u32 queue_id,
dma_addr_t *dma_handle, u16 *queue_len); int (*test_queues)(struct hl_device *hdev); void* (*asic_dma_pool_zalloc)(struct hl_device *hdev, size_t size,
gfp_t mem_flags, dma_addr_t *dma_handle); void (*asic_dma_pool_free)(struct hl_device *hdev, void *vaddr,
dma_addr_t dma_addr); void* (*cpu_accessible_dma_pool_alloc)(struct hl_device *hdev,
size_t size, dma_addr_t *dma_handle); void (*cpu_accessible_dma_pool_free)(struct hl_device *hdev,
size_t size, void *vaddr); void (*dma_unmap_sgtable)(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); int (*dma_map_sgtable)(struct hl_device *hdev, struct sg_table *sgt, enum dma_data_direction dir); int (*cs_parser)(struct hl_device *hdev, struct hl_cs_parser *parser); void (*add_end_of_cb_packets)(struct hl_device *hdev, void *kernel_address, u32 len,
u32 original_len,
u64 cq_addr, u32 cq_val, u32 msix_num, bool eb); void (*update_eq_ci)(struct hl_device *hdev, u32 val); int (*context_switch)(struct hl_device *hdev, u32 asid); void (*restore_phase_topology)(struct hl_device *hdev); int (*debugfs_read_dma)(struct hl_device *hdev, u64 addr, u32 size, void *blob_addr); void (*add_device_attr)(struct hl_device *hdev, struct attribute_group *dev_clk_attr_grp, struct attribute_group *dev_vrm_attr_grp); void (*handle_eqe)(struct hl_device *hdev, struct hl_eq_entry *eq_entry); void* (*get_events_stat)(struct hl_device *hdev, bool aggregate,
u32 *size);
u64 (*read_pte)(struct hl_device *hdev, u64 addr); void (*write_pte)(struct hl_device *hdev, u64 addr, u64 val); int (*mmu_invalidate_cache)(struct hl_device *hdev, bool is_hard,
u32 flags); int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard,
u32 flags, u32 asid, u64 va, u64 size); int (*mmu_prefetch_cache_range)(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data); bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, u8 mask_len, struct engines_data *e); int (*compute_reset_late_init)(struct hl_device *hdev); void (*hw_queues_lock)(struct hl_device *hdev); void (*hw_queues_unlock)(struct hl_device *hdev);
u32 (*get_pci_id)(struct hl_device *hdev); int (*get_eeprom_data)(struct hl_device *hdev, void *data, size_t max_size); int (*get_monitor_dump)(struct hl_device *hdev, void *data); int (*send_cpu_message)(struct hl_device *hdev, u32 *msg,
u16 len, u32 timeout, u64 *result); int (*pci_bars_map)(struct hl_device *hdev); int (*init_iatu)(struct hl_device *hdev);
u32 (*rreg)(struct hl_device *hdev, u32 reg); void (*wreg)(struct hl_device *hdev, u32 reg, u32 val); void (*halt_coresight)(struct hl_device *hdev, struct hl_ctx *ctx); int (*ctx_init)(struct hl_ctx *ctx); void (*ctx_fini)(struct hl_ctx *ctx); int (*pre_schedule_cs)(struct hl_cs *cs);
u32 (*get_queue_id_for_cq)(struct hl_device *hdev, u32 cq_idx); int (*load_firmware_to_device)(struct hl_device *hdev); int (*load_boot_fit_to_device)(struct hl_device *hdev);
u32 (*get_signal_cb_size)(struct hl_device *hdev);
u32 (*get_wait_cb_size)(struct hl_device *hdev);
u32 (*gen_signal_cb)(struct hl_device *hdev, void *data, u16 sob_id,
u32 size, bool eb);
u32 (*gen_wait_cb)(struct hl_device *hdev, struct hl_gen_wait_properties *prop); void (*reset_sob)(struct hl_device *hdev, void *data); void (*reset_sob_group)(struct hl_device *hdev, u16 sob_group);
u64 (*get_device_time)(struct hl_device *hdev); void (*pb_print_security_errors)(struct hl_device *hdev,
u32 block_addr, u32 cause, u32 offended_addr); int (*collective_wait_init_cs)(struct hl_cs *cs); int (*collective_wait_create_jobs)(struct hl_device *hdev,
--> --------------------
--> maximum size reached
--> --------------------
Messung V0.5
¤ Dauer der Verarbeitung: 0.11 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.