/*
 * Macros to add or remove the encryption attribute of a pgprot value.
 * cc_mkenc()/cc_mkdec() operate on the raw protection bits.
 */
#define pgprot_encrypted(prot)	__pgprot(cc_mkenc(pgprot_val(prot)))
#define pgprot_decrypted(prot)	__pgprot(cc_mkdec(pgprot_val(prot)))
/*
 * W^X check of the user copy of the page tables; compiles away to a
 * no-op unless CONFIG_DEBUG_WX is enabled.
 */
#ifdef CONFIG_DEBUG_WX
#define debug_checkwx_user()	ptdump_walk_user_pgd_level_checkwx()
#else
#define debug_checkwx_user()	do { } while (0)
#endif
/*
 * ZERO_PAGE is a global shared page that is always zero: used
 * for zero-mapped memory areas etc..
 */
extern unsigned long empty_zero_page[PAGE_SIZE / sizeof(unsigned long)]
	__visible;
/* (vaddr) is evaluated and discarded; the same shared page is always returned. */
#define ZERO_PAGE(vaddr) ((void)(vaddr),virt_to_page(empty_zero_page))
/* * The following only work if pte_present() is true. * Undefined behaviour if not..
*/ staticinlinebool pte_dirty(pte_t pte)
{ return pte_flags(pte) & _PAGE_DIRTY_BITS;
}
static inline int pte_write(pte_t pte)
{
	/*
	 * Shadow stack pages are logically writable, but do not have
	 * _PAGE_RW.  Check for them separately from _PAGE_RW itself.
	 */
	return (pte_flags(pte) & _PAGE_RW) || pte_shstk(pte);
}
#define pmd_write pmd_write staticinlineint pmd_write(pmd_t pmd)
{ /* * Shadow stack pages are logically writable, but do not have * _PAGE_RW. Check for them separately from _PAGE_RW itself.
*/ return (pmd_flags(pmd) & _PAGE_RW) || pmd_shstk(pmd);
}
/* * Write protection operations can result in Dirty=1,Write=0 PTEs. But in the * case of X86_FEATURE_USER_SHSTK, these PTEs denote shadow stack memory. So * when creating dirty, write-protected memory, a software bit is used: * _PAGE_BIT_SAVED_DIRTY. The following functions take a PTE and transition the * Dirty bit to SavedDirty, and vice-vesra. * * This shifting is only done if needed. In the case of shifting * Dirty->SavedDirty, the condition is if the PTE is Write=0. In the case of * shifting SavedDirty->Dirty, the condition is Write=1.
*/ staticinline pgprotval_t mksaveddirty_shift(pgprotval_t v)
{
pgprotval_t cond = (~v >> _PAGE_BIT_RW) & 1;
v |= ((v >> _PAGE_BIT_DIRTY) & cond) << _PAGE_BIT_SAVED_DIRTY;
v &= ~(cond << _PAGE_BIT_DIRTY);
/* * Blindly clearing _PAGE_RW might accidentally create * a shadow stack PTE (Write=0,Dirty=1). Move the hardware * dirty value to the software bit, if present.
*/ return pte_mksaveddirty(pte);
}
/* * Mask out unsupported bits in a present pgprot. Non-present pgprots * can use those bits for other purposes, so leave them be.
*/ staticinline pgprotval_t massage_pgprot(pgprot_t pgprot)
{
pgprotval_t protval = pgprot_val(pgprot);
if (protval & _PAGE_PRESENT)
protval &= __supported_pte_mask;
/* mmdebug.h can not be included here because of dependencies */ #ifdef CONFIG_DEBUG_VM
WARN_ONCE(pgprot_val(pgprot) != massaged_val, "attempted to set unsupported pgprot: %016llx " "bits: %016llx supported: %016llx\n",
(u64)pgprot_val(pgprot),
(u64)pgprot_val(pgprot) ^ massaged_val,
(u64)__supported_pte_mask); #endif
return massaged_val;
}
staticinline pte_t pfn_pte(unsignedlong page_nr, pgprot_t pgprot)
{
phys_addr_t pfn = (phys_addr_t)page_nr << PAGE_SHIFT; /* This bit combination is used to mark shadow stacks */
WARN_ON_ONCE((pgprot_val(pgprot) & (_PAGE_DIRTY | _PAGE_RW)) ==
_PAGE_DIRTY);
pfn ^= protnone_mask(pgprot_val(pgprot));
pfn &= PTE_PFN_MASK; return __pte(pfn | check_pgprot(pgprot));
}
/* * Chop off the NX bit (if present), and add the NX portion of * the newprot (if present):
*/
val &= _PAGE_CHG_MASK;
val |= check_pgprot(newprot) & ~_PAGE_CHG_MASK;
val = flip_protnone_guard(oldval, val, PTE_PFN_MASK);
pte_result = __pte(val);
/* * To avoid creating Write=0,Dirty=1 PTEs, pte_modify() needs to avoid: * 1. Marking Write=0 PTEs Dirty=1 * 2. Marking Dirty=1 PTEs Write=0 * * The first case cannot happen because the _PAGE_CHG_MASK will filter * out any Dirty bit passed in newprot. Handle the second case by * going through the mksaveddirty exercise. Only do this if the old * value was Write=1 to avoid doing this on Shadow Stack PTEs.
*/ if (oldval & _PAGE_RW)
pte_result = pte_mksaveddirty(pte_result); else
pte_result = pte_clear_saveddirty(pte_result);
val &= (_HPAGE_CHG_MASK & ~_PAGE_DIRTY);
val |= check_pgprot(newprot) & ~_HPAGE_CHG_MASK;
val = flip_protnone_guard(oldval, val, PHYSICAL_PMD_PAGE_MASK);
pmd_result = __pmd(val);
/* * Avoid creating shadow stack PMD by accident. See comment in * pte_modify().
*/ if (oldval & _PAGE_RW)
pmd_result = pmd_mksaveddirty(pmd_result); else
pmd_result = pmd_clear_saveddirty(pmd_result);
/*
 * Returns 1 when changing the cache mode of a mapping from @pcm to
 * @new_pcm is permitted, 0 when the combination would be unsafe.
 */
static inline int is_new_memtype_allowed(u64 paddr, unsigned long size,
					 enum page_cache_mode pcm,
					 enum page_cache_mode new_pcm)
{
	/*
	 * PAT type is always WB for untracked ranges, so no need to check.
	 */
	if (x86_platform.is_untracked_pat_range(paddr, paddr + size))
		return 1;

	/*
	 * Certain new memtypes are not allowed with certain
	 * requested memtype:
	 * - request is uncached, return cannot be write-back
	 * - request is write-combine, return cannot be write-back
	 * - request is write-through, return cannot be write-back
	 * - request is write-through, return cannot be write-combine
	 */
	if ((pcm == _PAGE_CACHE_MODE_UC_MINUS &&
	     new_pcm == _PAGE_CACHE_MODE_WB) ||
	    (pcm == _PAGE_CACHE_MODE_WC &&
	     new_pcm == _PAGE_CACHE_MODE_WB) ||
	    (pcm == _PAGE_CACHE_MODE_WT &&
	     new_pcm == _PAGE_CACHE_MODE_WB) ||
	    (pcm == _PAGE_CACHE_MODE_WT &&
	     new_pcm == _PAGE_CACHE_MODE_WC)) {
		return 0;
	}

	return 1;
}
/* * Take a PGD location (pgdp) and a pgd value that needs to be set there. * Populates the user and returns the resulting PGD that must be set in * the kernel copy of the page tables.
*/ staticinline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{ if (!static_cpu_has(X86_FEATURE_PTI)) return pgd; return __pti_set_user_pgtbl(pgdp, pgd);
} #else/* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */ staticinline pgd_t pti_set_user_pgtbl(pgd_t *pgdp, pgd_t pgd)
{ return pgd;
} #endif/* CONFIG_MITIGATION_PAGE_TABLE_ISOLATION */
#endif/* __ASSEMBLER__ */
#ifdef CONFIG_X86_32 # include <asm/pgtable_32.h> #else # include <asm/pgtable_64.h> #endif
#define pte_accessible pte_accessible staticinlinebool pte_accessible(struct mm_struct *mm, pte_t a)
{ if (pte_flags(a) & _PAGE_PRESENT) returntrue;
if ((pte_flags(a) & _PAGE_PROTNONE) &&
atomic_read(&mm->tlb_flush_pending)) returntrue;
returnfalse;
}
static inline int pmd_present(pmd_t pmd)
{
	/*
	 * Checking for _PAGE_PSE is needed too because
	 * split_huge_page will temporarily clear the present bit (but
	 * the _PAGE_PSE flag will remain set at all times while the
	 * _PAGE_PRESENT bit is clear).
	 */
	return pmd_flags(pmd) & (_PAGE_PRESENT | _PAGE_PROTNONE | _PAGE_PSE);
}
#ifdef CONFIG_NUMA_BALANCING
/*
 * These work without NUMA balancing but the kernel does not care. See the
 * comment in include/linux/pgtable.h
 */
static inline int pte_protnone(pte_t pte)
{
	/* PROT_NONE means the PROTNONE bit is set while PRESENT is clear. */
	return (pte_flags(pte) & (_PAGE_PROTNONE | _PAGE_PRESENT))
		== _PAGE_PROTNONE;
}
static inline int pmd_none(pmd_t pmd)
{
	/* Only check low word on 32-bit platforms, since it might be
	   out of sync with upper half. */
	unsigned long val = native_pmd_val(pmd);
	/* Ignore bits that the KNL A/D erratum may have set spuriously. */
	return (val & ~_PAGE_KNL_ERRATUM_MASK) == 0;
}
/*
 * Currently stuck as a macro due to indirect forward reference to
 * linux/mmzone.h's __section_mem_map_addr() definition:
 */
#define pmd_page(pmd)	pfn_to_page(pmd_pfn(pmd))

/*
 * Currently stuck as a macro due to indirect forward reference to
 * linux/mmzone.h's __section_mem_map_addr() definition:
 */
#define pud_page(pud)	pfn_to_page(pud_pfn(pud))

/*
 * Currently stuck as a macro due to indirect forward reference to
 * linux/mmzone.h's __section_mem_map_addr() definition:
 */
#define p4d_page(p4d)	pfn_to_page(p4d_pfn(p4d))

/*
 * Currently stuck as a macro due to indirect forward reference to
 * linux/mmzone.h's __section_mem_map_addr() definition:
 */
#define pgd_page(pgd)	pfn_to_page(pgd_pfn(pgd))
/* to find an entry in a page-table-directory. */ staticinline p4d_t *p4d_offset(pgd_t *pgd, unsignedlong address)
{ if (!pgtable_l5_enabled()) return (p4d_t *)pgd; return (p4d_t *)pgd_page_vaddr(*pgd) + p4d_index(address);
}
staticinlineint pgd_none(pgd_t pgd)
{ if (!pgtable_l5_enabled()) return 0; /* * There is no need to do a workaround for the KNL stray * A/D bit erratum here. PGDs only point to page tables * except on 32-bit non-PAE which is not supported on * KNL.
*/ return !native_pgd_val(pgd);
} #endif/* CONFIG_PGTABLE_LEVELS > 4 */
/* * We only update the dirty/accessed state if we set * the dirty bit by hand in the kernel, since the hardware * will do the accessed bit for us, and we don't want to * race with other CPU's that might be updating the dirty * bit at the same time.
*/ struct vm_area_struct;
/* * Page table pages are page-aligned. The lower half of the top * level is used for userspace and the top half for the kernel. * * Returns true for parts of the PGD that map userspace and * false for the parts that map the kernel.
*/ staticinlinebool pgdp_maps_userspace(void *__ptr)
{ unsignedlong ptr = (unsignedlong)__ptr;
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION /* * All top-level MITIGATION_PAGE_TABLE_ISOLATION page tables are order-1 pages * (8k-aligned and 8k in size). The kernel one is at the beginning 4k and * the user one is in the last 4k. To switch between them, you * just need to flip the 12th bit in their addresses.
*/ #define PTI_PGTABLE_SWITCH_BIT PAGE_SHIFT
/*
 * This generates better code than the inline assembly in
 * __set_bit().
 */
static inline void *ptr_set_bit(void *ptr, int bit)
{
	unsigned long __ptr = (unsigned long)ptr;

	__ptr |= BIT(bit);
	return (void *)__ptr;
}
/*
 * clone_pgd_range(pgd_t *dst, pgd_t *src, int count);
 *
 *  dst - pointer to pgd range anywhere on a pgd page
 *  src - ""
 *  count - the number of pgds to copy.
 *
 * dst and src can be on the same page, but the range must not overlap,
 * and must not cross a page boundary.
 */
static inline void clone_pgd_range(pgd_t *dst, pgd_t *src, int count)
{
	memcpy(dst, src, count * sizeof(pgd_t));
#ifdef CONFIG_MITIGATION_PAGE_TABLE_ISOLATION
	if (!static_cpu_has(X86_FEATURE_PTI))
		return;
	/* Clone the user space pgd as well */
	memcpy(kernel_to_user_pgdp(dst), kernel_to_user_pgdp(src),
	       count * sizeof(pgd_t));
#endif
}
if (!__pkru_allows_read(pkru, pkey)) returnfalse; if (write && !__pkru_allows_write(pkru, pkey)) returnfalse;
returntrue;
}
/*
 * 'pteval' can come from a PTE, PMD or PUD.  We only check
 * _PAGE_PRESENT, _PAGE_USER, and _PAGE_RW in here which are the
 * same value on all 3 types.
 */
static inline bool __pte_access_permitted(unsigned long pteval, bool write)
{
	unsigned long need_pte_bits = _PAGE_PRESENT|_PAGE_USER;

	/*
	 * Write=0,Dirty=1 PTEs are shadow stack, which the kernel
	 * shouldn't generally allow access to, but since they
	 * are already Write=0, the below logic covers both cases.
	 */
	if (write)
		need_pte_bits |= _PAGE_RW;

	if ((pteval & need_pte_bits) != need_pte_bits)
		return 0;

	/* Page-table bits allow it; defer the final say to protection keys. */
	return __pkru_allows_pkey(pte_flags_pkey(pteval), write);
}
/*
 * Use set_p*_safe(), and elide TLB flushing, when confident that *no*
 * TLB flush will be required as a result of the "set". For example, use
 * in scenarios where it is known ahead of time that the routine is
 * setting non-present entries, or re-setting an existing entry to the
 * same value. Otherwise, use the typical "set" helpers and flush the
 * TLB.
 */
#define set_pte_safe(ptep, pte) \
({ \
	WARN_ON_ONCE(pte_present(*ptep) && !pte_same(*ptep, pte)); \
	set_pte(ptep, pte); \
})
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereitgestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.