#ifdef CONFIG_SPARSEMEM_VMEMMAP
/*
 * Given an address within the vmemmap, determine the page that
 * represents the start of the subsection it is within.  Note that we have to
 * do this by hand as the proffered address may not be correctly aligned.
 * Subtraction of non-aligned pointers produces undefined results.
 */
static struct page * __meminit vmemmap_subsection_start(unsigned long vmemmap_addr)
{
	unsigned long start_pfn;
	/* Byte offset of the address from the start of the vmemmap array. */
	unsigned long offset = vmemmap_addr - ((unsigned long)(vmemmap));

	/* Return the pfn of the start of the section. */
	start_pfn = (offset / sizeof(struct page)) & PAGE_SUBSECTION_MASK;
	return pfn_to_page(start_pfn);
}
/*
 * Since memory is added in sub-section chunks, before creating a new vmemmap
 * mapping, the kernel should check whether there is an existing memmap mapping
 * covering the new subsection added. This is needed because kernel can map
 * vmemmap area using 16MB pages which will cover a memory range of 16G. Such
 * a range covers multiple subsections (2M)
 *
 * If any subsection in the 16G range mapped by vmemmap is valid we consider the
 * vmemmap populated (There is a page table entry already present). We can't do
 * a page table lookup here because with the hash translation we don't keep
 * vmemmap details in linux page table.
 */
int __meminit vmemmap_populated(unsigned long vmemmap_addr, int vmemmap_map_size)
{
	struct page *start;
	unsigned long vmemmap_end = vmemmap_addr + vmemmap_map_size;

	/* Align down to the subsection containing vmemmap_addr. */
	start = vmemmap_subsection_start(vmemmap_addr);

	for (; (unsigned long)start < vmemmap_end; start += PAGES_PER_SUBSECTION)
		/*
		 * pfn valid check here is intended to really check
		 * whether we have any subsection already initialized
		 * in this range.
		 */
		if (pfn_valid(page_to_pfn(start)))
			return 1;

	return 0;
}
/*
 * vmemmap virtual address space management does not have a traditional page
 * table to track which virtual struct pages are backed by physical mapping.
 * The virtual to physical mappings are tracked in a simple linked list
 * format. 'vmemmap_list' maintains the entire vmemmap physical mapping at
 * all times where as the 'next' list maintains the available
 * vmemmap_backing structures which have been deleted from the
 * 'vmemmap_global' list during system runtime (memory hotplug remove
 * operation). The freed 'vmemmap_backing' structures are reused later when
 * new requests come in without allocating fresh memory. This pointer also
 * tracks the allocated 'vmemmap_backing' structures as we allocate one
 * full page memory at a time when we dont have any.
 */
struct vmemmap_backing *vmemmap_list;
static struct vmemmap_backing *next;

/*
 * The same pointer 'next' tracks individual chunks inside the allocated
 * full page during the boot time and again tracks the freed nodes during
 * runtime. It is racy but it does not happen as they are separated by the
 * boot process. Will create problem if some how we have memory hotplug
 * operation during boot !!
 */
static int num_left;
static int num_freed;
/*
 * Hand out one vmemmap_backing tracking structure for @node.
 *
 * Reuses entries from the free list ('next' / num_freed) first; otherwise
 * carves chunks out of a full page allocated on demand (num_left counts the
 * chunks remaining in the current page).  Returns NULL (with a WARN) if the
 * backing page allocation fails.
 */
static __meminit struct vmemmap_backing * vmemmap_list_alloc(int node)
{
	struct vmemmap_backing *vmem_back;

	/* get from freed entries first */
	if (num_freed) {
		num_freed--;
		vmem_back = next;
		next = next->list;
		return vmem_back;
	}

	/* allocate a page when required and hand out chunks */
	if (!num_left) {
		next = vmemmap_alloc_block(PAGE_SIZE, node);
		if (unlikely(!next)) {
			WARN_ON(1);
			return NULL;
		}
		num_left = PAGE_SIZE / sizeof(struct vmemmap_backing);
	}

	num_left--;

	return next++;
}
/*
 * Record a phys->virt vmemmap mapping on vmemmap_list.
 *
 * NOTE(review): this extract is truncated mid-function — the tail that
 * presumably stores @phys/@start into vmem_back and links it onto
 * vmemmap_list is not visible here; confirm against the full file.
 * "unsignedlong" is extraction damage for "unsigned long".
 */
static __meminit int vmemmap_list_populate(unsignedlong phys, unsignedlong start, int node)
{ struct vmemmap_backing *vmem_back;
vmem_back = vmemmap_list_alloc(node); if (unlikely(!vmem_back)) {
/* Allocation of the tracking node failed; caller gets -ENOMEM. */
pr_debug("vmemap list allocation failed\n"); return -ENOMEM;
}
/*
 * NOTE(review): fragment — this loop is the interior of a vmemmap-populate
 * routine whose signature and the declarations of 'start', 'end',
 * 'page_size', 'node', 'altmap' and 'altmap_alloc' lie outside this
 * extract.  Code kept byte-identical; "unsignedlong" is extraction damage
 * for "unsigned long".  Per iteration: skip already-populated ranges,
 * allocate backing memory (altmap first, then system memory), record the
 * mapping on the vmemmap list, and free the allocation again if that
 * recording fails.
 */
for (; start < end; start += page_size) { void *p = NULL; int rc;
/* * This vmemmap range is backing different subsections. If any * of that subsection is marked valid, that means we already * have initialized a page table covering this range and hence * the vmemmap range is populated.
*/ if (vmemmap_populated(start, page_size)) continue;
/* * Allocate from the altmap first if we have one. This may * fail due to alignment issues when using 16MB hugepages, so * fall back to system memory if the altmap allocation fail.
*/ if (altmap && !altmap_cross_boundary(altmap, start, page_size)) {
p = vmemmap_alloc_block_buf(page_size, node, altmap); if (!p)
pr_debug("altmap block allocation failed, falling back to system memory"); else
altmap_alloc = true;
} if (!p) {
p = vmemmap_alloc_block_buf(page_size, node, NULL);
altmap_alloc = false;
} if (!p) return -ENOMEM;
if (vmemmap_list_populate(__pa(p), start, node)) { /* * If we don't populate vmemap list, we don't have * the ability to free the allocated vmemmap * pages in section_deactivate. Hence free them * here.
*/ int nr_pfns = page_size >> PAGE_SHIFT; unsignedlong page_order = get_order(page_size);
if (altmap_alloc)
vmem_altmap_free(altmap, nr_pfns); else
free_pages((unsignedlong)p, page_order); return -ENOMEM;
}
/*
 * NOTE(review): fragment — interior of a list-removal routine (presumably
 * vmemmap_list_free; its signature and the declarations of 'vmem_back',
 * 'vmem_back_prev' and 'start' are outside this extract — confirm).
 * Finds the entry for 'start' on vmemmap_list, unlinks it, and pushes it
 * onto the 'next' free list for reuse.  Code kept byte-identical.
 */
/* look for it with prev pointer recorded */ for (; vmem_back; vmem_back = vmem_back->list) { if (vmem_back->virt_addr == start) break;
vmem_back_prev = vmem_back;
}
/* Entry not found: nothing to free (returns 0, i.e. no physical address). */
if (unlikely(!vmem_back)) return 0;
/* remove it from vmemmap_list */ if (vmem_back == vmemmap_list) /* remove head */
vmemmap_list = vmem_back->list; else
vmem_back_prev->list = vmem_back->list;
/* next point to this freed entry */
vmem_back->list = next;
next = vmem_back;
num_freed++;
/*
 * NOTE(review): fragment — interior of a vmemmap-free loop; 'start',
 * 'page_size', 'addr', 'base_pfn', 'alt_start', 'alt_end', 'page',
 * 'nr_pages', 'page_order' and 'altmap' are declared outside this extract.
 * Skips still-populated ranges, then releases the backing memory via the
 * matching mechanism: altmap, reserved/bootmem pages, or the page
 * allocator.  "unsignedlong"/"elseif" are extraction damage for
 * "unsigned long"/"else if".  Code kept byte-identical.
 */
/* * We have already marked the subsection we are trying to remove * invalid. So if we want to remove the vmemmap range, we * need to make sure there is no subsection marked valid * in this range.
*/ if (vmemmap_populated(start, page_size)) continue;
/* Drop the tracking entry; a zero return means no mapping was recorded. */
addr = vmemmap_list_free(start); if (!addr) continue;
/* Backing pfn came from the altmap region — return it there. */
if (base_pfn >= alt_start && base_pfn < alt_end) {
vmem_altmap_free(altmap, nr_pages);
} elseif (PageReserved(page)) { /* allocated from bootmem */ if (page_size < PAGE_SIZE) { /* * this shouldn't happen, but if it is * the case, leave the memory there
*/
WARN_ON_ONCE(1);
} else { while (nr_pages--)
free_reserved_page(page++);
}
} else {
free_pages((unsignedlong)(__va(addr)), page_order);
}
/* * If we're running under a hypervisor, we need to check the contents of * /chosen/ibm,architecture-vec-5 to see if the hypervisor is willing to do * radix. If not, we clear the radix feature bit so we fall back to hash.
*/ staticvoid __init early_check_vec5(void)
{ unsignedlong root, chosen; int size; const u8 *vec5;
u8 mmu_supported;
/*
 * NOTE(review): early_check_vec5() is truncated here — only its locals are
 * visible; the body that reads the device-tree property is outside this
 * extract.  "staticvoid"/"unsignedlong" are extraction damage.  The
 * comment and #define block below belong to a different part of the file
 * (memory-block-size probing), fused in by the extraction.
 */
/* * Outside hotplug the kernel uses this value to map the kernel direct map * with radix. To be compatible with older kernels, let's keep this value * as 16M which is also SECTION_SIZE with SPARSEMEM. We can ideally map * things with 1GB size in the case where we don't support hotplug.
*/ #ifndef CONFIG_MEMORY_HOTPLUG #define DEFAULT_MEMORY_BLOCK_SIZE SZ_16M #else #define DEFAULT_MEMORY_BLOCK_SIZE MIN_MEMORY_BLOCK_SIZE #endif
/*
 * NOTE(review): fragment — interior of a flattened-device-tree scan
 * callback (presumably probe_memory_block_size; its signature and the
 * declarations of 'node', 'uname', 'depth', 'prop', 'l', 'block_size',
 * 'type', 'reg', 'size' and 'compatible' are outside this extract, and
 * a loop head around the 'size' handling appears to be missing — the
 * 'continue' at "update_memory_block_size" has no visible enclosing loop).
 * Code kept byte-identical.
 */
if (depth != 1) return 0; /* * If we have dynamic-reconfiguration-memory node, use the * lmb value.
*/ if (strcmp(uname, "ibm,dynamic-reconfiguration-memory") == 0) {
if (!prop || l < dt_root_size_cells * sizeof(__be32)) /* * Nothing in the device tree
*/
*block_size = DEFAULT_MEMORY_BLOCK_SIZE; else
*block_size = of_read_number(prop, dt_root_size_cells); /* * We have found the final value. Don't probe further.
*/ return 1;
} /* * Find all the device tree nodes of memory type and make sure * the area can be mapped using the memory block size value * we end up using. We start with 1G value and keep reducing * it such that we can map the entire area using memory_block_size. * This will be used on powernv and older pseries that don't * have ibm,lmb-size node. * For ex: with P5 we can end up with * memory@0 -> 128MB * memory@128M -> 64M * This will end up using 64MB memory block size value.
*/
type = of_get_flat_dt_prop(node, "device_type", NULL); if (type == NULL || strcmp(type, "memory") != 0) return 0;
/* Prefer the usable-memory property; fall back to plain "reg". */
reg = of_get_flat_dt_prop(node, "linux,usable-memory", &l); if (!reg)
reg = of_get_flat_dt_prop(node, "reg", &l); if (!reg) return 0;
if (size) {
update_memory_block_size(block_size, size); continue;
} /* * ibm,coherent-device-memory with linux,usable-memory = 0 * Force 256MiB block size. Work around for GPUs on P9 PowerNV * linux,usable-memory == 0 implies driver managed memory and * we can't use large memory block size due to hotplug/unplug * limitations.
*/
compatible = of_get_flat_dt_prop(node, "compatible", NULL); if (compatible && !strcmp(compatible, "ibm,coherent-device-memory")) { if (*block_size > SZ_256M)
*block_size = SZ_256M; /* * We keep 256M as the upper limit with GPU present.
*/ return 0;
}
} /* continue looking for other memory device types */ return 0;
}
/*
 * start with 1G memory block size. Early init will
 * fix this with correct value.
 */
unsigned long memory_block_size __ro_after_init = 1UL << 30;

/* Probe the flattened device tree to settle memory_block_size early. */
static void __init early_init_memory_block_size(void)
{
	/*
	 * We need to do memory_block_size probe early so that
	 * radix__early_init_mmu() can use this as limit for
	 * mapping page size.
	 */
	of_scan_flat_dt(probe_memory_block_size, &memory_block_size);
}
/*
 * NOTE(review): fragment — interior of an early MMU device-tree init
 * function (presumably mmu_early_init_devtree(); its signature and the
 * declarations of 'disable_radix', 'hvmode', 'mmu_lpid_bits' and
 * 'mmu_pid_bits' are outside this extract).  Sequence: honour the
 * disable_radix command line, size LPID/PID fields by CPU generation,
 * consult ibm,architecture-vec-5 when running as a guest, probe the
 * memory block size, then hand off to radix or hash init.  Code kept
 * byte-identical; "#endif/*" is extraction damage for "#endif /*".
 */
/* Disable radix mode based on kernel command line. */ if (disable_radix) { if (IS_ENABLED(CONFIG_PPC_64S_HASH_MMU))
cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; else
pr_warn("WARNING: Ignoring cmdline option disable_radix\n");
}
/* Field widths default by CPU generation when the device tree is silent. */
of_scan_flat_dt(dt_scan_mmu_pid_width, NULL); if (hvmode && !mmu_lpid_bits) { if (early_cpu_has_feature(CPU_FTR_ARCH_207S))
mmu_lpid_bits = 12; /* POWER8-10 */ else
mmu_lpid_bits = 10; /* POWER7 */
} if (!mmu_pid_bits) { if (early_cpu_has_feature(CPU_FTR_ARCH_300))
mmu_pid_bits = 20; /* POWER9-10 */
}
/* * Check /chosen/ibm,architecture-vec-5 if running as a guest. * When running bare-metal, we can use radix if we like * even though the ibm,architecture-vec-5 property created by * skiboot doesn't have the necessary bits set.
*/ if (!hvmode)
early_check_vec5();
early_init_memory_block_size();
if (early_radix_enabled()) {
radix__early_init_devtree();
/* * We have finalized the translation we are going to use by now. * Radix mode is not limited by RMA / VRMA addressing. * Hence don't limit memblock allocations.
*/
ppc64_rma_size = ULONG_MAX;
memblock_set_current_limit(MEMBLOCK_ALLOC_ANYWHERE);
} else
hash__early_init_devtree();
if (IS_ENABLED(CONFIG_HUGETLB_PAGE_SIZE_VARIABLE))
hugetlbpage_init_defaultsize();
/* Sanity check: at least one supported translation mode must remain. */
if (!(cur_cpu_spec->mmu_features & MMU_FTR_HPTE_TABLE) &&
!(cur_cpu_spec->mmu_features & MMU_FTR_TYPE_RADIX))
panic("kernel does not support any MMU type offered by platform");
} #endif/* CONFIG_PPC_BOOK3S_64 */
Messung V0.5
¤ Dauer der Verarbeitung: 0.15 Sekunden
(vorverarbeitet)
¤
Die Informationen auf dieser Webseite wurden
nach bestem Wissen sorgfältig zusammengestellt. Es wird jedoch weder Vollständigkeit, noch Richtigkeit,
noch Qualität der bereit gestellten Informationen zugesichert.
Bemerkung:
Die farbliche Syntaxdarstellung und die Messung sind noch experimentell.