/*
 * This struct contains information about a zone in a zonelist. It is stored
 * here to avoid dereferences into large structures and lookups of tables
 */
struct zoneref {
	struct zone *zone;	/* Pointer to actual zone */
	int zone_idx;		/* zone_idx(zoneref->zone) */
};
/*
 * One allocation request operates on a zonelist. A zonelist
 * is a list of zones, the first one is the 'goal' of the
 * allocation, the other zones are fallback zones, in decreasing
 * priority.
 *
 * To speed the reading of the zonelist, the zonerefs contain the zone index
 * of the entry being read. Helper functions to access information given
 * a struct zoneref are
 *
 * zonelist_zone()	- Return the struct zone * for an entry in _zonerefs
 * zonelist_zone_idx()	- Return the index of the zone for an entry
 * zonelist_node_idx()	- Return the index of the node for an entry
 */
struct zonelist {
	// The last element of the array is left empty and acts as a sentinel
	// that terminates zone iteration - see for_next_zone_zonelist_nodemask()
	struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1];
};
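To make the sentinel concrete, here is a minimal sketch of walking a zonelist by hand; walk_zonelist() is a hypothetical helper for illustration, not a kernel function:

// Minimal sketch (hypothetical helper, not in the kernel): the entry past
// the last populated zone has zone == NULL, so the loop stops without
// needing an explicit element count.
static void walk_zonelist(struct zonelist *zonelist)
{
	struct zoneref *z;

	for (z = zonelist->_zonerefs; z->zone; z++)
		pr_info("zone %s (zone_idx %d)\n", z->zone->name, z->zone_idx);
}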
static void __build_all_zonelists(void *data)
{
	int nid;
	pg_data_t *self = data;

	/*
	 * This node is hotadded and no memory is yet present. So just
	 * building zonelists is fine - no need to touch other nodes.
	 */
	if (self && !node_online(self->node_id)) {
		build_zonelists(self);
	} else {
		for_each_online_node(nid) {
			pg_data_t *pgdat = NODE_DATA(nid);

			build_zonelists(pgdat);
		}
	}
}
/*
 * Build zonelists ordered by zone and nodes within zones.
 * This results in conserving DMA zone[s] until all Normal memory is
 * exhausted, but results in overflowing to remote node while memory
 * may still exist in local DMA zone.
 */
static void build_zonelists(pg_data_t *pgdat)
{
	// pgdat is the node that the current CPU belongs to
	build_zonelists_in_node_order(pgdat, node_order, nr_nodes);
}
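The excerpt above elides how node_order and nr_nodes are produced. As a simplified sketch (following the shape of the mainline implementation; exact details, such as the node_load penalty logic, vary across versions), build_zonelists repeatedly picks the next-best node by NUMA distance:

// Simplified sketch of the elided part of build_zonelists (mainline shape,
// penalty logic omitted): order nodes by NUMA distance from the local node.
static void build_zonelists(pg_data_t *pgdat)
{
	static int node_order[MAX_NUMNODES];
	int node, nr_nodes = 0;
	nodemask_t used_mask;
	int local_node = pgdat->node_id;

	nodes_clear(used_mask);
	memset(node_order, 0, sizeof(node_order));

	// Pick nodes in order of increasing NUMA distance from local_node,
	// so nearby memory is preferred before falling back to remote nodes
	while ((node = find_next_best_node(local_node, &used_mask)) >= 0)
		node_order[nr_nodes++] = node;

	build_zonelists_in_node_order(pgdat, node_order, nr_nodes);
	build_thisnode_zonelists(pgdat);
}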
/*
 * Build zonelists ordered by node and zones within node.
 * This results in maximum locality--normal zone overflows into local
 * DMA zone, if any--but risks exhausting DMA zone.
 */
static void build_zonelists_in_node_order(pg_data_t *pgdat, int *node_order,
		unsigned nr_nodes)
{
	struct zoneref *zonerefs;
	int i;

	// --------------------------------------------------------------- (1)
	// Get the fallback zonelist of the current node
	zonerefs = pgdat->node_zonelists[ZONELIST_FALLBACK]._zonerefs;
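	// Sketch of the remainder of the function (following the mainline
	// shape): append each node's populated zones in node_order, then
	// terminate the list with the sentinel entry discussed above.
	for (i = 0; i < nr_nodes; i++) {
		int nr_zones;
		pg_data_t *node = NODE_DATA(node_order[i]);

		nr_zones = build_zonerefs_node(node, zonerefs);
		zonerefs += nr_zones;
	}
	// Write the sentinel entry that for_next_zone_zonelist_nodemask()
	// uses to stop iterating
	zonerefs->zone = NULL;
	zonerefs->zone_idx = 0;
}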
/*
 * Builds allocation fallback zone lists.
 * Add all populated zones of a node to the zonelist.
 */
static int build_zonerefs_node(pg_data_t *pgdat, struct zoneref *zonerefs)
{
	struct zone *zone;
	enum zone_type zone_type = MAX_NR_ZONES;
	int nr_zones = 0;
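	// Sketch of the rest of the function (mainline shape, for
	// illustration): walk the node's zones from highest to lowest and
	// record only the managed ones in the zoneref array.
	do {
		zone_type--;
		zone = pgdat->node_zones + zone_type;
		if (managed_zone(zone)) {
			zoneref_set_zone(zone, &zonerefs[nr_zones++]);
			check_highest_zone(zone_type);
		}
	} while (zone_type);

	return nr_zones;
}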
/*
 * Set up kernel memory allocators
 */
static void __init mm_init(void)
{
	/*
	 * page_ext requires contiguous pages,
	 * bigger than MAX_ORDER unless SPARSEMEM.
	 */
	// Under the sparsemem memory model this function is a no-op
	page_ext_init_flatmem();
	// This is the key function here
	mem_init();
	kmem_cache_init();
	pgtable_init();
	debug_objects_mem_init();
	vmalloc_init();
	ioremap_huge_init();
	/* Should be run before the first non-init thread is created */
	init_espfix_bsp();
	/* Should be run after espfix64 is set up. */
	pti_init();
}
/*
 * mem_init() marks the free areas in the mem_map and tells us how much memory
 * is free. This is done after various parts of the system have claimed their
 * memory after the kernel image.
 */
void __init mem_init(void)
{
	if (swiotlb_force == SWIOTLB_FORCE ||
	    max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT))
		swiotlb_init(1);
	else
		swiotlb_force = SWIOTLB_NO_FORCE;

	// Update max_mapnr. Under the sparsemem model neither max_mapnr nor
	// mem_map is used, so when booting with sparsemem this line can be
	// ignored.
	set_max_mapnr(pfn_to_page(max_pfn) - mem_map);
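	// For reference, set_max_mapnr() is trivial; a sketch of its shape
	// in include/linux/mm.h (modulo the exact config guards):
	//
	//	static inline void set_max_mapnr(unsigned long limit)
	//	{
	//		max_mapnr = limit;
	//	}
	//
	// and under sparsemem/multi-node configs it compiles to a no-op,
	// which is why the call above has no effect here.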
#ifndef CONFIG_SPARSEMEM_VMEMMAP
	// ----------------------------------------------------------- (1)
	free_unused_memmap();
#endif
	/* this will put all unused low memory onto the freelists */
	memblock_free_all();
kexec_reserve_crashkres_pages();
mem_init_print_info(NULL);
	/*
	 * Check boundaries twice: Some fundamental inconsistencies can be
	 * detected at build time already.
	 */
#ifdef CONFIG_COMPAT
	BUILD_BUG_ON(TASK_SIZE_32 > DEFAULT_MAP_WINDOW_64);
#endif
	if (PAGE_SIZE >= 16384 && get_num_physpages() <= 128) {
		extern int sysctl_overcommit_memory;
		/*
		 * On a machine this small we won't get anywhere without
		 * overcommit, so turn it on by default.
		 */
		sysctl_overcommit_memory = OVERCOMMIT_ALWAYS;
	}
}
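The memblock_free_all() call in mem_init() above is where memblock hands its free memory over to the buddy allocator. A sketch following the mainline shape (the per-range loop lives in free_low_memory_core_early(), excerpted further below):

void __init memblock_free_all(void)
{
	unsigned long pages;

	// Reset each zone's managed-page counters before handing pages over
	reset_all_zones_managed_pages();

	// Release every free memblock range to the buddy allocator and
	// account the result in totalram_pages
	pages = free_low_memory_core_early();
	totalram_pages_add(pages);
}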
/*
 * The mem_map array can get very big. Free the unused area of the memory map.
 */
static void __init free_unused_memmap(void)
{
	unsigned long start, prev_end = 0;
	struct memblock_region *reg;

	for_each_memblock(memory, reg) {
		start = __phys_to_pfn(reg->base);
	// As I understand it, sparsemem does not spend extra memmap space on
	// pages that don't exist, so this path won't actually free any memory
	// that memblock allocated earlier
#ifdef CONFIG_SPARSEMEM
		/*
		 * Take care not to free memmap entries that don't exist due
		 * to SPARSEMEM sections which aren't present.
		 */
		// ALIGN() here rounds up
		start = min(start, ALIGN(prev_end, PAGES_PER_SECTION));
#endif
		/*
		 * If we had a previous bank, and there is a space between the
		 * current bank and the previous, free it.
		 */
		if (prev_end && prev_end < start)
			free_memmap(prev_end, start);
		/*
		 * Align up here since the VM subsystem insists that the
		 * memmap entries are valid from the bank end aligned to
		 * MAX_ORDER_NR_PAGES.
		 */
		prev_end = ALIGN(__phys_to_pfn(reg->base + reg->size),
				 MAX_ORDER_NR_PAGES);
	}
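For completeness, free_memmap() converts the unused pfn range into the physical addresses of the corresponding struct page entries and hands them back to memblock; a sketch along the lines of the arm64 implementation:

static void __init free_memmap(unsigned long start_pfn, unsigned long end_pfn)
{
	struct page *start_pg, *end_pg;
	unsigned long pg, pgend;

	// Convert the pfn range to the struct page entries that describe it
	start_pg = pfn_to_page(start_pfn - 1) + 1;
	end_pg = pfn_to_page(end_pfn - 1) + 1;

	// Convert to physical addresses; round the start up and the end down
	pg = (unsigned long)PAGE_ALIGN(__pa(start_pg));
	pgend = (unsigned long)__pa(end_pg) & PAGE_MASK;

	// If whole pages of memmap lie in between, return them to memblock
	if (pg < pgend)
		memblock_free(pg, pgend - pg);
}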
	/*
	 * We need to use NUMA_NO_NODE instead of NODE_DATA(0)->node_id
	 * because in some case like Node0 doesn't have RAM installed
	 * low ram will be on Node1
	 */
	// --------------------------------------------- (2)
	for_each_free_mem_range(i, NUMA_NO_NODE, MEMBLOCK_NONE, &start, &end,
				NULL)
		count += __free_memory_core(start, end);
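__free_memory_core() then clamps each free range to max_low_pfn and releases it to the buddy allocator; a sketch following the shape of mm/memblock.c:

static unsigned long __init __free_memory_core(phys_addr_t start,
					       phys_addr_t end)
{
	// Round the start up and the end down to whole page frames, and
	// never release anything above max_low_pfn
	unsigned long start_pfn = PFN_UP(start);
	unsigned long end_pfn = min_t(unsigned long,
				      PFN_DOWN(end), max_low_pfn);

	if (start_pfn >= end_pfn)
		return 0;

	// Hand the pfn range to the buddy allocator
	__free_pages_memory(start_pfn, end_pfn);

	return end_pfn - start_pfn;
}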