The buddy memory allocation technique is a memory allocation algorithm that divides memory into partitions to satisfy a memory request as suitably as possible. It works by repeatedly splitting memory into halves to find the best fit. According to Donald Knuth, the buddy system was invented in 1963 by Harry Markowitz and was first described by Kenneth C. Knowlton (published 1965). Buddy memory allocation is relatively easy to implement, and it supports limited but efficient splitting and coalescing of memory blocks.
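The property that makes splitting and coalescing cheap is that a block of size 2^order starting at page offset off has exactly one "buddy" at offset off ^ (1 << order), and the two can only ever be merged with each other. The standalone sketch below (hypothetical helper name, not kernel code) illustrates that arithmetic:

#include <stdio.h>

/* Offset (in pages) of the buddy of the block starting at 'off' with size 2^order. */
static unsigned long buddy_of(unsigned long off, unsigned int order)
{
    return off ^ (1UL << order);
}

int main(void)
{
    unsigned long off = 8;   /* an order-2 block: pages 8..11 */
    unsigned int order = 2;

    /* Coalescing: pages 8..11 and 12..15 merge back into one order-3 block. */
    printf("buddy of [%lu, order %u) is %lu\n", off, order, buddy_of(off, order));

    /* Splitting: the order-2 block yields two order-1 buddies at 8 and 10. */
    printf("splitting gives order-%u blocks at %lu and %lu\n",
           order - 1, off, buddy_of(off, order - 1));
    return 0;
}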
enum migratetype {
    MIGRATE_UNMOVABLE,   // unmovable pages; regions the kernel maps linearly can never be moved
    MIGRATE_MOVABLE,     // movable pages
    MIGRATE_RECLAIMABLE, // reclaimable pages
    /* the number of types on the pcp lists */
    MIGRATE_PCPTYPES,
    MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES,
#ifdef CONFIG_CMA
    /*
     * MIGRATE_CMA migration type is designed to mimic the way
     * ZONE_MOVABLE works. Only movable pages can be allocated
     * from MIGRATE_CMA pageblocks and page allocator never
     * implicitly change migration type of MIGRATE_CMA pageblock.
     *
     * The way to use it is to change migratetype of a range of
     * pageblocks to MIGRATE_CMA which can be done by
     * __free_pageblock_cma() function. What is important though
     * is that a range of pageblocks must be aligned to
     * MAX_ORDER_NR_PAGES should biggest page be bigger then
     * a single pageblock.
     */
    MIGRATE_CMA,
#endif
#ifdef CONFIG_MEMORY_ISOLATION
    MIGRATE_ISOLATE,     /* can't allocate from here */
#endif
    MIGRATE_TYPES
};
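For reference, the migrate type is what indexes the per-order free lists inside the buddy allocator. In include/linux/mmzone.h (5.x kernels; the exact field layout varies slightly between versions) each order's free area keeps one list per migrate type:

struct free_area {
    struct list_head  free_list[MIGRATE_TYPES]; /* one free list per migratetype */
    unsigned long     nr_free;                  /* total free blocks of this order */
};

struct zone {
    /* ... */
    struct free_area  free_area[MAX_ORDER];     /* one free_area per order */
    /* ... */
};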
A very important concept in Linux memory management is the watermark. Watermarks determine the flow of memory allocation and how a request is handled, so I will briefly introduce them here. The content below is drawn mainly from section 5.2 of 一步一图带你深入理解 Linux 物理内存管理 (watermarks in physical memory zones) and section 3.1 of 深入理解 Linux 物理内存分配全链路实现 (the ALLOC_* allocation behaviour flags).
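Concretely, each zone records three watermark levels, defined in include/linux/mmzone.h. The accessor macros shown below are from 5.x kernels (older kernels use zone->watermark[] without the boost term), so treat the exact field names as version-dependent:

enum zone_watermarks {
    WMARK_MIN,   /* below this, only allocations allowed to dig into reserves succeed */
    WMARK_LOW,   /* below this, kswapd is woken to reclaim in the background */
    WMARK_HIGH,  /* kswapd keeps reclaiming until free memory climbs back above this */
    NR_WMARK
};

#define min_wmark_pages(z)  (z->_watermark[WMARK_MIN]  + z->watermark_boost)
#define low_wmark_pages(z)  (z->_watermark[WMARK_LOW]  + z->watermark_boost)
#define high_wmark_pages(z) (z->_watermark[WMARK_HIGH] + z->watermark_boost)
#define wmark_pages(z, i)   (z->_watermark[i] + z->watermark_boost)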
/* The ALLOC_WMARK bits are used as an index to zone->watermark */
#define ALLOC_WMARK_MIN     WMARK_MIN
#define ALLOC_WMARK_LOW     WMARK_LOW
#define ALLOC_WMARK_HIGH    WMARK_HIGH
#define ALLOC_NO_WATERMARKS 0x04 /* don't check watermarks at all */

/* Mask to get the watermark bits */
#define ALLOC_WMARK_MASK    (ALLOC_NO_WATERMARKS-1)

/*
 * Only MMU archs have async oom victim reclaim - aka oom_reaper so we
 * cannot assume a reduced access to memory reserves is sufficient for
 * !MMU
 */
#ifdef CONFIG_MMU
#define ALLOC_OOM           0x08
#else
#define ALLOC_OOM           ALLOC_NO_WATERMARKS
#endif

#define ALLOC_HARDER        0x10  /* try to alloc harder */
#define ALLOC_HIGH          0x20  /* __GFP_HIGH set */
#define ALLOC_CPUSET        0x40  /* check for correct cpuset */
#define ALLOC_CMA           0x80  /* allow allocations from CMA areas */
#ifdef CONFIG_ZONE_DMA32
#define ALLOC_NOFRAGMENT    0x100 /* avoid mixing pageblock types */
#else
#define ALLOC_NOFRAGMENT    0x0
#endif
#define ALLOC_KSWAPD        0x200 /* allow waking of kswapd */
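In other words, the low two bits of alloc_flags are nothing more than an index into the zone's watermark array, and everything above them is an independent bit flag. A tiny standalone sketch (reproducing the constants above outside the kernel, purely for illustration):

#include <stdio.h>

enum zone_watermarks { WMARK_MIN, WMARK_LOW, WMARK_HIGH, NR_WMARK };

#define ALLOC_WMARK_LOW     WMARK_LOW
#define ALLOC_NO_WATERMARKS 0x04
#define ALLOC_WMARK_MASK    (ALLOC_NO_WATERMARKS - 1)  /* == 0x03 */
#define ALLOC_HARDER        0x10
#define ALLOC_KSWAPD        0x200

int main(void)
{
    /* __alloc_pages_nodemask() starts from the low watermark and allows kswapd wakeups. */
    unsigned int alloc_flags = ALLOC_WMARK_LOW | ALLOC_KSWAPD;

    /* The watermark index is recovered by masking off everything but the low bits. */
    unsigned int wmark_idx = alloc_flags & ALLOC_WMARK_MASK;

    printf("watermark index = %u (WMARK_LOW = %u)\n", wmark_idx, WMARK_LOW);
    printf("harder allocation? %s\n", (alloc_flags & ALLOC_HARDER) ? "yes" : "no");
    return 0;
}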
/*
 * This array describes the order lists are fallen back to when
 * the free lists for the desirable migrate type are depleted
 */
static int fallbacks[MIGRATE_TYPES][4] = {
    [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_TYPES },
    [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
    [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_TYPES },
#ifdef CONFIG_CMA
    [MIGRATE_CMA]         = { MIGRATE_TYPES }, /* Never used */
#endif
#ifdef CONFIG_MEMORY_ISOLATION
    [MIGRATE_ISOLATE]     = { MIGRATE_TYPES }, /* Never used */
#endif
};
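When the free list for the requested migrate type is empty, the allocator walks this table in order until it finds a type it can steal a block from or hits the MIGRATE_TYPES sentinel. Below is a standalone sketch of that lookup (a simplified model with hypothetical helper names, not the real __rmqueue_fallback()):

#include <stdbool.h>
#include <stdio.h>

enum { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, MIGRATE_TYPES };

static int fallbacks[MIGRATE_TYPES][3] = {
    [MIGRATE_UNMOVABLE]   = { MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE,   MIGRATE_TYPES },
    [MIGRATE_MOVABLE]     = { MIGRATE_RECLAIMABLE, MIGRATE_UNMOVABLE, MIGRATE_TYPES },
    [MIGRATE_RECLAIMABLE] = { MIGRATE_UNMOVABLE,   MIGRATE_MOVABLE,   MIGRATE_TYPES },
};

/* Pretend only the MOVABLE free list still has blocks available. */
static bool list_has_free_block(int migratetype)
{
    return migratetype == MIGRATE_MOVABLE;
}

/* Return the migrate type we end up stealing from, or -1 if nothing is left. */
static int pick_fallback(int start_migratetype)
{
    for (int i = 0; ; i++) {
        int fallback = fallbacks[start_migratetype][i];
        if (fallback == MIGRATE_TYPES)      /* sentinel: nothing left to try */
            return -1;
        if (list_has_free_block(fallback))
            return fallback;
    }
}

int main(void)
{
    printf("UNMOVABLE falls back to type %d\n", pick_fallback(MIGRATE_UNMOVABLE));
    return 0;
}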
/*
 * Allocate pages, preferring the node given as nid. When nid == NUMA_NO_NODE,
 * prefer the current CPU's closest node. Otherwise node must be valid and
 * online.
 */
static inline struct page *alloc_pages_node(int nid, gfp_t gfp_mask,
                                            unsigned int order)
{
    // If nid == NUMA_NO_NODE, pick the node closest to the current CPU
    if (nid == NUMA_NO_NODE)
        nid = numa_mem_id();

    return __alloc_pages_node(nid, gfp_mask, order);
}
/*
 * Allocate pages, preferring the node given as nid. The node must be valid and
 * online. For more general interface, see alloc_pages_node().
 */
static inline struct page *
__alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order)
{
    // Simple sanity checks on nid and gfp_mask
    VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES);
    VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid));

    return __alloc_pages(gfp_mask, order, nid);
}
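These are the wrappers that drivers typically reach through alloc_pages()/alloc_pages_node(). A minimal module-style usage sketch (GFP_KERNEL and order 2 are chosen here purely for illustration):

#include <linux/module.h>
#include <linux/gfp.h>
#include <linux/mm.h>
#include <linux/numa.h>

static struct page *pages;

static int __init buddy_demo_init(void)
{
    /* Ask the buddy allocator for 2^2 = 4 physically contiguous pages
     * on the node closest to the current CPU. */
    pages = alloc_pages_node(NUMA_NO_NODE, GFP_KERNEL, 2);
    if (!pages)
        return -ENOMEM;

    pr_info("buddy_demo: got 4 pages at %p\n", page_address(pages));
    return 0;
}

static void __exit buddy_demo_exit(void)
{
    /* Return the whole order-2 block; the order must match the allocation. */
    __free_pages(pages, 2);
}

module_init(buddy_demo_init);
module_exit(buddy_demo_exit);
MODULE_LICENSE("GPL");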
/*
 * This is the 'heart' of the zoned buddy allocator.
 */
struct page *__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
                                    int preferred_nid, nodemask_t *nodemask)
{
    struct page *page;
    // By default, allocation proceeds normally as long as the zone stays above
    // the low watermark
    unsigned int alloc_flags = ALLOC_WMARK_LOW;
    gfp_t alloc_mask; /* The gfp_t that was actually used for allocation */
    struct alloc_context ac = { };
    /*
     * There are several places where we assume that the order value is sane
     * so bail out early if the request is out of bound.
     */
    if (unlikely(order >= MAX_ORDER)) {
        WARN_ON_ONCE(!(gfp_mask & __GFP_NOWARN));
        return NULL;
    }
    /*
     * Forbid the first pass from falling back to types that fragment
     * memory until all local zones are considered.
     */
    // Sets allocation flags that help avoid memory fragmentation; can be ignored for now
    alloc_flags |= alloc_flags_nofragment(ac.preferred_zoneref->zone, gfp_mask);
    /* First allocation attempt */
    // ---------------------------------------------------------------------------------- (3)
    // First attempt at allocating memory; the watermark used here is ALLOC_WMARK_LOW
    page = get_page_from_freelist(alloc_mask, order, alloc_flags, &ac);
    if (likely(page))
        goto out;

    // Reaching this point means the allocation above failed, and we need to retry the
    // allocation via __alloc_pages_slowpath()
    /*
     * Apply scoped allocation constraints. This is mainly about GFP_NOFS
     * resp. GFP_NOIO which has to be inherited for all allocation requests
     * from a particular context which has been marked by
     * memalloc_no{fs,io}_{save,restore}.
     */
    // Clear (__GFP_IO | __GFP_FS) from the GFP mask where the task context requires it
    alloc_mask = current_gfp_context(gfp_mask);
    ac.spread_dirty_pages = false;
    /*
     * Restore the original nodemask if it was potentially replaced with
     * &cpuset_current_mems_allowed to optimize the fast-path attempt.
     */
    if (unlikely(ac.nodemask != nodemask))
        ac.nodemask = nodemask;

    // Try the allocation again. Since the first attempt failed, memory is probably
    // running short, so more work is done here (e.g. reclaiming memory) before the
    // allocation is retried
    page = __alloc_pages_slowpath(alloc_mask, order, &ac);
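To sum up the path walked above, the sketch below condenses the structure of __alloc_pages_nodemask into illustrative, C-style pseudocode. It is not the literal kernel source (the prepare_alloc_pages() setup and the out: label handling are omitted from the excerpts), but the function names match the real ones:

/* Illustrative outline only -- not compilable kernel code. */
struct page *alloc_outline(gfp_t gfp_mask, unsigned int order, struct alloc_context *ac)
{
    unsigned int alloc_flags = ALLOC_WMARK_LOW;   /* fast path checks the low watermark */
    struct page *page;

    /* (3) Fast path: scan the zonelist and take a block straight off a per-zone
     * free list, but only from zones that are still above the low watermark. */
    page = get_page_from_freelist(gfp_mask, order, alloc_flags, ac);
    if (page)
        return page;

    /* Slow path: wake kswapd, retry against the min watermark, and if that still
     * fails fall back to direct reclaim/compaction and eventually the OOM killer. */
    return __alloc_pages_slowpath(gfp_mask, order, ac);
}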