/**
 * kmem_cache_create - Create a cache.
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @ctor: A constructor for the objects.
 *
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline. This can be beneficial if you're counting cycles as closely
 * as davem.
 *
 * Return: a pointer to the cache on success, NULL on failure.
 */
struct kmem_cache *
kmem_cache_create(const char *name, unsigned int size, unsigned int align,
                slab_flags_t flags, void (*ctor)(void *))
{
        return kmem_cache_create_usercopy(name, size, align, flags, 0, 0,
                                          ctor);
}
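// A minimal usage sketch (illustrative only, not part of the kernel source
// being analysed): a hypothetical module creates a dedicated cache for its
// own object type, and destroys the cache on exit. struct foo, foo_cache,
// foo_init() and foo_exit() are made-up names.
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/list.h>

struct foo {
        int id;
        struct list_head node;
};

static struct kmem_cache *foo_cache;

static int __init foo_init(void)
{
        /* No constructor needed; align objects to a hardware cache line. */
        foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
                                      0, SLAB_HWCACHE_ALIGN, NULL);
        if (!foo_cache)
                return -ENOMEM;
        return 0;
}

static void __exit foo_exit(void)
{
        kmem_cache_destroy(foo_cache);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");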
/**
 * kmem_cache_create_usercopy - Create a cache with a region suitable
 * for copying to userspace
 * @name: A string which is used in /proc/slabinfo to identify this cache.
 * @size: The size of objects to be created in this cache.
 * @align: The required alignment for the objects.
 * @flags: SLAB flags
 * @useroffset: Usercopy region offset
 * @usersize: Usercopy region size
 * @ctor: A constructor for the objects.
 *
 * Cannot be called within an interrupt, but can be interrupted.
 * The @ctor is run when new pages are allocated by the cache.
 *
 * The flags are
 *
 * %SLAB_POISON - Poison the slab with a known test pattern (a5a5a5a5)
 * to catch references to uninitialised memory.
 *
 * %SLAB_RED_ZONE - Insert `Red` zones around the allocated memory to check
 * for buffer overruns.
 *
 * %SLAB_HWCACHE_ALIGN - Align the objects in this cache to a hardware
 * cacheline. This can be beneficial if you're counting cycles as closely
 * as davem.
 *
 * Return: a pointer to the cache on success, NULL on failure.
 */
struct kmem_cache *
kmem_cache_create_usercopy(const char *name,
                  unsigned int size, unsigned int align,
                  slab_flags_t flags,
                  unsigned int useroffset, unsigned int usersize,
                  void (*ctor)(void *))
{
        struct kmem_cache *s = NULL;
        const char *cache_name;
        int err;
        /* Refuse requests with allocator specific flags */
        // If flags has any bit set outside SLAB_FLAGS_PERMITTED, return an error right away.
        if (flags & ~SLAB_FLAGS_PERMITTED) {
                err = -EINVAL;
                goto out_unlock;
        }
        /*
         * Some allocators will constrain the set of valid flags to a subset
         * of all flags. We expect them to define CACHE_CREATE_MASK in this
         * case, and we'll just provide them with a sanitized version of the
         * passed flags.
         */
        flags &= CACHE_CREATE_MASK;

        /* Fail closed on bad usersize or useroffset values. */
        if (WARN_ON(!usersize && useroffset) ||
            WARN_ON(size < usersize || size - usersize < useroffset))
                usersize = useroffset = 0;

        // Based on size, align, flags and so on, search slab_caches for an
        // existing dedicated slab cache that is close enough to be reused.
        if (!usersize)
                s = __kmem_cache_alias(name, size, align, flags, ctor);
        if (s)
                goto out_unlock;

        // Build the slab cache's name from the supplied name and the GFP flag.
        cache_name = kstrdup_const(name, GFP_KERNEL);
        if (!cache_name) {
                err = -ENOMEM;
                goto out_unlock;
        }

        // ----------------------------------------------------------------- (1)
        // Actually create the slab cache.
        s = create_cache(cache_name, size,
                         calculate_alignment(flags, align, size),
                         flags, useroffset, usersize, ctor, NULL, NULL);
        if (IS_ERR(s)) {
                err = PTR_ERR(s);
                kfree_const(cache_name);
        }
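// Illustrative sketch only: creating a cache with a whitelisted usercopy
// region. Here only the u_name field of a hypothetical struct sess may be
// copied to/from user space; anything outside that window is rejected by
// hardened usercopy. struct sess and sess_cache are made-up names, and
// sizeof_field() is assumed to be available (older trees spell it
// FIELD_SIZEOF()).
#include <linux/slab.h>
#include <linux/stddef.h>

struct sess {
        unsigned long   key;            /* kernel-internal, never copied */
        char            u_name[32];     /* exposed via copy_to_user()    */
        void            *priv;          /* kernel-internal               */
};

static struct kmem_cache *sess_cache;

static int sess_cache_init(void)
{
        sess_cache = kmem_cache_create_usercopy("sess_cache",
                        sizeof(struct sess), 0, SLAB_HWCACHE_ALIGN,
                        offsetof(struct sess, u_name),          /* useroffset */
                        sizeof_field(struct sess, u_name),      /* usersize   */
                        NULL);
        return sess_cache ? 0 : -ENOMEM;
}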
// ###########################################################################
// kmem_cache_alloc()
// ###########################################################################

/**
 * kmem_cache_alloc - Allocate an object
 * @cachep: The cache to allocate from.
 * @flags: See kmalloc().
 *
 * Allocate an object from this cache. The flags are only relevant
 * if the cache has no available objects.
 */
void *kmem_cache_alloc(struct kmem_cache *cachep, gfp_t flags)
{
        void *ret = slab_alloc(cachep, flags, _RET_IP_);
        STATS_INC_ALLOCMISS(cachep);
        // ----------------------------------------------------------------- (2)
        // If (1) failed, we have to look for objects in kmem_cache_node[]
        // (refilling the per-CPU cache by whatever means are available).
        objp = cache_alloc_refill(cachep, flags);
        /*
         * the 'ac' may be updated by cache_alloc_refill(),
         * and kmemleak_erase() requires its correct value.
         */
        ac = cpu_cache_get(cachep);
out:
        /*
         * To avoid a false negative, if an object that is in one of the
         * per-CPU caches is leaked, we need to make sure kmemleak doesn't
         * treat the array pointers as a reference to the object.
         */
        if (objp)
                kmemleak_erase(&ac->entry[ac->avail]);
        return objp;
}
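// Continuing the hypothetical foo_cache sketch from earlier: allocating and
// freeing a single object from the cache. GFP_KERNEL means the call may
// sleep; GFP_ATOMIC would be used from contexts that must not.
static struct foo *foo_new(int id)
{
        struct foo *f = kmem_cache_alloc(foo_cache, GFP_KERNEL);

        if (!f)
                return NULL;
        f->id = id;
        INIT_LIST_HEAD(&f->node);
        return f;
}

static void foo_del(struct foo *f)
{
        kmem_cache_free(foo_cache, f);
}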
        /* See if we can refill from the shared array */
        // static int transfer_objects(struct array_cache *to, struct array_cache *from, unsigned int max)
        // If the shared array_cache holds at least one object, copy objects from
        // the shared array_cache into the per-CPU cache. The copied entries are
        // not cleared from the shared array_cache afterwards; only ac->avail is
        // updated.
        if (shared && transfer_objects(ac, shared, batchcount)) {
                shared->touched = 1;
                goto alloc_done;
        }

        // Getting here means the condition (n->free_objects && !shared) holds.
        while (batchcount > 0) {
                /* Get slab alloc is to come from. */
                // Look for a slab page on the kmem_cache_node[] lists: slabs_partial
                // first, then slabs_free.
                page = get_first_slab(n, false);
                // Still no slab page: we have to get fresh pages from the buddy
                // allocator.
                if (!page)
                        goto must_grow;
direct_grow:
        // In most cases ac->avail != 0 by the time we get here, and only rarely
        // ac->avail == 0. So this block is usually skipped and only occasionally
        // executed to top up the slab cache.
        if (unlikely(!ac->avail)) {
                /* Check if we can use obj in pfmemalloc slab */
                // Returns 0 when CONFIG_NET is not defined.
                if (sk_memalloc_socks()) {
                        void *obj = cache_alloc_pfmemalloc(cachep, n, flags);
                /*
                 * cache_grow_begin() can reenable interrupts,
                 * then ac could change.
                 */
                ac = cpu_cache_get(cachep);
                // At this point we very likely have a fresh page, so alloc_block()
                // can be called again to refill the per-CPU cache.
                if (!ac->avail && page)
                        alloc_block(cachep, ac, page, batchcount); // -------- (3)
                cache_grow_end(cachep, page);
                // If ac->avail is still 0 at this point, there is nothing more
                // we can do.
                if (!ac->avail)
                        return NULL;
        }
        ac->touched = 1;
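// A small userspace model (an illustration under assumptions, not the kernel
// code itself) of the transfer_objects() step used by cache_alloc_refill()
// above: move up to min(from->avail, max, room left in 'to') object pointers
// from a shared-array stand-in to a per-CPU stand-in, updating only the
// avail counters and leaving the source entries in place.
#include <stdio.h>
#include <string.h>

struct ac_model {                       /* stand-in for struct array_cache */
        unsigned int avail;
        unsigned int limit;
        void *entry[8];
};

static unsigned int transfer_objects_model(struct ac_model *to,
                                           struct ac_model *from,
                                           unsigned int max)
{
        unsigned int nr = from->avail;

        if (nr > max)
                nr = max;
        if (nr > to->limit - to->avail)
                nr = to->limit - to->avail;
        if (!nr)
                return 0;

        /* Copy the topmost nr pointers; 'from' keeps its stale entries. */
        memcpy(to->entry + to->avail, from->entry + from->avail - nr,
               sizeof(void *) * nr);
        from->avail -= nr;
        to->avail += nr;
        return nr;
}

int main(void)
{
        struct ac_model cpu    = { .avail = 0, .limit = 8 };
        struct ac_model shared = { .avail = 5, .limit = 8 };
        unsigned int moved = transfer_objects_model(&cpu, &shared, 4);

        printf("moved %u, cpu.avail=%u shared.avail=%u\n",
               moved, cpu.avail, shared.avail);
        return 0;
}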
// ###########################################################################
// alloc_block()
// ###########################################################################

/*
 * Slab list should be fixed up by fixup_slab_list() for existing slab
 * or cache_grow_end() for new slab
 */
static __always_inline int alloc_block(struct kmem_cache *cachep,
                struct array_cache *ac, struct page *page, int batchcount)
{
        /*
         * There must be at least one object available for
         * allocation.
         */
        // page->active is the current index into the freelist. It is not a
        // direct object index, but it must stay below the number of objects
        // per slab recorded in the kmem_cache.
        BUG_ON(page->active >= cachep->num);
        while (page->active < cachep->num && batchcount--) {
                STATS_INC_ALLOCED(cachep);
                STATS_INC_ACTIVE(cachep);
                STATS_SET_HIGH(cachep);
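// Userspace model (a simplified sketch, not the kernel implementation) of the
// freelist bookkeeping that alloc_block() relies on: page->active acts as a
// cursor into an array of free object indices, so handing out one object is
// just "look up freelist[active], then active++", and the loop above stops
// once active reaches the per-slab object count cachep->num.
#include <stdio.h>

#define NUM_OBJS 4

struct slab_model {
        unsigned int active;              /* plays the role of page->active */
        unsigned char freelist[NUM_OBJS]; /* indices of still-free objects  */
        char objs[NUM_OBJS][32];          /* the object memory itself       */
};

static void *slab_get_obj_model(struct slab_model *s)
{
        unsigned int idx = s->freelist[s->active++];

        return s->objs[idx];
}

int main(void)
{
        struct slab_model s = { .active = 0, .freelist = { 2, 0, 3, 1 } };

        while (s.active < NUM_OBJS) {
                void *obj = slab_get_obj_model(&s);

                printf("handed out object at %p\n", obj);
        }
        return 0;
}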
/*
 * Grow (by 1) the number of slabs within a cache. This is called by
 * kmem_cache_alloc() when there are no active objs left in a cache.
 */
static struct page *cache_grow_begin(struct kmem_cache *cachep,
                                gfp_t flags, int nodeid)
{
        void *freelist;
        size_t offset;
        gfp_t local_flags;
        int page_node;
        struct kmem_cache_node *n;
        struct page *page;
        /*
         * Be lazy and only check for valid flags here, keeping it out of the
         * critical path in kmem_cache_alloc().
         */
        if (unlikely(flags & GFP_SLAB_BUG_MASK)) {
                gfp_t invalid_mask = flags & GFP_SLAB_BUG_MASK;

                flags &= ~GFP_SLAB_BUG_MASK;
                pr_warn("Unexpected gfp: %#x (%pGg). Fixing up to gfp: %#x (%pGg). Fix your code!\n",
                        invalid_mask, &invalid_mask, flags, &flags);
                dump_stack();
        }
        WARN_ON_ONCE(cachep->ctor && (flags & __GFP_ZERO));
        local_flags = flags & (GFP_CONSTRAINT_MASK|GFP_RECLAIM_MASK);
        check_irq_off();
        if (gfpflags_allow_blocking(local_flags))
                local_irq_enable();
        /*
         * Get mem for the objs. Attempt to allocate a physical page from
         * 'nodeid'.
         */
        // ----------------------------------------------------------------- (1)
        page = kmem_getpages(cachep, local_flags, nodeid);
        if (!page)
                goto failed;
        page_node = page_to_nid(page);
        // Get the kmem_cache_node[] entry that corresponds to this page.
        n = get_node(cachep, page_node);
        /* Get colour for the slab, and calc the next value. */
        // colour and offset mainly adjust where objects start within the page;
        // this is discussed in detail at (2) below.
        n->colour_next++;
        if (n->colour_next >= cachep->colour)
                n->colour_next = 0;
        offset = n->colour_next;
        if (offset >= cachep->colour)
                offset = 0;
offset *= cachep->colour_off;
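// Small illustration (made-up numbers, not from the source above) of the
// colouring arithmetic just computed: with colour = 4 possible colours and
// colour_off = 64 (one cache line), successive slabs of the same cache start
// their objects at byte offsets 0, 64, 128, 192, 0, ... so objects in
// different slabs do not all compete for the same cache lines.
#include <stdio.h>

int main(void)
{
        unsigned int colour = 4, colour_off = 64, colour_next = 0;
        int slab;

        for (slab = 0; slab < 6; slab++) {
                unsigned int offset = colour_next * colour_off;

                printf("slab %d: colour offset %u bytes\n", slab, offset);
                if (++colour_next >= colour)
                        colour_next = 0;
        }
        return 0;
}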
        /*
         * Call kasan_poison_slab() before calling alloc_slabmgmt(), so
         * page_address() in the latter returns a non-tagged pointer,
         * as it should be for slab pages.
         */
        kasan_poison_slab(page);
/*
 * Get the memory for a slab management obj.
 *
 * For a slab cache when the slab descriptor is off-slab, the
 * slab descriptor can't come from the same cache which is being created,
 * because if that were the case, we would defer the creation of
 * the kmalloc_{dma,}_cache of size sizeof(slab descriptor) to this point.
 * And we eventually call down to __kmem_cache_create(), which
 * in turn looks up in the kmalloc_{dma,}_caches for the desired-size one.
 * This is a "chicken-and-egg" problem.
 *
 * So the off-slab slab descriptor shall come from the kmalloc_{dma,}_caches,
 * which are all initialized during kmem_cache_init().
 */
static void *alloc_slabmgmt(struct kmem_cache *cachep,
                                   struct page *page, int colour_off,
                                   gfp_t local_flags, int nodeid)
{
        void *freelist;
        void *addr = page_address(page);
// ###########################################################################
// kmalloc()
// ###########################################################################

/**
 * kmalloc - allocate memory
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * kmalloc is the normal method of allocating memory
 * for objects smaller than page size in the kernel.
 *
 * The @flags argument may be one of the GFP flags defined at
 * include/linux/gfp.h and described at
 * :ref:`Documentation/core-api/mm-api.rst <mm-api-gfp-flags>`
 *
 * The recommended usage of the @flags is described at
 * :ref:`Documentation/core-api/memory-allocation.rst <memory-allocation>`
 *
 * Below is a brief outline of the most useful GFP flags
 *
 * %GFP_KERNEL
 *      Allocate normal kernel ram. May sleep.
 *
 * %GFP_NOWAIT
 *      Allocation will not sleep.
 *
 * %GFP_ATOMIC
 *      Allocation will not sleep. May use emergency pools.
 *
 * %GFP_HIGHUSER
 *      Allocate memory from high memory on behalf of user.
 *
 * Also it is possible to set different flags by OR'ing
 * in one or more of the following additional @flags:
 *
 * %__GFP_HIGH
 *      This allocation has high priority and may use emergency pools.
 *
 * %__GFP_NOFAIL
 *      Indicate that this allocation is in no way allowed to fail
 *      (think twice before using).
 *
 * %__GFP_NORETRY
 *      If memory is not immediately available,
 *      then give up at once.
 *
 * %__GFP_NOWARN
 *      If allocation fails, don't issue any warnings.
 *
 * %__GFP_RETRY_MAYFAIL
 *      Try really hard to succeed the allocation but fail
 *      eventually.
 */
static __always_inline void *kmalloc(size_t size, gfp_t flags)
{
        if (__builtin_constant_p(size)) {
#ifndef CONFIG_SLOB
                unsigned int index;
#endif
                if (size > KMALLOC_MAX_CACHE_SIZE)
                        return kmalloc_large(size, flags);
#ifndef CONFIG_SLOB
                index = kmalloc_index(size);
/**
 * __do_kmalloc - allocate memory
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate (see kmalloc).
 * @caller: function caller for debug tracking of the caller
 */
static __always_inline void *__do_kmalloc(size_t size, gfp_t flags,
                                          unsigned long caller)
{
        struct kmem_cache *cachep;
        void *ret;
        if (unlikely(size > KMALLOC_MAX_CACHE_SIZE))
                return NULL;
        // Look up the matching general-purpose cache among the generic
        // (kmalloc) slab caches.
        cachep = kmalloc_slab(size, flags);
        if (unlikely(ZERO_OR_NULL_PTR(cachep)))
                return cachep;
        // Allocate the object.
        ret = slab_alloc(cachep, flags, caller);
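// Illustrative usage sketch (not part of the code being analysed): a
// variable-size kmalloc() request like the one below cannot be resolved at
// compile time, so on this SLAB configuration it goes through __do_kmalloc(),
// which picks a general-purpose cache via kmalloc_slab() and then takes the
// same slab_alloc() path as kmem_cache_alloc(). (The kernel's kmemdup()
// already implements this copy-and-return pattern.)
#include <linux/slab.h>
#include <linux/string.h>

static void *dup_payload(const void *src, size_t len)
{
        void *buf = kmalloc(len, GFP_KERNEL);   /* GFP_KERNEL: may sleep */

        if (!buf)
                return NULL;
        memcpy(buf, src, len);
        return buf;                             /* caller must kfree() it */
}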