从零到负一

【LM15】Freeing in the slab system

2024/01/22

In the previous note, 【LM14】Creation and Allocation in the slab System, I analyzed how objects are allocated from a slab cache. In this note I continue with the next important part of the slab system: freeing. Compared with allocation, the free path of a slab cache is simpler. Starting from kmem_cache_free(), let us walk through the freeing of a slab object step by step.
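Before diving into the internals, here is a minimal, hypothetical kernel-module sketch showing where kmem_cache_free() sits in the usual allocate/free pairing. The cache name foo_cache and struct foo are made up for illustration; the calls themselves (kmem_cache_create/alloc/free/destroy) are the real slab interface.

// A hypothetical minimal module; foo_cache and struct foo are made up.
#include <linux/init.h>
#include <linux/module.h>
#include <linux/slab.h>

struct foo { int a, b; };

static struct kmem_cache *foo_cache;

static int __init foo_init(void)
{
	struct foo *f;

	/* one cache, one object, then the free path analyzed below */
	foo_cache = kmem_cache_create("foo_cache", sizeof(struct foo),
				      0, 0, NULL);
	if (!foo_cache)
		return -ENOMEM;

	f = kmem_cache_alloc(foo_cache, GFP_KERNEL);
	if (f)
		kmem_cache_free(foo_cache, f);	/* enters the path below */

	return 0;
}

static void __exit foo_exit(void)
{
	kmem_cache_destroy(foo_cache);
}

module_init(foo_init);
module_exit(foo_exit);
MODULE_LICENSE("GPL");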

kmem_cache_free()

// ./mm/slab.c

/**
 * kmem_cache_free - Deallocate an object
 * @cachep: The cache the allocation was from.
 * @objp: The previously allocated object.
 *
 * Free an object which was previously allocated from this
 * cache.
 */
void kmem_cache_free(struct kmem_cache *cachep, void *objp)
{
	unsigned long flags;
	/* check that the object objp really comes from the kmem_cache cachep */
	cachep = cache_from_obj(cachep, objp);
	if (!cachep)
		return;

	local_irq_save(flags);
	/* I skip everything debug-related here */
	debug_check_no_locks_freed(objp, cachep->object_size);
	if (!(cachep->flags & SLAB_DEBUG_OBJECTS))
		debug_check_no_obj_freed(objp, cachep->object_size);
	__cache_free(cachep, objp, _RET_IP_);
	local_irq_restore(flags);

	trace_kmem_cache_free(_RET_IP_, objp);
}

cache_from_obj()

This function checks whether an object really belongs to a given kmem_cache.

// ./mm/slab.h

// #############################################################################
// cache_from_obj()
// #############################################################################
static inline struct kmem_cache *cache_from_obj(struct kmem_cache *s, void *x)
{
	struct kmem_cache *cachep;
	struct page *page;

	/*
	 * When kmemcg is not being used, both assignments should return the
	 * same value. but we don't want to pay the assignment price in that
	 * case. If it is not compiled in, the compiler should be smart enough
	 * to not do even the assignment. In that case, slab_equal_or_root
	 * will also be a constant.
	 */
	if (!memcg_kmem_enabled() &&
	    !unlikely(s->flags & SLAB_CONSISTENCY_CHECKS))
		return s;
	// ------------------------------------------------------------ (1)
	page = virt_to_head_page(x);
	cachep = page->slab_cache;
	if (slab_equal_or_root(cachep, s))
		return cachep;

	pr_err("%s: Wrong slab cache. %s but object is from %s\n",
	       __func__, s->name, cachep->name);
	WARN_ON_ONCE(1);
	return s;
}

// ./include/linux/mm.h

// #############################################################################
// virt_to_head_page()
// #############################################################################
static inline struct page *virt_to_head_page(const void *x)
{
	struct page *page = virt_to_page(x);

	return compound_head(page);
}

// ./arch/arm64/include/asm/memory.h
#ifndef CONFIG_SPARSEMEM_VMEMMAP
#define virt_to_page(kaddr)	pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#else
#define virt_to_page(vaddr)	((struct page *)((__virt_to_pgoff(vaddr)) | VMEMMAP_START))
#endif

#define __pa(x)			__virt_to_phys((unsigned long)(x))
#define __virt_to_phys(x)	__virt_to_phys_nodebug(x)
#define __virt_to_phys_nodebug(x) ({				\
	phys_addr_t __x = (phys_addr_t)(x);			\
	__is_lm_address(__x) ? __lm_to_phys(__x) :		\
			       __kimg_to_phys(__x);		\
})
/*
 * The linear kernel range starts in the middle of the virtual address
 * space. Testing the top bit for the start of the region is a
 * sufficient check.
 */
#define __is_lm_address(addr)	(!!((addr) & BIT(VA_BITS - 1)))
#define __lm_to_phys(addr)	(((addr) & ~PAGE_OFFSET) + PHYS_OFFSET)
#define __kimg_to_phys(addr)	((addr) - kimage_voffset)

(1) is the most important part of this function: it takes the address of obj and finds the page behind it, implemented through a chain of macros. Here we assume CONFIG_SPARSEMEM_VMEMMAP is not defined; a series of conversions then leads from the object's virtual address to its struct page. For the virtual memory layout and the VA->PA conversion, see the memory-layout part of 【LM05】paging_init() and the Memory Layout; for pfn_to_page(), see 【LM06】bootmem_init() and the Linux Memory Model. Once we have the page of obj, the rest of the function is straightforward; the source code speaks for itself.
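To make the macro chain concrete, below is a user-space toy (explicitly not kernel code) of the !CONFIG_SPARSEMEM_VMEMMAP path: __pa() strips the linear-map bits and adds the physical RAM base (like __lm_to_phys()), shifting right by PAGE_SHIFT yields the pfn, and pfn_to_page() becomes a plain index into a flat mem_map array. All constants and the tiny mem_map here are made-up stand-ins for the real kernel symbols.

// User-space sketch: virt_to_page(kaddr) == pfn_to_page(__pa(kaddr) >> PAGE_SHIFT)
#include <stdio.h>
#include <stdint.h>

#define PAGE_SHIFT	12
#define PAGE_SIZE	(1UL << PAGE_SHIFT)
#define PAGE_OFFSET	0xffff000000000000UL	/* start of the linear map (toy value) */
#define PHYS_OFFSET	0x40000000UL		/* start of physical RAM   (toy value) */

struct page { int dummy; };
static struct page mem_map[1024];		/* toy flat memmap */

/* like __lm_to_phys(): strip the linear-map bits, add the RAM base */
static uint64_t pa(uint64_t va) { return (va & ~PAGE_OFFSET) + PHYS_OFFSET; }

static struct page *virt_to_page(uint64_t va)
{
	uint64_t pfn = pa(va) >> PAGE_SHIFT;	/* __pa(kaddr) >> PAGE_SHIFT */
	return &mem_map[pfn - (PHYS_OFFSET >> PAGE_SHIFT)];	/* pfn_to_page() */
}

int main(void)
{
	uint64_t obj = PAGE_OFFSET + 5 * PAGE_SIZE + 0x30;	/* an "object" in page 5 */
	printf("page index = %ld\n", (long)(virt_to_page(obj) - mem_map));	/* -> 5 */
	return 0;
}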

I do have one question here: why doesn't this function stop an erroneous free? From the code, if obj does not come from this kmem_cache, nothing beyond a warning message prevents the free. The object simply gets released into another kmem_cache, which plants a hidden hazard: the kernel may crash later because of it. My understanding is that kernel code is assumed to be correct and should never free an object into the wrong cache; if that happens, the code must be fixed, so the kernel does not bother to block the free here.

__cache_free()

This function is a thin wrapper; it calls ___cache_free() to do the actual freeing.

// ./mm/slab.c

// #############################################################################
// __cache_free()
// #############################################################################
/*
 * Release an obj back to its cache. If the obj has a constructed state, it must
 * be in this state _before_ it is released. Called with disabled ints.
 */
static __always_inline void __cache_free(struct kmem_cache *cachep, void *objp,
					 unsigned long caller)
{
	/* Put the object into the quarantine, don't touch it for now. */
	if (kasan_slab_free(cachep, objp, _RET_IP_))
		return;

	___cache_free(cachep, objp, caller);
}

// #############################################################################
// ___cache_free()
// #############################################################################
void ___cache_free(struct kmem_cache *cachep, void *objp,
		   unsigned long caller)
{
	struct array_cache *ac = cpu_cache_get(cachep);

	check_irq_off();
	kmemleak_free_recursive(objp, cachep->flags);
	/* debug-related code is not considered here */
	objp = cache_free_debugcheck(cachep, objp, caller);

	/*
	 * Skip calling cache_free_alien() when the platform is not numa.
	 * This will avoid cache misses that happen while accessing slabp (which
	 * is per page memory reference) to get nodeid. Instead use a global
	 * variable to skip the call, which is mostly likely to be present in
	 * the cache.
	 */
	if (nr_online_nodes > 1 && cache_free_alien(cachep, objp))
		return;
	/* case1: ac has not reached its limit yet, so simply free obj into ac */
	if (ac->avail < ac->limit) {
		STATS_INC_FREEHIT(cachep);
	/* case2: ac is full, so a flush has to happen first */
	} else {
		STATS_INC_FREEMISS(cachep);
		cache_flusharray(cachep, ac);
	}
	/* net-related code is not discussed here; this function returns 0 */
	if (sk_memalloc_socks()) {
		struct page *page = virt_to_head_page(objp);

		if (unlikely(PageSlabPfmemalloc(page))) {
			cache_free_pfmemalloc(cachep, page, objp);
			return;
		}
	}

	ac->entry[ac->avail++] = objp;
}

Let us look at case1 first. It is simple: the per-cpu cache still has a free slot, so the object is simply dropped into it. One detail deserves attention: when the obj goes back into the per-cpu cache, the slab page it belongs to is not updated; only the per-cpu cache's avail counter changes. The slab page is updated only when objects are flushed from the per-cpu cache back into their slab pages, which is exactly what we will see in case2.
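A small user-space model of this fast path may help, assuming a much-simplified array_cache: freeing an object while avail < limit is nothing more than storing a pointer and bumping avail; no list or struct page is touched. The struct and function names below are toy stand-ins, not the kernel's.

// User-space toy of the per-cpu array_cache fast path.
#include <stdio.h>

#define AC_LIMIT 4

struct array_cache {
	unsigned int avail;
	unsigned int limit;
	void *entry[AC_LIMIT];
};

/* case1: there is room in the per-cpu array -> O(1) free, no list/page work */
static int ac_free(struct array_cache *ac, void *objp)
{
	if (ac->avail < ac->limit) {
		ac->entry[ac->avail++] = objp;
		return 1;		/* FREEHIT */
	}
	return 0;			/* FREEMISS: would call cache_flusharray() */
}

int main(void)
{
	struct array_cache ac = { .avail = 0, .limit = AC_LIMIT };
	int obj[5];

	for (int i = 0; i < 5; i++)
		printf("free obj%d: %s\n", i,
		       ac_free(&ac, &obj[i]) ? "hit (stays in ac)" : "miss (flush needed)");
	return 0;
}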

cache_flusharray()

static void cache_flusharray(struct kmem_cache *cachep, struct array_cache *ac)
{
	int batchcount;
	struct kmem_cache_node *n;
	int node = numa_mem_id();
	LIST_HEAD(list);

	batchcount = ac->batchcount;

	check_irq_off();
	n = get_node(cachep, node);
	spin_lock(&n->list_lock);
	// --------------------------------------------------------------- (1)
	if (n->shared) {
		struct array_cache *shared_array = n->shared;
		int max = shared_array->limit - shared_array->avail;
		if (max) {
			if (batchcount > max)
				batchcount = max;
			memcpy(&(shared_array->entry[shared_array->avail]),
			       ac->entry, sizeof(void *) * batchcount);
			shared_array->avail += batchcount;
			goto free_done;
		}
	}

	// --------------------------------------------------------------- (2)
	free_block(cachep, ac->entry, batchcount, node, &list);
free_done:
#if STATS
	{
		int i = 0;
		struct page *page;

		list_for_each_entry(page, &n->slabs_free, lru) {
			BUG_ON(page->active);

			i++;
		}
		STATS_SET_FREEABLE(cachep, i);
	}
#endif
	spin_unlock(&n->list_lock);
	/*
	 * (2) may have detached some completely free slab pages; they are all
	 * destroyed here. Destroying means updating the page/slab metadata,
	 * returning the pages to the buddy system, and freeing the freelist
	 * objects from the off-slab kmem_cache.
	 */
	slabs_destroy(cachep, &list);
	ac->avail -= batchcount;
	memmove(ac->entry, &(ac->entry[batchcount]), sizeof(void *)*ac->avail);
}

Look at (1) first. As the previous note showed, not every kmem_cache has a shared array. For a cache that has one, as long as the shared array still has free room we can copy objects from the per-cpu cache into it, leaving the per-cpu cache with enough space for the objects being released. If the kmem_cache has no shared array (or the shared array is already full), we have to execute the function at (2).
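The following user-space sketch condenses the two branches just discussed: if a shared array with free room exists, the oldest batchcount pointers are memcpy'd into it; otherwise they would go to free_block(); either way the survivors are slid to the front of the per-cpu array with memmove(), exactly as at the end of cache_flusharray(). All types here are simplified toys, not the kernel structures.

// User-space toy of the flush logic in cache_flusharray().
#include <stdio.h>
#include <string.h>

struct toy_ac { unsigned avail, limit; void *entry[8]; };

static void toy_flush(struct toy_ac *ac, struct toy_ac *shared, unsigned batchcount)
{
	if (shared) {
		unsigned max = shared->limit - shared->avail;
		if (max) {
			if (batchcount > max)
				batchcount = max;
			memcpy(&shared->entry[shared->avail], ac->entry,
			       sizeof(void *) * batchcount);
			shared->avail += batchcount;
			goto done;
		}
	}
	/* no shared array (or it is full): free_block() would push the
	 * objects back into their slab pages here */
done:
	ac->avail -= batchcount;
	memmove(ac->entry, &ac->entry[batchcount], sizeof(void *) * ac->avail);
}

int main(void)
{
	struct toy_ac ac = { .avail = 4, .limit = 4 }, shared = { .avail = 0, .limit = 8 };
	int o[4];
	for (int i = 0; i < 4; i++)
		ac.entry[i] = &o[i];

	toy_flush(&ac, &shared, 2);	/* batchcount = 2 */
	printf("ac.avail=%u shared.avail=%u\n", ac.avail, shared.avail);	/* 2 and 2 */
	return 0;
}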

free_block()

With no usable shared array, the only option is to release the objects back into their slab pages.

/*
 * Caller needs to acquire correct kmem_cache_node's list_lock
 * @list: List of detached free slabs should be freed by caller
 */
static void free_block(struct kmem_cache *cachep, void **objpp,
		       int nr_objects, int node, struct list_head *list)
{
	int i;
	struct kmem_cache_node *n = get_node(cachep, node);
	struct page *page;

	n->free_objects += nr_objects;

	for (i = 0; i < nr_objects; i++) {
		void *objp;
		struct page *page;
		/* an object from the per-cpu cache */
		objp = objpp[i];
		/* find the slab page this object belongs to */
		page = virt_to_head_page(objp);
		list_del(&page->lru);
		check_spinlock_acquired_node(cachep, node);
		/*
		 * The obj was copied from this slab page into the per-cpu cache,
		 * so it can always be released back into this slab page: its
		 * objnr is written into the updated freelist.
		 */
		slab_put_obj(cachep, page, objp);
		STATS_DEC_ACTIVE(cachep);

		/* put the slab back on the appropriate list */
		/* fixup slab chains */
		if (page->active == 0) {
			list_add(&page->lru, &n->slabs_free);
			n->free_slabs++;
		} else {
			/* Unconditionally move a slab to the end of the
			 * partial list on free - maximum time for the
			 * other objects to be freed, too.
			 */
			list_add_tail(&page->lru, &n->slabs_partial);
		}
	}

	/*
	 * Since ac->batchcount objects are freed in one go, the node may now
	 * hold too many free objects; if so, try to release some free slab pages.
	 */
	while (n->free_objects > n->free_limit && !list_empty(&n->slabs_free)) {
		/* at least one free slab page exists here, so this never goes negative */
		n->free_objects -= cachep->num;

		page = list_last_entry(&n->slabs_free, struct page, lru);
		/* detach the slab page from the slab lists and add it to list */
		list_move(&page->lru, list);
		n->free_slabs--;
		n->total_slabs--;
	}
}
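The slab_put_obj() call above is where the freed object's index re-enters the slab's freelist. Its body is not quoted in this note, but based on the comment above it can be modeled as a small index stack: allocation consumes freelist[active] and increments active, freeing decrements active and stores the object's objnr back at freelist[active]. The sketch below is a user-space toy under that assumption, not the kernel implementation.

// User-space toy of the freelist index stack behind slab_get_obj/slab_put_obj.
#include <stdio.h>

#define OBJS_PER_SLAB 4

struct toy_slab {
	unsigned int active;			/* objects currently allocated */
	unsigned char freelist[OBJS_PER_SLAB];	/* indices of free objects     */
};

/* allocate: take the index stored at freelist[active], then active++ */
static unsigned int toy_get_obj(struct toy_slab *s)
{
	return s->freelist[s->active++];
}

/* free: active--, then store this object's index at freelist[active] */
static void toy_put_obj(struct toy_slab *s, unsigned int objnr)
{
	s->freelist[--s->active] = objnr;
}

int main(void)
{
	struct toy_slab s = { .active = 0, .freelist = { 0, 1, 2, 3 } };

	unsigned int a = toy_get_obj(&s), b = toy_get_obj(&s);
	toy_put_obj(&s, a);			/* a's index goes back on the stack */
	printf("active=%u, next alloc would reuse objnr %u\n",
	       s.active, s.freelist[s.active]);	/* active=1, reuses a (=0) */
	(void)b;
	return 0;
}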

This completes the analysis of the slab free path, and with it the study of the slab system comes to an end for now. Next I plan to look into vmalloc. Keep it up!
