13. 【Slab】5 - slab、高速缓存以及对象的释放和销毁

Linux内核内存管理 Linux 2.6.11

 2023/03/16 

到这，前面4篇slab笔记已经讨论了高速缓存、slab以及对象的分配。这篇笔记主要记录和释放、销毁相关的函数。在slab上花了太多时间和精力了，争取这是倒数第二篇，最后一篇准备综合地分析一些情况以及做总结。

不多说，直接进入正题吧。这篇笔记我主要讨论下面几个函数：

kmalloc()；
kfree()；
kmem_cache_free()；
slab_destroy()；
kmem_cache_shrink()；
kmem_cache_destroy()；

`kmalloc()`

kmalloc()函数主要用于普通高速缓存中对象的分配，通过源码可以看出，它可能调用kmem_cache_alloc()也可能调用__kmalloc()来分配对象。这两条路径最终做的事情是一样的：都是从malloc_sizes[]里面找合适的普通高速缓存，然后调用__cache_alloc()来分配对象空间。区别只在于查找发生的时机：当size是编译期常量时（__builtin_constant_p(size)为真），由CACHE(x)宏展开成的一串if/else可以在编译期被优化掉，直接得到下标i；否则就调用__kmalloc()，在运行时遍历malloc_sizes[]。__cache_alloc这个函数在上一篇笔记 12. 【Slab】4 - Slab的基本操作 - kmem_cache_alloc()中已经详细介绍了，这里就不具体分析了。kmalloc()比较简单，下面直接看它的源码就行了。

/*
 * kmalloc - inline front end for allocating from the general-purpose caches.
 * @size: number of bytes requested.
 * @flags: allocation flags; GFP_DMA selects the DMA variant of the cache.
 *
 * When @size is a compile-time constant, the matching malloc_sizes[] index is
 * worked out by the if/else chain below and kmem_cache_alloc() is called
 * directly; otherwise the lookup is left to __kmalloc().
 */
static inline void *kmalloc(size_t size, int flags)
{
    if (__builtin_constant_p(size)) {
        int i = 0;
        /*
         * kmalloc_sizes.h is presumably a list of CACHE(<size>) entries in
         * ascending order (not shown here -- TODO confirm). Each entry expands
         * to one "if (size <= x) goto found; else i++;" step, so i ends up as
         * the index of the first size class that fits.
         */
#define CACHE(x) \
        if (size <= x) \
            goto found; \
        else \
            i++;
#include "kmalloc_sizes.h"
#undef CACHE
        /*
         * Reached only when no size class fits. The function is declared here
         * but presumably never defined, so an oversized constant request
         * would fail at link time -- TODO confirm.
         */
        {
            extern void __you_cannot_kmalloc_that_much(void);
            __you_cannot_kmalloc_that_much();
        }
found:
        /* Pick the DMA or the regular cache of the selected size class. */
        return kmem_cache_alloc((flags & GFP_DMA) ?
            malloc_sizes[i].cs_dmacachep :
            malloc_sizes[i].cs_cachep, flags);
    }
    /* Size not known at compile time: look the cache up at run time. */
    return __kmalloc(size, flags);
}

/**
 * kmalloc - allocate memory
 * @size: how many bytes of memory are required.
 * @flags: the type of memory to allocate.
 *
 * kmalloc is the normal way to allocate memory in the kernel. __kmalloc()
 * below is its out-of-line path: it walks malloc_sizes[] and allocates from
 * the first size class that is large enough for @size.
 *
 * @flags may be one of:
 *
 * %GFP_USER   - Allocate memory on behalf of user.  May sleep.
 * %GFP_KERNEL - Allocate normal kernel ram.  May sleep.
 * %GFP_ATOMIC - Allocation will not sleep.  Use inside interrupt handlers.
 *
 * %GFP_DMA may additionally be set to request memory suitable for DMA.
 * What that means is platform specific; on i386, for example, the memory
 * must come from the first 16MB.
 *
 * Returns NULL when no size class in malloc_sizes[] can hold @size.
 */
void * __kmalloc (size_t size, int flags)
{
    struct cache_sizes *entry;

    /* The scan stops at the first entry whose cs_size is 0. */
    for (entry = malloc_sizes; entry->cs_size != 0; entry++) {
        if (size <= entry->cs_size) {
#if DEBUG
            /* This happens if someone tries to call
             * kmem_cache_create(), or kmalloc(), before
             * the generic caches are initialized.
             */
            BUG_ON(entry->cs_cachep == NULL);
#endif
            if (flags & GFP_DMA)
                return __cache_alloc(entry->cs_dmacachep, flags);
            return __cache_alloc(entry->cs_cachep, flags);
        }
    }
    return NULL;
}

kfree()

kfree()这个函数用于释放kmalloc()分配的对象空间，使用时要注意它只能释放kmalloc()分配的对象空间。如果给它一个其它的指针，那么很可能会出错。

/**
 * kfree - release memory obtained from kmalloc()
 * @objp: pointer previously returned by kmalloc(); a NULL pointer is ignored.
 *
 * Only pass pointers that came from kmalloc(). The owning cache is derived
 * from the page that contains @objp, so an arbitrary pointer resolves to a
 * bogus cache descriptor and __cache_free() then operates on garbage.
 */
void kfree (const void *objp)
{
    unsigned long irq_flags;

    if (!objp)
        return;

    local_irq_save(irq_flags);
    kfree_debugcheck(objp);
    /* Look up the owning cache from the object's page and hand the object back. */
    __cache_free(GET_PAGE_CACHE(virt_to_page(objp)), (void*)objp);
    local_irq_restore(irq_flags);
}

接下来我们看__cache_free()函数。
这个函数和__cache_alloc()类似，先查看当前CPU的本地高速缓存（array_cache）：如果还有空位，就直接把对象指针放回去；如果已经满了，就先调用cache_flusharray()腾出空间，然后再把对象指针放回去。

/*
 * __cache_free
 * Hand an object back to its cache. An object that has a constructed
 * state must already be in that state when it is released.
 *
 * The object is neither destroyed nor cleared here; its pointer is simply
 * pushed back onto the array_cache returned by ac_data(). When that array
 * is full, cache_flusharray() is called first to make room.
 *
 * Called with disabled ints.
 */
static inline void __cache_free (kmem_cache_t *cachep, void* objp)
{
    struct array_cache *local = ac_data(cachep);

    check_irq_off();
    objp = cache_free_debugcheck(cachep, objp, __builtin_return_address(0));

    if (likely(local->avail < local->limit)) {
        /* Fast path: there is still a free slot. */
        STATS_INC_FREEHIT(cachep);
    } else {
        /* Array is full: flush part of it before storing. */
        STATS_INC_FREEMISS(cachep);
        cache_flusharray(cachep, local);
    }

    ac_entry(local)[local->avail++] = objp;
}

下面我们看看如果ac->avail >= ac->limit，也就是array_cache满了，怎么办。在这种情况下，我们就需要调用cache_flusharray()函数了。

cache_flusharray()

这个函数和cache_alloc_refill()看上去很像，都是去找高速缓存shared的array_cache。
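这部分简单来说就是将本地array_cache最前面的一部分对象地址（最多batchcount个）移入shared的array_cache，这样本地的array_cache就有空闲空间可以使用了。如果shared的array_cache剩余空间不足batchcount个，那么还剩多少就移动多少；如果高速缓存没有shared的array_cache，或者它已经完全满了，那么只能调用free_block()把本地array_cache中的这部分对象释放掉。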

/*
 * cache_flusharray - make room in a full array_cache.
 * @cachep: cache that owns @ac.
 * @ac: the array_cache to flush.
 *
 * Takes up to ac->batchcount pointers from the front of @ac. If the cache has
 * a shared array_cache with free slots they are copied there (limited to the
 * free slots available); otherwise they are passed to free_block(). The
 * remaining entries of @ac are then shifted down to index 0.
 */
static void cache_flusharray (kmem_cache_t* cachep, struct array_cache *ac)
{
    int batchcount;

    batchcount = ac->batchcount;
// batchcount must be non-zero and <= ac->avail, otherwise the bookkeeping below goes wrong
#if DEBUG
    BUG_ON(!batchcount || batchcount > ac->avail);
#endif
    check_irq_off();
    spin_lock(&cachep->spinlock);
    // Preferred path: copy batchcount entries from the local array_cache into the shared array_cache
    if (cachep->lists.shared) {
        struct array_cache *shared_array = cachep->lists.shared;
        int max = shared_array->limit-shared_array->avail;
        if (max) {
            // Move only as many entries as the shared array still has room for
            if (batchcount > max)
                batchcount = max;
            memcpy(&ac_entry(shared_array)[shared_array->avail],
                    &ac_entry(ac)[0],
                    sizeof(void*) * batchcount);
            shared_array->avail += batchcount;
            goto free_done;
        }
    }

    // No shared array_cache, or it has no free slot left: release the entries via free_block()
    free_block(cachep, &ac_entry(ac)[0], batchcount);

free_done:
    spin_unlock(&cachep->spinlock);
    // Should never go negative; the DEBUG check above enforces batchcount <= ac->avail
    ac->avail -= batchcount;
    // Slide the surviving entries down over the ones that were copied or released
    memmove(&ac_entry(ac)[0], &ac_entry(ac)[batchcount],
            sizeof(void*)*ac->avail);
}

通过上面分析，我们可以看出，free相关的函数只是将不用的对象返回缓存以便再次使用，并没有真正销毁清除任何东西。

kmem_cache_free()

这个函数就是直接调用__cache_free()，没有什么好讲的，直接跳过了。

/**
 * kmem_cache_free - return an object to its cache
 * @cachep: cache the object was allocated from.
 * @objp: object previously allocated from @cachep.
 *
 * Thin wrapper: calls __cache_free() between local_irq_save() and
 * local_irq_restore().
 */
void kmem_cache_free (kmem_cache_t *cachep, void *objp)
{
    unsigned long irq_state;

    local_irq_save(irq_state);
    __cache_free(cachep, objp);
    local_irq_restore(irq_state);
}

slab_destroy()

上面几个函数都是在对象层面的，这里我们开始看看slab是如何销毁的。这个函数会调用对象的析构函数来销毁所有的对象（如果没有就什么都不做），然后再将该slab所在的页返还给伙伴系统。

这个函数会在哪里被调用呢？__cache_shrink(), free_block和cache_reap()，其中前两个函数我们一个已经分析过，一个接下来就会进行分析。

/* Destroy all the objs in a slab, and release the mem back to the system.
 * Before calling the slab must have been unlinked from the cache.
 * The cache-lock is not held/needed.
 *
 * @cachep: cache that owns the slab.
 * @slabp: descriptor of the slab to destroy.
 */
static void slab_destroy (kmem_cache_t *cachep, struct slab *slabp)
{
    // Start of the slab's memory: s_mem minus colouroff. Per the author's note, colouroff
    // covers the colour offset plus the slab descriptor and object descriptors -- TODO confirm
    void *addr = slabp->s_mem - slabp->colouroff;

#if DEBUG
    // (the DEBUG branch is empty in this excerpt)
#else
    // Run the cache's destructor, if one is set, on each of the cachep->num objects
    if (cachep->dtor) {
        int i;
        for (i = 0; i < cachep->num; i++) {
            void* objp = slabp->s_mem+cachep->objsize*i;
            (cachep->dtor)(objp, cachep, 0);
        }
    }
#endif

    if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU)) {
        struct slab_rcu *slab_rcu;

        // Reuse the slab descriptor's storage as a struct slab_rcu and defer the
        // actual free to kmem_rcu_free() through call_rcu()
        slab_rcu = (struct slab_rcu *) slabp;
        slab_rcu->cachep = cachep;
        slab_rcu->addr = addr;
        call_rcu(&slab_rcu->head, kmem_rcu_free);
    } else {
        // Per the author's note: returns the 2^order pages starting at addr to the buddy system
        kmem_freepages(cachep, addr);
        // With an off-slab layout the slab descriptor was allocated from
        // cachep->slabp_cache and has to be freed separately
        if (OFF_SLAB(cachep))
            kmem_cache_free(cachep->slabp_cache, slabp);
    }
}

kmem_cache_shrink()

这个函数只是一个封装函数，我们直接来看它调用的函数__cache_shrink()。

/**
 * kmem_cache_shrink - Shrink a cache.
 * @cachep: The cache to shrink.
 *
 * Public wrapper around __cache_shrink(): releases as many slabs as possible
 * for a cache. To help debugging, a zero exit status indicates all slabs
 * were released.
 *
 * Calls BUG() if @cachep is NULL or if in_interrupt() is true.
 */
int kmem_cache_shrink(kmem_cache_t *cachep)
{
    if (!cachep)
        BUG();
    else if (in_interrupt())
        BUG();

    return __cache_shrink(cachep);
}

__cache_shrink()

这个函数除了在kmem_cache_shrink()中使用外，还在下一个介绍的函数kmem_cache_destroy()中使用。简单来说，这个函数就是尽量地释放高速缓存，它会对本地高速缓存、shared高速缓存以及处于空闲链表中的slab进行处理。它不会对处于partial或者full链表的slab进行处理。

/*
 * __cache_shrink - release every completely free slab of a cache.
 * @cachep: cache to shrink.
 *
 * First drains the per-CPU and shared array_caches via drain_cpu_caches(),
 * then destroys the slabs on the slabs_free list one at a time. Slabs on the
 * full and partial lists are left untouched.
 *
 * Returns non-zero if the full or the partial list is still non-empty
 * afterwards, 0 otherwise.
 */
static int __cache_shrink(kmem_cache_t *cachep)
{
    struct slab *slabp;
    int ret;

    /* Flush the per-CPU and shared array_caches first. */
    drain_cpu_caches(cachep);

    check_irq_on();
    spin_lock_irq(&cachep->spinlock);

    for(;;) {
        struct list_head *p;

        p = cachep->lists.slabs_free.prev;
        /* The list head points back to itself: no free slab is left. */
        if (p == &cachep->lists.slabs_free)
            break;

        slabp = list_entry(cachep->lists.slabs_free.prev, struct slab, list);
#if DEBUG
        if (slabp->inuse)
            BUG();
#endif
        list_del(&slabp->list);

        cachep->lists.free_objects -= cachep->num;
        /*
         * The slab is unlinked from the free list while the lock is held;
         * the lock is dropped around slab_destroy() and retaken afterwards.
         */
        spin_unlock_irq(&cachep->spinlock);
        slab_destroy(cachep, slabp);
        spin_lock_irq(&cachep->spinlock);
    }
    /* The full and partial lists are not touched; just report whether they are empty. */
    ret = !list_empty(&cachep->lists.slabs_full) ||
        !list_empty(&cachep->lists.slabs_partial);
    spin_unlock_irq(&cachep->spinlock);
    return ret;
}

/*
 * drain_cpu_caches - empty the per-CPU and the shared array_caches of a cache.
 * @cachep: cache to drain.
 */
static void drain_cpu_caches(kmem_cache_t *cachep)
{
    // do_drain releases everything held in a local array_cache; this helper
    // presumably runs it on every CPU -- TODO confirm
    smp_call_function_all_cpus(do_drain, cachep);
    check_irq_on();
    spin_lock_irq(&cachep->spinlock);
    // Drain the shared array_cache too; force=1 makes drain_array_locked() free all of its entries
    if (cachep->lists.shared)
        drain_array_locked(cachep, cachep->lists.shared, 1);
    spin_unlock_irq(&cachep->spinlock);
}

/*
 * do_drain - release every object held in the array_cache returned by
 * ac_data() (presumably the calling CPU's local array -- TODO confirm).
 * @arg: the kmem_cache_t to drain, passed as void *.
 */
static void do_drain(void *arg)
{
    kmem_cache_t *cachep = (kmem_cache_t*)arg;
    struct array_cache *ac;

    check_irq_off();
    ac = ac_data(cachep);
    // Pass all ac->avail entries to free_block() under the cache spinlock
    spin_lock(&cachep->spinlock);
    free_block(cachep, &ac_entry(ac)[0], ac->avail);
    spin_unlock(&cachep->spinlock);
    // The array is now empty
    ac->avail = 0;
}

/*
 * drain_array_locked - release entries from an array_cache.
 * @cachep: cache that owns @ac.
 * @ac: array_cache to drain.
 * @force: non-zero to free every entry regardless of recent use.
 *
 * Without @force, an array marked as touched is left alone and only has its
 * touched mark cleared. Otherwise (ac->limit+4)/5 entries are freed, or
 * (ac->avail+1)/2 if that would exceed what is available. With @force all
 * ac->avail entries are freed. Freed entries are taken from the front of the
 * array and the rest is shifted down.
 */
static void drain_array_locked(kmem_cache_t *cachep,
                struct array_cache *ac, int force)
{
    int count;

    check_spinlock_acquired(cachep);

    /* Recently used and not forced: just clear the mark. */
    if (ac->touched && !force) {
        ac->touched = 0;
        return;
    }

    /* Nothing cached, nothing to do. */
    if (!ac->avail)
        return;

    if (force)
        count = ac->avail;
    else
        count = (ac->limit+4)/5;

    if (count > ac->avail)
        count = (ac->avail+1)/2;

    free_block(cachep, ac_entry(ac), count);
    ac->avail -= count;
    memmove(&ac_entry(ac)[0], &ac_entry(ac)[count],
                sizeof(void*)*ac->avail);
}

kmem_cache_destroy()

这个函数只能销毁全是空闲slab的高速缓存，如果该高速缓存中还有非空的slab，那么它不会完成销毁的工作。

/**
 * kmem_cache_destroy - delete a cache
 * @cachep: the cache to destroy
 *
 * Remove a kmem_cache_t object from the slab cache.
 * Returns 0 on success, 1 if the cache still holds full or partial slabs
 * (in which case the cache is put back on the cache chain).
 *
 * It is expected this function will be called by a module when it is
 * unloaded.  This will remove the cache completely, and avoid a duplicate
 * cache being allocated each time a module is loaded and unloaded, if the
 * module doesn't have persistent in-kernel storage across loads and unloads.
 *
 * The cache must be empty before calling this function.
 *
 * The caller must guarantee that no one will allocate memory from the cache
 * during the kmem_cache_destroy().
 */
int kmem_cache_destroy (kmem_cache_t * cachep)
{
    int i;

    if (!cachep || in_interrupt())
        BUG();

    /* Don't let CPUs to come and go */
    lock_cpu_hotplug();

    /* Find the cache in the chain of caches. */
    down(&cache_chain_sem);
    /*
     * the chain is never empty, cache_cache is never destroyed
     */
    list_del(&cachep->next);
    up(&cache_chain_sem);

    // __cache_shrink() destroys every free slab; if any full or partial slab is left it
    // returns non-zero, and we report an error and put the cache back on the chain.
    // (Per the author's note: free_block() moves a slab between lists as its objects are released.)
    if (__cache_shrink(cachep)) {
        slab_error(cachep, "Can't free all objects");
        down(&cache_chain_sem);
        list_add(&cachep->next,&cache_chain);
        up(&cache_chain_sem);
        unlock_cpu_hotplug();
        return 1;
    }

    if (unlikely(cachep->flags & SLAB_DESTROY_BY_RCU))
        synchronize_kernel();

    /* no cpu_online check required here since we clear the percpu
     * array on cpu offline and set this to NULL.
     */
    // The remaining steps free the descriptors themselves: the per-CPU array_caches,
    // the shared array_cache, and finally the cache descriptor
    for (i = 0; i < NR_CPUS; i++)
        kfree(cachep->array[i]);

    /* NUMA: free the list3 structures */
    kfree(cachep->lists.shared);
    cachep->lists.shared = NULL;
    kmem_cache_free(&cache_cache, cachep);

    unlock_cpu_hotplug();

    return 0;
}

【补充】slab对象描述符的更新

slab的对象会被不断地分配和释放，我这里简单总结下slab对象描述符如何工作的。

// 实际上，分配对象时我们甚至都不需要更改对象描述符数组（比如第2和3步，数组内容没有变化，只是free向后移动），只有释放对象时我们才会再次修改它们。
// 同时，我们也不需要更改对象本身，free和数组的链表化保证了我们总是能拿到空的对象
// 注意，这个过程是一个LIFO的过程，最后释放的对象会被优先分配使用

// 分配时相关代码
// next = slab_bufctl(slabp)[slabp->free];
// slabp->free = next;

// 释放时相关代码
// slab_bufctl(slabp)[objnr] = slabp->free;
// slabp->free = objnr;

// F = Free, U = Used

// 1. 初始化时
[1][2][3][4][5][X] {F}{F}{F}{F}{F}
 |
 free

// 2. 分配一个对象
[1][2][3][4][5][X] {U}{F}{F}{F}{F}
    |
    free

// 3. 再分配一个对象（此时共分配了两个对象）
[1][2][3][4][5][X] {U}{U}{F}{F}{F}
       |
       free

// 4. 释放对象0 
[2][2][3][4][5][X] {F}{U}{F}{F}{F}
 |
 free

// 5. 释放对象1 
[2][0][3][4][5][X] {F}{F}{F}{F}{F}
    |
    free

总结

至此，slab相关的关键函数基本都介绍完了。这里面的内容很多，需要好好思考总结。下一篇笔记我准备根据不同的情景，把这些函数串起来，也算是对slab的一种加深理解和总结吧。

Next Post

14. 【Slab】6 - slab的一些情景分析
Previous Post

12. 【Slab】4 - slab的基本操作 - kmem_cache_alloc()

CATALOG

1. kmalloc()
2. kfree()
2.1. cache_flusharray()
3. kmem_cache_free()
4. slab_destroy()
5. kmem_cache_shrink()
5.1. __cache_shrink()
6. kmem_cache_destroy()
7. 【补充】slab对象描述符的更新
8. 总结