10. 【slab】2 - slab的基本操作 - kmem_cache_init()

Linux内核内存管理 Linux 2.6.11

 2023/02/20 

在上一篇笔记《09. 【slab】1 - slab简介以及相关结构体》中，我已经介绍了slab的基本原理及其使用的数据结构。在这篇笔记中，我将开始分析第一个函数——kmem_cache_init()。

这个函数在start_kernel()中被调用，用于初始化slab。在这之前，伙伴系统已经初始化完成。kmem_cache_init()这个函数主要分4步：

1. 初始化cache_cache的部分成员变量；
2. 建立通用高速缓存；
3. 在第2步的基础上，用kmalloc()分配空间来取代cache_cache和malloc_sizes[0]中静态分配的CPU本地缓存描述符；
4. 将head array调整到最终大小（resize the head arrays to their final sizes），即重新初始化CPU本地缓存和shared缓存。

下面我通过注释来说明该函数。

//  start_kernel()
//    --> mem_init()        -- 初始化伙伴系统
//    --> kmem_cache_init() -- 初始化slab

// cache_cache is statically allocated so that it is available during
// bootstrap, before any cache exists to allocate a descriptor from.
// Its object size is sizeof(kmem_cache_t) and its name is "kmem_cache".
// The remaining fields (colour_off, array[], num, colour, slab_size, ...)
// are filled in at run time by kmem_cache_init().
static kmem_cache_t cache_cache = {
    .lists        = LIST3_INIT(cache_cache.lists),
    .batchcount    = 1,
    .limit        = BOOT_CPUCACHE_ENTRIES,
    .objsize    = sizeof(kmem_cache_t),
    .flags        = SLAB_NO_REAP,
    .spinlock    = SPIN_LOCK_UNLOCKED,
    .name        = "kmem_cache",
#if DEBUG
    .reallen    = sizeof(kmem_cache_t),
#endif
};

/* Initialisation.
 * Called after the gfp() functions have been enabled, and before smp_init().
 *
 * Bootstraps the slab allocator: finishes setting up the statically
 * allocated cache_cache, creates the kmalloc caches listed in malloc_sizes[],
 * swaps the two bootstrap head arrays for kmalloc()ed copies, runs
 * enable_cpucache() on every cache on cache_chain, and finally marks the
 * allocator as fully up and registers the CPU notifier.
 */
void __init kmem_cache_init(void)
{
    size_t left_over;

    /* Related structure:
       struct cache_sizes {
           size_t       cs_size;
           kmem_cache_t *cs_cachep;
           kmem_cache_t *cs_dmacachep;
       };
    */
    struct cache_sizes *sizes;
    
    /*  Related structure:
        struct cache_names {
            char *name;
            char *name_dma;
        };    
    */
    struct cache_names *names;

    /*
     * Fragmentation resistance on low memory - only use bigger
     * page orders on machines with more than 32MB of memory.
     */
    if (num_physpages > (32 << 20) >> PAGE_SHIFT)
        slab_break_gfp_order = BREAK_GFP_ORDER_HI;

    /* Bootstrap is tricky, because several objects are allocated
     * from caches that do not exist yet:
     * 1) initialize the cache_cache cache: it contains the kmem_cache_t
     *    structures of all caches, except cache_cache itself: cache_cache
     *    is statically allocated.
     *    Initially an __init data area is used for the head array, it's
     *    replaced with a kmalloc allocated array at the end of the bootstrap.
     * 2) Create the first kmalloc cache.
     *    The kmem_cache_t for the new cache is allocated normally. An __init
     *    data area is used for the head array.
     * 3) Create the remaining kmalloc caches, with minimally sized head arrays.
     * 4) Replace the __init data head arrays for cache_cache and the first
     *    kmalloc cache with kmalloc allocated arrays.
     * 5) Resize the head arrays of the kmalloc caches to their final sizes.
     */

    /* 1) create the cache_cache */
    init_MUTEX(&cache_chain_sem);
    INIT_LIST_HEAD(&cache_chain);
    list_add(&cache_cache.next, &cache_chain);
    
    // Slab colouring shifts objects in steps of one cache line; the cache line
    // size is the smallest offset unit (e.g. 32 or 64 bytes).
    cache_cache.colour_off = cache_line_size();
    
    // The per-CPU local cache (head array) of cache_cache starts out pointing at
    // a statically allocated bootstrap array:
    // static struct arraycache_init initarray_cache __initdata = {{0, BOOT_CPUCACHE_ENTRIES, 1, 0}};
    cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
    // Round the object size up to a multiple of the cache line size.
    cache_cache.objsize = ALIGN(cache_cache.objsize, cache_line_size());

    // cache_estimate() is not expanded here. In short: from the page order
    // (0 here) it derives the size of a single slab, then works out how many
    // objects fit (num) and how many bytes are left unused (left_over).
    cache_estimate(0,                   // unsigned long gfporder
                   cache_cache.objsize, // size_t size
                   cache_line_size(),   // size_t align 
                   0,                   // int flags
                   &left_over,          // size_t *left_over
                   &cache_cache.num);   // unsigned int *num
    // A slab that cannot hold even one object is a fatal configuration error.
    if (!cache_cache.num)
        BUG();

    // Number of available colours: how many colour_off-sized steps fit into
    // the leftover space.
    cache_cache.colour = left_over / cache_cache.colour_off; // ULK, p335
    cache_cache.colour_next = 0;
    
    // slab_size excludes the memory of the objects themselves: it covers only
    // the management data (struct slab plus one kmem_bufctl_t per object),
    // rounded up to the cache line size. It is the same for every slab of a cache.
    cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + sizeof(struct slab), cache_line_size());

    /* 2+3) create the kmalloc caches */
    /*  Related structure:
        struct cache_sizes {
            size_t         cs_size;
            kmem_cache_t   *cs_cachep;
            kmem_cache_t   *cs_dmacachep;
        };

        Note the #include inside an array initializer, and the #undef at the
        end: CACHE() is also defined as a macro elsewhere.
        struct cache_sizes malloc_sizes[] = {
            #define CACHE(x) { .cs_size = (x) },
            #include <linux/kmalloc_sizes.h> --> expands to CACHE(32), CACHE(64), CACHE(96), etc
            { 0, }
            #undef CACHE
        };
    */
    sizes = malloc_sizes;
    names = cache_names;

    // kmem_cache_create() is used here to create the general (kmalloc) caches.
    // This is a chicken-and-egg problem: the slab allocator is not fully
    // initialised yet, so where do the cache descriptors and head arrays come
    // from? It is solved with static allocation; the details belong to the
    // analysis of kmem_cache_create().
    // The loop walks sizes[] until the terminating entry with cs_size == 0,
    // creating one normal and one DMA cache per size.
    while (sizes->cs_size) {
        /* For performance, all the general caches are L1 aligned.
         * This should be particularly beneficial on SMP boxes, as it
         * eliminates "false sharing".
         * Note for systems short on memory removing the alignment will
         * allow tighter packing of the smaller caches. */
        sizes->cs_cachep = kmem_cache_create(
                           names->name,                       // const char *name
                           sizes->cs_size,                    // size_t size, the size of objects to be created in this cache
                           ARCH_KMALLOC_MINALIGN,             // size_t align
                           (ARCH_KMALLOC_FLAGS | SLAB_PANIC), // unsigned long flags
                           NULL,                              // void (*ctor)(void*, kmem_cache_t *, unsigned long)
                           NULL);                             // void (*dtor)(void*, kmem_cache_t *, unsigned long)

        /* Inc off-slab bufctl limit until the ceiling is hit. */
        /* Max number of objs-per-slab for caches which use off-slab slabs.
         * Needed to avoid a possible looping condition in cache_grow().
         */
        // offslab_limit is only recomputed while the cache just created is NOT
        // off-slab: (object size - sizeof(struct slab)) / sizeof(kmem_bufctl_t).
        if (!(OFF_SLAB(sizes->cs_cachep))) {
            offslab_limit = sizes->cs_size-sizeof(struct slab);
            offslab_limit /= sizeof(kmem_bufctl_t);
        }

        // Create the DMA counterpart of the general cache of this size.
        sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
                                                sizes->cs_size, 
                                                ARCH_KMALLOC_MINALIGN,
                                                (ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC),
                                                NULL, 
                                                NULL);
        sizes++;
        names++;
    }

    // With the general caches in place, kmalloc() can now hand out small
    // allocations. Use dynamically allocated memory to replace the statically
    // allocated per-CPU local caches used so far.
    /* 4) Replace the bootstrap head arrays */
    {
        void *ptr;
        
        // NOTE(review): the kmalloc() results below are passed to memcpy()
        // without a NULL check.
        // Interrupts are disabled around each copy-and-swap of the head array.
        ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
        local_irq_disable();
        BUG_ON(ac_data(&cache_cache) != &initarray_cache.cache);
        memcpy(ptr, ac_data(&cache_cache), sizeof(struct arraycache_init));
        cache_cache.array[smp_processor_id()] = ptr;
        local_irq_enable();
    
        // Same replacement for the first kmalloc cache, whose head array is
        // asserted to still be initarray_generic.
        ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
        local_irq_disable();
        BUG_ON(ac_data(malloc_sizes[0].cs_cachep) != &initarray_generic.cache);
        memcpy(ptr, ac_data(malloc_sizes[0].cs_cachep), sizeof(struct arraycache_init));
        malloc_sizes[0].cs_cachep->array[smp_processor_id()] = ptr;
        local_irq_enable();
    }

    /* 5) resize the head arrays to their final sizes */
    // Call enable_cpucache() on every cache on cache_chain while holding
    // cache_chain_sem. Per the author's note this re-initialises the per-CPU
    // local cache and the shared cache of each cache; cache_chain holds
    // cache_cache (added in step 1) and, presumably, every cache created via
    // kmem_cache_create() above -- confirm against kmem_cache_create().
    {
        kmem_cache_t *cachep;
        down(&cache_chain_sem);
        list_for_each_entry(cachep, &cache_chain, next)
            enable_cpucache(cachep);
        up(&cache_chain_sem);
    }

    /* Done! */
    // Author's note: from now on kmem_cache_create() has to call
    // enable_cpucache() itself -- presumably keyed off g_cpucache_up == FULL;
    // confirm against kmem_cache_create().
    g_cpucache_up = FULL;

    /* Register a cpu startup notifier callback
     * that initializes ac_data for all new cpus
     */
    register_cpu_notifier(&cpucache_notifier);
    

    /* The reap timers are started later, with a module init call:
     * That part of the kernel is not yet operational.
     */
}

至此，slab已经完成初始化了。这个函数有很多值得学习的地方，因为它基本涵盖了slab需要使用的所有函数。这里我还有些不太明白的地方，比如第3步（我在注释中已经注明），以后要是搞懂了我再回来修改。

Next Post

11. 【Slab】3 - slab的基本操作 - kmem_cache_create()
Previous Post

09. 【slab】1 - slab简介以及相关结构体

CATALOG