从零到负一

10. 【slab】2 - slab的基本操作 - kmem_cache_init()

2023/02/20

在上一篇笔记《09. 【slab】1 - slab简介以及相关结构体》中,我已经介绍了slab的基本原理以及其使用的数据结构。在这篇笔记中,我将开始分析第一个函数 - kmem_cache_init()。

这个函数在start_kernel()中被调用,用于初始化slab。在这之前,伙伴系统已经初始化完成。kmem_cache_init()这个函数主要分4步:

  1. 初始化cache_cache的部分成员变量;
  2. 建立通用高速缓存;
  3. 在2的基础上,用kmalloc()分配空间来取代cache_cache和malloc_sizes[0]中静态分配的CPU本地缓存描述符;
  4. resize the head arrays to their final sizes - 重新初始化CPU本地缓存和shared缓存;

下面我通过注释来说明该函数。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
//  start_kernel()
// --> mem_init() -- 初始化伙伴系统
// --> kmem_cache_init() -- 初始化slab

// 初始化时,cache_cache是静态分配的
static kmem_cache_t cache_cache = {
.lists = LIST3_INIT(cache_cache.lists),
.batchcount = 1,
.limit = BOOT_CPUCACHE_ENTRIES,
.objsize = sizeof(kmem_cache_t),
.flags = SLAB_NO_REAP,
.spinlock = SPIN_LOCK_UNLOCKED,
.name = "kmem_cache",
#if DEBUG
.reallen = sizeof(kmem_cache_t),
#endif
};

/* Initialisation.
* Called after the gfp() functions have been enabled, and before smp_init().
*
* Bootstraps the slab allocator in the numbered steps described in the
* body: finish setting up the static cache_cache, create the kmalloc
* caches listed in malloc_sizes, replace the two bootstrap head arrays
* with kmalloc()ed copies, run enable_cpucache() on every cache on
* cache_chain, then set g_cpucache_up to FULL and register the CPU
* notifier.
*/
void __init kmem_cache_init(void)
{
size_t left_over;

/* Related structure:
struct cache_sizes {
size_t cs_size;
kmem_cache_t *cs_cachep;
kmem_cache_t *cs_dmacachep;
};
*/
struct cache_sizes *sizes;

/* Related structure:
struct cache_names {
char *name;
char *name_dma;
};
*/
struct cache_names *names;

/*
* Fragmentation resistance on low memory - only use bigger
* page orders on machines with more than 32MB of memory.
*/
if (num_physpages > (32 << 20) >> PAGE_SHIFT)
slab_break_gfp_order = BREAK_GFP_ORDER_HI;

/* Bootstrap is tricky, because several objects are allocated
* from caches that do not exist yet:
* 1) initialize the cache_cache cache: it contains the kmem_cache_t
* structures of all caches, except cache_cache itself: cache_cache
* is statically allocated.
* Initially an __init data area is used for the head array, it's
* replaced with a kmalloc allocated array at the end of the bootstrap.
* 2) Create the first kmalloc cache.
* The kmem_cache_t for the new cache is allocated normally. An __init
* data area is used for the head array.
* 3) Create the remaining kmalloc caches, with minimally sized head arrays.
* 4) Replace the __init data head arrays for cache_cache and the first
* kmalloc cache with kmalloc allocated arrays.
* 5) Resize the head arrays of the kmalloc caches to their final sizes.
*/

/* 1) create the cache_cache */
init_MUTEX(&cache_chain_sem);
INIT_LIST_HEAD(&cache_chain);
list_add(&cache_cache.next, &cache_chain);

// Slabs are offset (coloured) in units of the cache line size; this is the
// minimum offset unit (32B, 64B, etc.)
cache_cache.colour_off = cache_line_size();

// The per-CPU local cache is initialised from a statically allocated array:
// static struct arraycache_init initarray_cache __initdata = {{0, BOOT_CPUCACHE_ENTRIES, 1, 0}};
cache_cache.array[smp_processor_id()] = &initarray_cache.cache;
// Round the object size up to a multiple of the cache line size
cache_cache.objsize = ALIGN(cache_cache.objsize, cache_line_size());

// The details of this function are not expanded here. In short: from the
// order (0 here) it derives the size of a single slab, then computes how many
// objects fit and how much space is left over.
cache_estimate(0, // unsigned long gfporder
cache_cache.objsize, // size_t size
cache_line_size(), // size_t align
0, // int flags
&left_over, // size_t *left_over
&cache_cache.num); // unsigned int *num
// Not even one object fits in an order-0 slab: treated as a fatal bug
if (!cache_cache.num)
BUG();

// Number of colours available for slab colouring
cache_cache.colour = left_over / cache_cache.colour_off; // ULK, p335
cache_cache.colour_next = 0;

// slab_size does not include the memory of the objects themselves (only the
// slab descriptor plus one kmem_bufctl_t per object); every slab in a given
// cache has the same size
cache_cache.slab_size = ALIGN(cache_cache.num * sizeof(kmem_bufctl_t) + sizeof(struct slab), cache_line_size());

/* 2+3) create the kmalloc caches */
/* Related structures
struct cache_sizes {
size_t cs_size;
kmem_cache_t *cs_cachep;
kmem_cache_t *cs_dmacachep;
};

An #include is used inside the array initializer here. Note the #undef at the
end: CACHE() is also defined as a macro elsewhere.
struct cache_sizes malloc_sizes[] = {
#define CACHE(x) { .cs_size = (x) },
#include <linux/kmalloc_sizes.h> --> expands to CACHE(32), CACHE(64), CACHE(96), etc
{ 0, }
#undef CACHE
};
*/
sizes = malloc_sizes;
names = cache_names;

// kmem_cache_create() is used to create the general caches. This is a
// chicken-and-egg problem: slab initialisation has not finished, so how can
// cache descriptors and the like be allocated? Static allocation is used to
// get around it; the details are left to the analysis of kmem_cache_create().
// Walk `sizes` to create all general caches; the entry with cs_size == 0
// terminates the loop.
while (sizes->cs_size) {
/* For performance, all the general caches are L1 aligned.
* This should be particularly beneficial on SMP boxes, as it
* eliminates "false sharing".
* Note for systems short on memory removing the alignment will
* allow tighter packing of the smaller caches. */
sizes->cs_cachep = kmem_cache_create(
names->name, // const char *name
sizes->cs_size, // size_t size, the size of objects to be created in this cache
ARCH_KMALLOC_MINALIGN, // size_t align
(ARCH_KMALLOC_FLAGS | SLAB_PANIC), // unsigned long flags
NULL, // void (*ctor)(void*, kmem_cache_t *, unsigned long)
NULL); // void (*dtor)(void*, kmem_cache_t *, unsigned long)

/* Inc off-slab bufctl limit until the ceiling is hit. */
/* Max number of objs-per-slab for caches which use off-slab slabs.
* Needed to avoid a possible looping condition in cache_grow().
*/
// NOTE(review): OFF_SLAB() presumably tests whether the cache keeps its slab
// management off-slab - confirm against its definition. While it is false,
// offslab_limit is recomputed from the current size class.
if (!(OFF_SLAB(sizes->cs_cachep))) {
offslab_limit = sizes->cs_size-sizeof(struct slab);
offslab_limit /= sizeof(kmem_bufctl_t);
}

// Create the DMA general cache for the same object size
sizes->cs_dmacachep = kmem_cache_create(names->name_dma,
sizes->cs_size,
ARCH_KMALLOC_MINALIGN,
(ARCH_KMALLOC_FLAGS | SLAB_CACHE_DMA | SLAB_PANIC),
NULL,
NULL);
sizes++;
names++;
}

// With the general caches in place, kmalloc() can now be used for small
// allocations. Replace the statically allocated per-CPU local caches with
// dynamically allocated copies.
/* 4) Replace the bootstrap head arrays */
{
void *ptr;

// NOTE(review): the kmalloc() results below are used without a NULL check.
ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
// Copy and swap the head array of cache_cache with local interrupts disabled
local_irq_disable();
BUG_ON(ac_data(&cache_cache) != &initarray_cache.cache);
memcpy(ptr, ac_data(&cache_cache), sizeof(struct arraycache_init));
cache_cache.array[smp_processor_id()] = ptr;
local_irq_enable();

ptr = kmalloc(sizeof(struct arraycache_init), GFP_KERNEL);
// Same replacement for the first kmalloc cache (malloc_sizes[0])
local_irq_disable();
BUG_ON(ac_data(malloc_sizes[0].cs_cachep) != &initarray_generic.cache);
memcpy(ptr, ac_data(malloc_sizes[0].cs_cachep), sizeof(struct arraycache_init));
malloc_sizes[0].cs_cachep->array[smp_processor_id()] = ptr;
local_irq_enable();
}

/* 5) resize the head arrays to their final sizes */
// Re-initialise the per-CPU local caches and the shared caches. Per the
// author's note, cache_chain holds cache_cache plus the general and
// specialised caches.
{
kmem_cache_t *cachep;
down(&cache_chain_sem);
list_for_each_entry(cachep, &cache_chain, next)
enable_cpucache(cachep);
up(&cache_chain_sem);
}

/* Done! */
// Per the author's note: from now on kmem_cache_create() has to call
// enable_cpucache() itself
g_cpucache_up = FULL;

/* Register a cpu startup notifier callback
* that initializes ac_data for all new cpus
*/
register_cpu_notifier(&cpucache_notifier);


/* The reap timers are started later, with a module init call:
* That part of the kernel is not yet operational.
*/
}

至此,slab已经完成初始化了,这个函数有很多可以学习的地方,因为它基本涵盖了所有slab需要使用的函数。这里我还有些不太明白的地方,比如第3步,我在注释中已经注明,以后要是搞懂了我再回来修改。

CATALOG