Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
1// SPDX-License-Identifier: GPL-2.0
2#define _GNU_SOURCE
3
4#include <linux/limits.h>
5#include <fcntl.h>
6#include <stdio.h>
7#include <stdlib.h>
8#include <string.h>
9#include <sys/stat.h>
10#include <sys/types.h>
11#include <unistd.h>
12#include <sys/wait.h>
13#include <errno.h>
14#include <sys/sysinfo.h>
15#include <pthread.h>
16
17#include "kselftest.h"
18#include "cgroup_util.h"
19
20
21/*
22 * Memory cgroup charging is performed using percpu batches 64 pages
23 * big (look at MEMCG_CHARGE_BATCH), whereas memory.stat is exact. So
24 * the maximum discrepancy between charge and vmstat entries is number
25 * of cpus multiplied by 64 pages.
26 */
27#define MAX_VMSTAT_ERROR (4096 * 64 * get_nprocs())
28
29#define KMEM_DEAD_WAIT_RETRIES 80
30
/*
 * Create (unsigned long)arg negative dentries by stat()-ing unique,
 * non-existent paths with long names. The @cgroup argument is unused;
 * it only satisfies the cg_run() callback signature.
 */
static int alloc_dcache(const char *cgroup, void *arg)
{
	unsigned long nr = (unsigned long)arg;
	unsigned long idx;
	struct stat sb;
	char path[128];

	for (idx = 0; idx < nr; idx++) {
		/* Unique name per iteration and per process. */
		snprintf(path, sizeof(path),
			 "/something-non-existent-with-a-long-name-%64lu-%d",
			 idx, getpid());
		stat(path, &sb);
	}

	return 0;
}
46
47/*
48 * This test allocates 100000 of negative dentries with long names.
49 * Then it checks that "slab" in memory.stat is larger than 1M.
50 * Then it sets memory.high to 1M and checks that at least 1/2
51 * of slab memory has been reclaimed.
52 */
53static int test_kmem_basic(const char *root)
54{
55 int ret = KSFT_FAIL;
56 char *cg = NULL;
57 long slab0, slab1, current;
58
59 cg = cg_name(root, "kmem_basic_test");
60 if (!cg)
61 goto cleanup;
62
63 if (cg_create(cg))
64 goto cleanup;
65
66 if (cg_run(cg, alloc_dcache, (void *)100000))
67 goto cleanup;
68
69 slab0 = cg_read_key_long(cg, "memory.stat", "slab ");
70 if (slab0 < (1 << 20))
71 goto cleanup;
72
73 cg_write(cg, "memory.high", "1M");
74
75 /* wait for RCU freeing */
76 sleep(1);
77
78 slab1 = cg_read_key_long(cg, "memory.stat", "slab ");
79 if (slab1 < 0)
80 goto cleanup;
81
82 current = cg_read_long(cg, "memory.current");
83 if (current < 0)
84 goto cleanup;
85
86 if (slab1 < slab0 / 2 && current < slab0 / 2)
87 ret = KSFT_PASS;
88cleanup:
89 cg_destroy(cg);
90 free(cg);
91
92 return ret;
93}
94
95static void *alloc_kmem_fn(void *arg)
96{
97 alloc_dcache(NULL, (void *)100);
98 return NULL;
99}
100
101static int alloc_kmem_smp(const char *cgroup, void *arg)
102{
103 int nr_threads = 2 * get_nprocs();
104 pthread_t *tinfo;
105 unsigned long i;
106 int ret = -1;
107
108 tinfo = calloc(nr_threads, sizeof(pthread_t));
109 if (tinfo == NULL)
110 return -1;
111
112 for (i = 0; i < nr_threads; i++) {
113 if (pthread_create(&tinfo[i], NULL, &alloc_kmem_fn,
114 (void *)i)) {
115 free(tinfo);
116 return -1;
117 }
118 }
119
120 for (i = 0; i < nr_threads; i++) {
121 ret = pthread_join(tinfo[i], NULL);
122 if (ret)
123 break;
124 }
125
126 free(tinfo);
127 return ret;
128}
129
/*
 * Sequentially create @times child cgroups of @parent, run fn(child, arg)
 * in each, and destroy the child afterwards.
 * Returns 0 on success, -1 on the first failure.
 */
static int cg_run_in_subcgroups(const char *parent,
				int (*fn)(const char *cgroup, void *arg),
				void *arg, int times)
{
	char *child;
	int i;

	for (i = 0; i < times; i++) {
		child = cg_name_indexed(parent, "child", i);
		if (!child)
			return -1;

		if (cg_create(child)) {
			cg_destroy(child);
			free(child);
			return -1;
		}

		/*
		 * Bug fix: forward @arg to the callback. The old code
		 * hard-coded NULL, so callers like test_kmem_dead_cgroups
		 * that pass an allocation count silently allocated nothing.
		 */
		if (cg_run(child, fn, arg)) {
			cg_destroy(child);
			free(child);
			return -1;
		}

		cg_destroy(child);
		free(child);
	}

	return 0;
}
160
161/*
162 * The test creates and destroys a large number of cgroups. In each cgroup it
163 * allocates some slab memory (mostly negative dentries) using 2 * NR_CPUS
164 * threads. Then it checks the sanity of numbers on the parent level:
165 * the total size of the cgroups should be roughly equal to
166 * anon + file + kernel + sock.
167 */
168static int test_kmem_memcg_deletion(const char *root)
169{
170 long current, anon, file, kernel, sock, sum;
171 int ret = KSFT_FAIL;
172 char *parent;
173
174 parent = cg_name(root, "kmem_memcg_deletion_test");
175 if (!parent)
176 goto cleanup;
177
178 if (cg_create(parent))
179 goto cleanup;
180
181 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
182 goto cleanup;
183
184 if (cg_run_in_subcgroups(parent, alloc_kmem_smp, NULL, 100))
185 goto cleanup;
186
187 current = cg_read_long(parent, "memory.current");
188 anon = cg_read_key_long(parent, "memory.stat", "anon ");
189 file = cg_read_key_long(parent, "memory.stat", "file ");
190 kernel = cg_read_key_long(parent, "memory.stat", "kernel ");
191 sock = cg_read_key_long(parent, "memory.stat", "sock ");
192 if (current < 0 || anon < 0 || file < 0 || kernel < 0 || sock < 0)
193 goto cleanup;
194
195 sum = anon + file + kernel + sock;
196 if (labs(sum - current) < MAX_VMSTAT_ERROR) {
197 ret = KSFT_PASS;
198 } else {
199 printf("memory.current = %ld\n", current);
200 printf("anon + file + kernel + sock = %ld\n", sum);
201 printf("anon = %ld\n", anon);
202 printf("file = %ld\n", file);
203 printf("kernel = %ld\n", kernel);
204 printf("sock = %ld\n", sock);
205 }
206
207cleanup:
208 cg_destroy(parent);
209 free(parent);
210
211 return ret;
212}
213
214/*
215 * The test reads the entire /proc/kpagecgroup. If the operation went
216 * successfully (and the kernel didn't panic), the test is treated as passed.
217 */
218static int test_kmem_proc_kpagecgroup(const char *root)
219{
220 unsigned long buf[128];
221 int ret = KSFT_FAIL;
222 ssize_t len;
223 int fd;
224
225 fd = open("/proc/kpagecgroup", O_RDONLY);
226 if (fd < 0)
227 return ret;
228
229 do {
230 len = read(fd, buf, sizeof(buf));
231 } while (len > 0);
232
233 if (len == 0)
234 ret = KSFT_PASS;
235
236 close(fd);
237 return ret;
238}
239
/* Thread body: park long enough for the parent to sample kernel_stack. */
static void *pthread_wait_fn(void *arg)
{
	(void)arg;	/* thread index, unused */
	sleep(100);
	return NULL;
}
245
246static int spawn_1000_threads(const char *cgroup, void *arg)
247{
248 int nr_threads = 1000;
249 pthread_t *tinfo;
250 unsigned long i;
251 long stack;
252 int ret = -1;
253
254 tinfo = calloc(nr_threads, sizeof(pthread_t));
255 if (tinfo == NULL)
256 return -1;
257
258 for (i = 0; i < nr_threads; i++) {
259 if (pthread_create(&tinfo[i], NULL, &pthread_wait_fn,
260 (void *)i)) {
261 free(tinfo);
262 return(-1);
263 }
264 }
265
266 stack = cg_read_key_long(cgroup, "memory.stat", "kernel_stack ");
267 if (stack >= 4096 * 1000)
268 ret = 0;
269
270 free(tinfo);
271 return ret;
272}
273
274/*
275 * The test spawns a process, which spawns 1000 threads. Then it checks
276 * that memory.stat's kernel_stack is at least 1000 pages large.
277 */
278static int test_kmem_kernel_stacks(const char *root)
279{
280 int ret = KSFT_FAIL;
281 char *cg = NULL;
282
283 cg = cg_name(root, "kmem_kernel_stacks_test");
284 if (!cg)
285 goto cleanup;
286
287 if (cg_create(cg))
288 goto cleanup;
289
290 if (cg_run(cg, spawn_1000_threads, NULL))
291 goto cleanup;
292
293 ret = KSFT_PASS;
294cleanup:
295 cg_destroy(cg);
296 free(cg);
297
298 return ret;
299}
300
301/*
302 * This test sequentionally creates 30 child cgroups, allocates some
303 * kernel memory in each of them, and deletes them. Then it checks
304 * that the number of dying cgroups on the parent level is 0.
305 */
306static int test_kmem_dead_cgroups(const char *root)
307{
308 int ret = KSFT_FAIL;
309 char *parent;
310 long dead = -1;
311
312 parent = cg_name(root, "kmem_dead_cgroups_test");
313 if (!parent)
314 goto cleanup;
315
316 if (cg_create(parent))
317 goto cleanup;
318
319 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
320 goto cleanup;
321
322 if (cg_run_in_subcgroups(parent, alloc_dcache, (void *)100, 30))
323 goto cleanup;
324
325 /*
326 * Allow up to ~8s for reclaim of dying descendants to complete.
327 * This is a generous upper bound derived from stress testing, not
328 * from a specific kernel constant, and can be adjusted if reclaim
329 * behavior changes in the future.
330 */
331 dead = cg_read_key_long_poll(parent, "cgroup.stat",
332 "nr_dying_descendants ", 0, KMEM_DEAD_WAIT_RETRIES,
333 DEFAULT_WAIT_INTERVAL_US);
334 if (dead)
335 goto cleanup;
336
337 ret = KSFT_PASS;
338
339cleanup:
340 cg_destroy(parent);
341 free(parent);
342
343 return ret;
344}
345
346/*
347 * This test creates a sub-tree with 1000 memory cgroups.
348 * Then it checks that the memory.current on the parent level
349 * is greater than 0 and approximates matches the percpu value
350 * from memory.stat.
351 */
352static int test_percpu_basic(const char *root)
353{
354 int ret = KSFT_FAIL;
355 char *parent, *child;
356 long current, percpu;
357 int i;
358
359 parent = cg_name(root, "percpu_basic_test");
360 if (!parent)
361 goto cleanup;
362
363 if (cg_create(parent))
364 goto cleanup;
365
366 if (cg_write(parent, "cgroup.subtree_control", "+memory"))
367 goto cleanup;
368
369 for (i = 0; i < 1000; i++) {
370 child = cg_name_indexed(parent, "child", i);
371 if (!child)
372 return -1;
373
374 if (cg_create(child))
375 goto cleanup_children;
376
377 free(child);
378 }
379
380 current = cg_read_long(parent, "memory.current");
381 percpu = cg_read_key_long(parent, "memory.stat", "percpu ");
382
383 if (current > 0 && percpu > 0 && labs(current - percpu) <
384 MAX_VMSTAT_ERROR)
385 ret = KSFT_PASS;
386 else
387 printf("memory.current %ld\npercpu %ld\n",
388 current, percpu);
389
390cleanup_children:
391 for (i = 0; i < 1000; i++) {
392 child = cg_name_indexed(parent, "child", i);
393 cg_destroy(child);
394 free(child);
395 }
396
397cleanup:
398 cg_destroy(parent);
399 free(parent);
400
401 return ret;
402}
403
/* Registry of kmem selftests: T(x) pairs each function with its name. */
#define T(x) { x, #x }
struct kmem_test {
	int (*fn)(const char *root);	/* test entry point; gets cgroup v2 root path */
	const char *name;		/* human-readable name for kselftest output */
} tests[] = {
	T(test_kmem_basic),
	T(test_kmem_memcg_deletion),
	T(test_kmem_proc_kpagecgroup),
	T(test_kmem_kernel_stacks),
	T(test_kmem_dead_cgroups),
	T(test_percpu_basic),
};
#undef T
417
418int main(int argc, char **argv)
419{
420 char root[PATH_MAX];
421 int i;
422
423 ksft_print_header();
424 ksft_set_plan(ARRAY_SIZE(tests));
425 if (cg_find_unified_root(root, sizeof(root), NULL))
426 ksft_exit_skip("cgroup v2 isn't mounted\n");
427
428 /*
429 * Check that memory controller is available:
430 * memory is listed in cgroup.controllers
431 */
432 if (cg_read_strstr(root, "cgroup.controllers", "memory"))
433 ksft_exit_skip("memory controller isn't available\n");
434
435 if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
436 if (cg_write(root, "cgroup.subtree_control", "+memory"))
437 ksft_exit_skip("Failed to set memory controller\n");
438
439 for (i = 0; i < ARRAY_SIZE(tests); i++) {
440 switch (tests[i].fn(root)) {
441 case KSFT_PASS:
442 ksft_test_result_pass("%s\n", tests[i].name);
443 break;
444 case KSFT_SKIP:
445 ksft_test_result_skip("%s\n", tests[i].name);
446 break;
447 default:
448 ksft_test_result_fail("%s\n", tests[i].name);
449 break;
450 }
451 }
452
453 ksft_finished();
454}