Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

zram: introduce compressed data writeback

Patch series "zram: introduce compressed data writeback", v2.

As writeback becomes more common there is another shortcoming that needs
to be addressed - the lack of compressed data writeback. Currently zram
does uncompressed data writeback, which is not optimal due to potential CPU
and battery wastage. This series changes suboptimal uncompressed writeback
to a more optimal compressed data writeback.


This patch (of 7):

zram stores all written-back slots raw, which implies that during
writeback zram first has to decompress slots (except for ZRAM_HUGE slots,
which are raw already). The problem with this approach is that not every
written-back page gets read back (either via read() or via page-fault),
which means that zram basically wastes CPU cycles and battery
decompressing such slots. This changes with the introduction of
decompression on demand, in other words decompression on read()/page-fault.

One caveat of decompression on demand is that async read is completed in
IRQ context, while zram decompression is sleepable. To work around this,
read-back decompression is offloaded to a preemptible context - the system
high-prio workqueue.

At this point compressed writeback is still disabled; a follow-up patch
will introduce a new device attribute which will make it possible to
toggle compressed writeback per-device.

[senozhatsky@chromium.org: rewrote original implementation]
Link: https://lkml.kernel.org/r/20251201094754.4149975-1-senozhatsky@chromium.org
Link: https://lkml.kernel.org/r/20251201094754.4149975-2-senozhatsky@chromium.org
Signed-off-by: Richard Chang <richardycc@google.com>
Co-developed-by: Sergey Senozhatsky <senozhatsky@chromium.org>
Suggested-by: Minchan Kim <minchan@google.com>
Suggested-by: Brian Geffon <bgeffon@google.com>
Cc: David Stevens <stevensd@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>

authored by

Richard Chang and committed by
Andrew Morton
d38fab60 7adc97bc

+228 -54
+227 -54
drivers/block/zram/zram_drv.c
··· 57 57 static const struct block_device_operations zram_devops; 58 58 59 59 static void zram_free_page(struct zram *zram, size_t index); 60 - static int zram_read_from_zspool(struct zram *zram, struct page *page, 61 - u32 index); 62 - 63 60 #define slot_dep_map(zram, index) (&(zram)->table[(index)].dep_map) 64 61 65 62 static void zram_slot_lock_init(struct zram *zram, u32 index) ··· 499 502 #ifdef CONFIG_ZRAM_WRITEBACK 500 503 #define INVALID_BDEV_BLOCK (~0UL) 501 504 505 + static int read_from_zspool_raw(struct zram *zram, struct page *page, 506 + u32 index); 507 + static int read_from_zspool(struct zram *zram, struct page *page, u32 index); 508 + 502 509 struct zram_wb_ctl { 503 510 /* idle list is accessed only by the writeback task, no concurency */ 504 511 struct list_head idle_reqs; ··· 521 520 struct bio bio; 522 521 523 522 struct list_head entry; 523 + }; 524 + 525 + struct zram_rb_req { 526 + struct work_struct work; 527 + struct zram *zram; 528 + struct page *page; 529 + /* The read bio for backing device */ 530 + struct bio *bio; 531 + unsigned long blk_idx; 532 + union { 533 + /* The original bio to complete (async read) */ 534 + struct bio *parent; 535 + /* error status (sync read) */ 536 + int error; 537 + }; 538 + u32 index; 524 539 }; 525 540 526 541 static ssize_t writeback_limit_enable_store(struct device *dev, ··· 797 780 atomic64_dec(&zram->stats.bd_count); 798 781 } 799 782 800 - static void read_from_bdev_async(struct zram *zram, struct page *page, 801 - unsigned long entry, struct bio *parent) 802 - { 803 - struct bio *bio; 804 - 805 - bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); 806 - bio->bi_iter.bi_sector = entry * (PAGE_SIZE >> 9); 807 - __bio_add_page(bio, page, PAGE_SIZE, 0); 808 - bio_chain(bio, parent); 809 - submit_bio(bio); 810 - } 811 - 812 783 static void release_wb_req(struct zram_wb_req *req) 813 784 { 814 785 __free_page(req->page); ··· 891 886 892 887 static int zram_writeback_complete(struct zram *zram, 
struct zram_wb_req *req) 893 888 { 894 - u32 index = req->pps->index; 895 - int err; 889 + u32 size, index = req->pps->index; 890 + int err, prio; 891 + bool huge; 896 892 897 893 err = blk_status_to_errno(req->bio.bi_status); 898 894 if (err) { ··· 920 914 goto out; 921 915 } 922 916 917 + if (zram->wb_compressed) { 918 + /* 919 + * ZRAM_WB slots get freed, we need to preserve data required 920 + * for read decompression. 921 + */ 922 + size = zram_get_obj_size(zram, index); 923 + prio = zram_get_priority(zram, index); 924 + huge = zram_test_flag(zram, index, ZRAM_HUGE); 925 + } 926 + 923 927 zram_free_page(zram, index); 924 928 zram_set_flag(zram, index, ZRAM_WB); 925 929 zram_set_handle(zram, index, req->blk_idx); 930 + 931 + if (zram->wb_compressed) { 932 + if (huge) 933 + zram_set_flag(zram, index, ZRAM_HUGE); 934 + zram_set_obj_size(zram, index, size); 935 + zram_set_priority(zram, index, prio); 936 + } 937 + 926 938 atomic64_inc(&zram->stats.pages_stored); 927 939 928 940 out: ··· 1074 1050 */ 1075 1051 if (!zram_test_flag(zram, index, ZRAM_PP_SLOT)) 1076 1052 goto next; 1077 - if (zram_read_from_zspool(zram, req->page, index)) 1053 + if (zram->wb_compressed) 1054 + err = read_from_zspool_raw(zram, req->page, index); 1055 + else 1056 + err = read_from_zspool(zram, req->page, index); 1057 + if (err) 1078 1058 goto next; 1079 1059 zram_slot_unlock(zram, index); 1080 1060 ··· 1341 1313 return ret; 1342 1314 } 1343 1315 1344 - struct zram_work { 1345 - struct work_struct work; 1346 - struct zram *zram; 1347 - unsigned long entry; 1348 - struct page *page; 1349 - int error; 1350 - }; 1351 - 1352 - static void zram_sync_read(struct work_struct *work) 1316 + static int decompress_bdev_page(struct zram *zram, struct page *page, u32 index) 1353 1317 { 1354 - struct zram_work *zw = container_of(work, struct zram_work, work); 1318 + struct zcomp_strm *zstrm; 1319 + unsigned int size; 1320 + int ret, prio; 1321 + void *src; 1322 + 1323 + zram_slot_lock(zram, index); 
1324 + /* Since slot was unlocked we need to make sure it's still ZRAM_WB */ 1325 + if (!zram_test_flag(zram, index, ZRAM_WB)) { 1326 + zram_slot_unlock(zram, index); 1327 + /* We read some stale data, zero it out */ 1328 + memset_page(page, 0, 0, PAGE_SIZE); 1329 + return -EIO; 1330 + } 1331 + 1332 + if (zram_test_flag(zram, index, ZRAM_HUGE)) { 1333 + zram_slot_unlock(zram, index); 1334 + return 0; 1335 + } 1336 + 1337 + size = zram_get_obj_size(zram, index); 1338 + prio = zram_get_priority(zram, index); 1339 + 1340 + zstrm = zcomp_stream_get(zram->comps[prio]); 1341 + src = kmap_local_page(page); 1342 + ret = zcomp_decompress(zram->comps[prio], zstrm, src, size, 1343 + zstrm->local_copy); 1344 + if (!ret) 1345 + copy_page(src, zstrm->local_copy); 1346 + kunmap_local(src); 1347 + zcomp_stream_put(zstrm); 1348 + zram_slot_unlock(zram, index); 1349 + 1350 + return ret; 1351 + } 1352 + 1353 + static void zram_deferred_decompress(struct work_struct *w) 1354 + { 1355 + struct zram_rb_req *req = container_of(w, struct zram_rb_req, work); 1356 + struct page *page = bio_first_page_all(req->bio); 1357 + struct zram *zram = req->zram; 1358 + u32 index = req->index; 1359 + int ret; 1360 + 1361 + ret = decompress_bdev_page(zram, page, index); 1362 + if (ret) 1363 + req->parent->bi_status = BLK_STS_IOERR; 1364 + 1365 + /* Decrement parent's ->remaining */ 1366 + bio_endio(req->parent); 1367 + bio_put(req->bio); 1368 + kfree(req); 1369 + } 1370 + 1371 + static void zram_async_read_endio(struct bio *bio) 1372 + { 1373 + struct zram_rb_req *req = bio->bi_private; 1374 + struct zram *zram = req->zram; 1375 + 1376 + if (bio->bi_status) { 1377 + req->parent->bi_status = bio->bi_status; 1378 + bio_endio(req->parent); 1379 + bio_put(bio); 1380 + kfree(req); 1381 + return; 1382 + } 1383 + 1384 + /* 1385 + * NOTE: zram_async_read_endio() is not exactly right place for this. 
1386 + * Ideally, we need to do it after ZRAM_WB check, but this requires 1387 + * us to use wq path even on systems that don't enable compressed 1388 + * writeback, because we cannot take slot-lock in the current context. 1389 + * 1390 + * Keep the existing behavior for now. 1391 + */ 1392 + if (zram->wb_compressed == false) { 1393 + /* No decompression needed, complete the parent IO */ 1394 + bio_endio(req->parent); 1395 + bio_put(bio); 1396 + kfree(req); 1397 + return; 1398 + } 1399 + 1400 + /* 1401 + * zram decompression is sleepable, so we need to deffer it to 1402 + * a preemptible context. 1403 + */ 1404 + INIT_WORK(&req->work, zram_deferred_decompress); 1405 + queue_work(system_highpri_wq, &req->work); 1406 + } 1407 + 1408 + static void read_from_bdev_async(struct zram *zram, struct page *page, 1409 + u32 index, unsigned long blk_idx, 1410 + struct bio *parent) 1411 + { 1412 + struct zram_rb_req *req; 1413 + struct bio *bio; 1414 + 1415 + req = kmalloc(sizeof(*req), GFP_NOIO); 1416 + if (!req) 1417 + return; 1418 + 1419 + bio = bio_alloc(zram->bdev, 1, parent->bi_opf, GFP_NOIO); 1420 + if (!bio) { 1421 + kfree(req); 1422 + return; 1423 + } 1424 + 1425 + req->zram = zram; 1426 + req->index = index; 1427 + req->blk_idx = blk_idx; 1428 + req->bio = bio; 1429 + req->parent = parent; 1430 + 1431 + bio->bi_iter.bi_sector = blk_idx * (PAGE_SIZE >> 9); 1432 + bio->bi_private = req; 1433 + bio->bi_end_io = zram_async_read_endio; 1434 + 1435 + __bio_add_page(bio, page, PAGE_SIZE, 0); 1436 + bio_inc_remaining(parent); 1437 + submit_bio(bio); 1438 + } 1439 + 1440 + static void zram_sync_read(struct work_struct *w) 1441 + { 1442 + struct zram_rb_req *req = container_of(w, struct zram_rb_req, work); 1355 1443 struct bio_vec bv; 1356 1444 struct bio bio; 1357 1445 1358 - bio_init(&bio, zw->zram->bdev, &bv, 1, REQ_OP_READ); 1359 - bio.bi_iter.bi_sector = zw->entry * (PAGE_SIZE >> 9); 1360 - __bio_add_page(&bio, zw->page, PAGE_SIZE, 0); 1361 - zw->error = 
submit_bio_wait(&bio); 1446 + bio_init(&bio, req->zram->bdev, &bv, 1, REQ_OP_READ); 1447 + bio.bi_iter.bi_sector = req->blk_idx * (PAGE_SIZE >> 9); 1448 + __bio_add_page(&bio, req->page, PAGE_SIZE, 0); 1449 + req->error = submit_bio_wait(&bio); 1362 1450 } 1363 1451 1364 1452 /* ··· 1482 1338 * chained IO with parent IO in same context, it's a deadlock. To avoid that, 1483 1339 * use a worker thread context. 1484 1340 */ 1485 - static int read_from_bdev_sync(struct zram *zram, struct page *page, 1486 - unsigned long entry) 1341 + static int read_from_bdev_sync(struct zram *zram, struct page *page, u32 index, 1342 + unsigned long blk_idx) 1487 1343 { 1488 - struct zram_work work; 1344 + struct zram_rb_req req; 1489 1345 1490 - work.page = page; 1491 - work.zram = zram; 1492 - work.entry = entry; 1346 + req.page = page; 1347 + req.zram = zram; 1348 + req.blk_idx = blk_idx; 1493 1349 1494 - INIT_WORK_ONSTACK(&work.work, zram_sync_read); 1495 - queue_work(system_dfl_wq, &work.work); 1496 - flush_work(&work.work); 1497 - destroy_work_on_stack(&work.work); 1350 + INIT_WORK_ONSTACK(&req.work, zram_sync_read); 1351 + queue_work(system_dfl_wq, &req.work); 1352 + flush_work(&req.work); 1353 + destroy_work_on_stack(&req.work); 1498 1354 1499 - return work.error; 1355 + if (req.error || zram->wb_compressed == false) 1356 + return req.error; 1357 + 1358 + return decompress_bdev_page(zram, page, index); 1500 1359 } 1501 1360 1502 - static int read_from_bdev(struct zram *zram, struct page *page, 1503 - unsigned long entry, struct bio *parent) 1361 + static int read_from_bdev(struct zram *zram, struct page *page, u32 index, 1362 + unsigned long blk_idx, struct bio *parent) 1504 1363 { 1505 1364 atomic64_inc(&zram->stats.bd_reads); 1506 1365 if (!parent) { 1507 1366 if (WARN_ON_ONCE(!IS_ENABLED(ZRAM_PARTIAL_IO))) 1508 1367 return -EIO; 1509 - return read_from_bdev_sync(zram, page, entry); 1368 + return read_from_bdev_sync(zram, page, index, blk_idx); 1510 1369 } 1511 - 
read_from_bdev_async(zram, page, entry, parent); 1370 + read_from_bdev_async(zram, page, index, blk_idx, parent); 1512 1371 return 0; 1513 1372 } 1514 1373 #else 1515 1374 static inline void reset_bdev(struct zram *zram) {}; 1516 - static int read_from_bdev(struct zram *zram, struct page *page, 1517 - unsigned long entry, struct bio *parent) 1375 + static int read_from_bdev(struct zram *zram, struct page *page, u32 index, 1376 + unsigned long blk_idx, struct bio *parent) 1518 1377 { 1519 1378 return -EIO; 1520 1379 } ··· 2124 1977 return ret; 2125 1978 } 2126 1979 1980 + #if defined CONFIG_ZRAM_WRITEBACK 1981 + static int read_from_zspool_raw(struct zram *zram, struct page *page, u32 index) 1982 + { 1983 + struct zcomp_strm *zstrm; 1984 + unsigned long handle; 1985 + unsigned int size; 1986 + void *src; 1987 + 1988 + handle = zram_get_handle(zram, index); 1989 + size = zram_get_obj_size(zram, index); 1990 + 1991 + /* 1992 + * We need to get stream just for ->local_copy buffer, in 1993 + * case if object spans two physical pages. No decompression 1994 + * takes place here, as we read raw compressed data. 1995 + */ 1996 + zstrm = zcomp_stream_get(zram->comps[ZRAM_PRIMARY_COMP]); 1997 + src = zs_obj_read_begin(zram->mem_pool, handle, zstrm->local_copy); 1998 + memcpy_to_page(page, 0, src, size); 1999 + zs_obj_read_end(zram->mem_pool, handle, src); 2000 + zcomp_stream_put(zstrm); 2001 + 2002 + return 0; 2003 + } 2004 + #endif 2005 + 2127 2006 /* 2128 2007 * Reads (decompresses if needed) a page from zspool (zsmalloc). 2129 2008 * Corresponding ZRAM slot should be locked. 
2130 2009 */ 2131 - static int zram_read_from_zspool(struct zram *zram, struct page *page, 2132 - u32 index) 2010 + static int read_from_zspool(struct zram *zram, struct page *page, u32 index) 2133 2011 { 2134 2012 if (zram_test_flag(zram, index, ZRAM_SAME) || 2135 2013 !zram_get_handle(zram, index)) ··· 2174 2002 zram_slot_lock(zram, index); 2175 2003 if (!zram_test_flag(zram, index, ZRAM_WB)) { 2176 2004 /* Slot should be locked through out the function call */ 2177 - ret = zram_read_from_zspool(zram, page, index); 2005 + ret = read_from_zspool(zram, page, index); 2178 2006 zram_slot_unlock(zram, index); 2179 2007 } else { 2180 2008 unsigned long blk_idx = zram_get_handle(zram, index); ··· 2184 2012 * device. 2185 2013 */ 2186 2014 zram_slot_unlock(zram, index); 2187 - ret = read_from_bdev(zram, page, blk_idx, parent); 2015 + ret = read_from_bdev(zram, page, index, blk_idx, parent); 2188 2016 } 2189 2017 2190 2018 /* Should NEVER happen. Return bio error if it does. */ ··· 2445 2273 if (comp_len_old < threshold) 2446 2274 return 0; 2447 2275 2448 - ret = zram_read_from_zspool(zram, page, index); 2276 + ret = read_from_zspool(zram, page, index); 2449 2277 if (ret) 2450 2278 return ret; 2451 2279 ··· 3132 2960 init_rwsem(&zram->init_lock); 3133 2961 #ifdef CONFIG_ZRAM_WRITEBACK 3134 2962 zram->wb_batch_size = 32; 2963 + zram->wb_compressed = false; 3135 2964 #endif 3136 2965 3137 2966 /* gendisk structure */
+1
drivers/block/zram/zram_drv.h
··· 128 128 #ifdef CONFIG_ZRAM_WRITEBACK 129 129 struct file *backing_dev; 130 130 bool wb_limit_enable; 131 + bool wb_compressed; 131 132 u32 wb_batch_size; 132 133 u64 bd_wb_limit; 133 134 struct block_device *bdev;