Linux kernel mirror (for testing)
git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel
os
linux
// SPDX-License-Identifier: GPL-2.0
/*
 * f2fs extent cache support
 *
 * Copyright (c) 2015 Motorola Mobility
 * Copyright (c) 2015 Samsung Electronics
 * Authors: Jaegeuk Kim <jaegeuk@kernel.org>
 *          Chao Yu <chao2.yu@samsung.com>
 *
 * block_age-based extent cache added by:
 * Copyright (c) 2022 xiaomi Co., Ltd.
 *             http://www.xiaomi.com/
 */
14
15#include <linux/fs.h>
16#include <linux/f2fs_fs.h>
17
18#include "f2fs.h"
19#include "node.h"
20#include <trace/events/f2fs.h>
21
22bool sanity_check_extent_cache(struct inode *inode, struct folio *ifolio)
23{
24 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
25 struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext;
26 struct extent_info ei;
27 int devi;
28
29 get_read_extent_info(&ei, i_ext);
30
31 if (!ei.len)
32 return true;
33
34 if (!f2fs_is_valid_blkaddr(sbi, ei.blk, DATA_GENERIC_ENHANCE) ||
35 !f2fs_is_valid_blkaddr(sbi, ei.blk + ei.len - 1,
36 DATA_GENERIC_ENHANCE)) {
37 f2fs_warn(sbi, "%s: inode (ino=%llx) extent info [%u, %u, %u] is incorrect, run fsck to fix",
38 __func__, inode->i_ino,
39 ei.blk, ei.fofs, ei.len);
40 return false;
41 }
42
43 if (!IS_DEVICE_ALIASING(inode))
44 return true;
45
46 for (devi = 0; devi < sbi->s_ndevs; devi++) {
47 if (FDEV(devi).start_blk != ei.blk ||
48 FDEV(devi).end_blk != ei.blk + ei.len - 1)
49 continue;
50
51 if (devi == 0) {
52 f2fs_warn(sbi,
53 "%s: inode (ino=%llx) is an alias of meta device",
54 __func__, inode->i_ino);
55 return false;
56 }
57
58 if (bdev_is_zoned(FDEV(devi).bdev)) {
59 f2fs_warn(sbi,
60 "%s: device alias inode (ino=%llx)'s extent info "
61 "[%u, %u, %u] maps to zoned block device",
62 __func__, inode->i_ino, ei.blk, ei.fofs, ei.len);
63 return false;
64 }
65 return true;
66 }
67
68 f2fs_warn(sbi, "%s: device alias inode (ino=%llx)'s extent info "
69 "[%u, %u, %u] is inconsistent w/ any devices",
70 __func__, inode->i_ino, ei.blk, ei.fofs, ei.len);
71 return false;
72}
73
74static void __set_extent_info(struct extent_info *ei,
75 unsigned int fofs, unsigned int len,
76 block_t blk, bool keep_clen,
77 unsigned long age, unsigned long last_blocks,
78 enum extent_type type)
79{
80 ei->fofs = fofs;
81 ei->len = len;
82
83 if (type == EX_READ) {
84 ei->blk = blk;
85 if (keep_clen)
86 return;
87#ifdef CONFIG_F2FS_FS_COMPRESSION
88 ei->c_len = 0;
89#endif
90 } else if (type == EX_BLOCK_AGE) {
91 ei->age = age;
92 ei->last_blocks = last_blocks;
93 }
94}
95
96static bool __init_may_extent_tree(struct inode *inode, enum extent_type type)
97{
98 if (type == EX_READ)
99 return test_opt(F2FS_I_SB(inode), READ_EXTENT_CACHE) &&
100 S_ISREG(inode->i_mode);
101 if (type == EX_BLOCK_AGE)
102 return test_opt(F2FS_I_SB(inode), AGE_EXTENT_CACHE) &&
103 (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode));
104 return false;
105}
106
107static bool __may_extent_tree(struct inode *inode, enum extent_type type)
108{
109 if (IS_DEVICE_ALIASING(inode) && type == EX_READ)
110 return true;
111
112 /*
113 * for recovered files during mount do not create extents
114 * if shrinker is not registered.
115 */
116 if (list_empty(&F2FS_I_SB(inode)->s_list))
117 return false;
118
119 if (!__init_may_extent_tree(inode, type))
120 return false;
121
122 if (is_inode_flag_set(inode, FI_NO_EXTENT))
123 return false;
124
125 if (type == EX_READ) {
126 if (is_inode_flag_set(inode, FI_COMPRESSED_FILE) &&
127 !f2fs_sb_has_readonly(F2FS_I_SB(inode)))
128 return false;
129 } else if (type == EX_BLOCK_AGE) {
130 if (is_inode_flag_set(inode, FI_COMPRESSED_FILE))
131 return false;
132 if (file_is_cold(inode))
133 return false;
134 }
135 return true;
136}
137
138static void __try_update_largest_extent(struct extent_tree *et,
139 struct extent_node *en)
140{
141 if (et->type != EX_READ)
142 return;
143 if (en->ei.len <= et->largest.len)
144 return;
145
146 et->largest = en->ei;
147 et->largest_updated = true;
148}
149
150static bool __is_extent_mergeable(struct extent_info *back,
151 struct extent_info *front, enum extent_type type)
152{
153 if (type == EX_READ) {
154#ifdef CONFIG_F2FS_FS_COMPRESSION
155 if (back->c_len && back->len != back->c_len)
156 return false;
157 if (front->c_len && front->len != front->c_len)
158 return false;
159#endif
160 return (back->fofs + back->len == front->fofs &&
161 back->blk + back->len == front->blk);
162 } else if (type == EX_BLOCK_AGE) {
163 return (back->fofs + back->len == front->fofs &&
164 abs(back->age - front->age) <= SAME_AGE_REGION &&
165 abs(back->last_blocks - front->last_blocks) <=
166 SAME_AGE_REGION);
167 }
168 return false;
169}
170
171static bool __is_back_mergeable(struct extent_info *cur,
172 struct extent_info *back, enum extent_type type)
173{
174 return __is_extent_mergeable(back, cur, type);
175}
176
177static bool __is_front_mergeable(struct extent_info *cur,
178 struct extent_info *front, enum extent_type type)
179{
180 return __is_extent_mergeable(cur, front, type);
181}
182
183static struct extent_node *__lookup_extent_node(struct rb_root_cached *root,
184 struct extent_node *cached_en, unsigned int fofs)
185{
186 struct rb_node *node = root->rb_root.rb_node;
187 struct extent_node *en;
188
189 /* check a cached entry */
190 if (cached_en && cached_en->ei.fofs <= fofs &&
191 cached_en->ei.fofs + cached_en->ei.len > fofs)
192 return cached_en;
193
194 /* check rb_tree */
195 while (node) {
196 en = rb_entry(node, struct extent_node, rb_node);
197
198 if (fofs < en->ei.fofs)
199 node = node->rb_left;
200 else if (fofs >= en->ei.fofs + en->ei.len)
201 node = node->rb_right;
202 else
203 return en;
204 }
205 return NULL;
206}
207
208/*
209 * lookup rb entry in position of @fofs in rb-tree,
210 * if hit, return the entry, otherwise, return NULL
211 * @prev_ex: extent before fofs
212 * @next_ex: extent after fofs
213 * @insert_p: insert point for new extent at fofs
214 * in order to simplify the insertion after.
215 * tree must stay unchanged between lookup and insertion.
216 */
217static struct extent_node *__lookup_extent_node_ret(struct rb_root_cached *root,
218 struct extent_node *cached_en,
219 unsigned int fofs,
220 struct extent_node **prev_entry,
221 struct extent_node **next_entry,
222 struct rb_node ***insert_p,
223 struct rb_node **insert_parent,
224 bool *leftmost)
225{
226 struct rb_node **pnode = &root->rb_root.rb_node;
227 struct rb_node *parent = NULL, *tmp_node;
228 struct extent_node *en = cached_en;
229
230 *insert_p = NULL;
231 *insert_parent = NULL;
232 *prev_entry = NULL;
233 *next_entry = NULL;
234
235 if (RB_EMPTY_ROOT(&root->rb_root))
236 return NULL;
237
238 if (en && en->ei.fofs <= fofs && en->ei.fofs + en->ei.len > fofs)
239 goto lookup_neighbors;
240
241 *leftmost = true;
242
243 while (*pnode) {
244 parent = *pnode;
245 en = rb_entry(*pnode, struct extent_node, rb_node);
246
247 if (fofs < en->ei.fofs) {
248 pnode = &(*pnode)->rb_left;
249 } else if (fofs >= en->ei.fofs + en->ei.len) {
250 pnode = &(*pnode)->rb_right;
251 *leftmost = false;
252 } else {
253 goto lookup_neighbors;
254 }
255 }
256
257 *insert_p = pnode;
258 *insert_parent = parent;
259
260 en = rb_entry(parent, struct extent_node, rb_node);
261 tmp_node = parent;
262 if (parent && fofs > en->ei.fofs)
263 tmp_node = rb_next(parent);
264 *next_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
265
266 tmp_node = parent;
267 if (parent && fofs < en->ei.fofs)
268 tmp_node = rb_prev(parent);
269 *prev_entry = rb_entry_safe(tmp_node, struct extent_node, rb_node);
270 return NULL;
271
272lookup_neighbors:
273 if (fofs == en->ei.fofs) {
274 /* lookup prev node for merging backward later */
275 tmp_node = rb_prev(&en->rb_node);
276 *prev_entry = rb_entry_safe(tmp_node,
277 struct extent_node, rb_node);
278 }
279 if (fofs == en->ei.fofs + en->ei.len - 1) {
280 /* lookup next node for merging frontward later */
281 tmp_node = rb_next(&en->rb_node);
282 *next_entry = rb_entry_safe(tmp_node,
283 struct extent_node, rb_node);
284 }
285 return en;
286}
287
/* slab cache that struct extent_tree objects are allocated from */
static struct kmem_cache *extent_tree_slab;
/* slab cache that struct extent_node objects are allocated from */
static struct kmem_cache *extent_node_slab;
290
291static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
292 struct extent_tree *et, struct extent_info *ei,
293 struct rb_node *parent, struct rb_node **p,
294 bool leftmost)
295{
296 struct extent_tree_info *eti = &sbi->extent_tree[et->type];
297 struct extent_node *en;
298
299 en = f2fs_kmem_cache_alloc(extent_node_slab, GFP_ATOMIC, false, sbi);
300 if (!en)
301 return NULL;
302
303 en->ei = *ei;
304 INIT_LIST_HEAD(&en->list);
305 en->et = et;
306
307 rb_link_node(&en->rb_node, parent, p);
308 rb_insert_color_cached(&en->rb_node, &et->root, leftmost);
309 atomic_inc(&et->node_cnt);
310 atomic_inc(&eti->total_ext_node);
311 return en;
312}
313
314static void __detach_extent_node(struct f2fs_sb_info *sbi,
315 struct extent_tree *et, struct extent_node *en)
316{
317 struct extent_tree_info *eti = &sbi->extent_tree[et->type];
318
319 rb_erase_cached(&en->rb_node, &et->root);
320 atomic_dec(&et->node_cnt);
321 atomic_dec(&eti->total_ext_node);
322
323 if (et->cached_en == en)
324 et->cached_en = NULL;
325 kmem_cache_free(extent_node_slab, en);
326}
327
328/*
329 * Flow to release an extent_node:
330 * 1. list_del_init
331 * 2. __detach_extent_node
332 * 3. kmem_cache_free.
333 */
334static void __release_extent_node(struct f2fs_sb_info *sbi,
335 struct extent_tree *et, struct extent_node *en)
336{
337 struct extent_tree_info *eti = &sbi->extent_tree[et->type];
338
339 spin_lock(&eti->extent_lock);
340 f2fs_bug_on(sbi, list_empty(&en->list));
341 list_del_init(&en->list);
342 spin_unlock(&eti->extent_lock);
343
344 __detach_extent_node(sbi, et, en);
345}
346
347static struct extent_tree *__grab_extent_tree(struct inode *inode,
348 enum extent_type type)
349{
350 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
351 struct extent_tree_info *eti = &sbi->extent_tree[type];
352 struct extent_tree *et;
353 nid_t ino = inode->i_ino;
354
355 mutex_lock(&eti->extent_tree_lock);
356 et = radix_tree_lookup(&eti->extent_tree_root, ino);
357 if (!et) {
358 et = f2fs_kmem_cache_alloc(extent_tree_slab,
359 GFP_NOFS, true, NULL);
360 f2fs_radix_tree_insert(&eti->extent_tree_root, ino, et);
361 memset(et, 0, sizeof(struct extent_tree));
362 et->ino = ino;
363 et->type = type;
364 et->root = RB_ROOT_CACHED;
365 et->cached_en = NULL;
366 rwlock_init(&et->lock);
367 INIT_LIST_HEAD(&et->list);
368 atomic_set(&et->node_cnt, 0);
369 atomic_inc(&eti->total_ext_tree);
370 } else {
371 atomic_dec(&eti->total_zombie_tree);
372 list_del_init(&et->list);
373 }
374 mutex_unlock(&eti->extent_tree_lock);
375
376 /* never died until evict_inode */
377 F2FS_I(inode)->extent_tree[type] = et;
378
379 return et;
380}
381
382static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
383 struct extent_tree *et, unsigned int nr_shrink)
384{
385 struct rb_node *node, *next;
386 struct extent_node *en;
387 unsigned int count;
388
389 node = rb_first_cached(&et->root);
390
391 for (count = 0; node && count < nr_shrink; count++) {
392 next = rb_next(node);
393 en = rb_entry(node, struct extent_node, rb_node);
394 __release_extent_node(sbi, et, en);
395 node = next;
396 }
397
398 return count;
399}
400
401static void __drop_largest_extent(struct extent_tree *et,
402 pgoff_t fofs, unsigned int len)
403{
404 if (fofs < (pgoff_t)et->largest.fofs + et->largest.len &&
405 fofs + len > et->largest.fofs) {
406 et->largest.len = 0;
407 et->largest_updated = true;
408 }
409}
410
411void f2fs_init_read_extent_tree(struct inode *inode, struct folio *ifolio)
412{
413 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
414 struct extent_tree_info *eti = &sbi->extent_tree[EX_READ];
415 struct f2fs_extent *i_ext = &F2FS_INODE(ifolio)->i_ext;
416 struct extent_tree *et;
417 struct extent_node *en;
418 struct extent_info ei = {0};
419
420 if (!__may_extent_tree(inode, EX_READ)) {
421 /* drop largest read extent */
422 if (i_ext->len) {
423 f2fs_folio_wait_writeback(ifolio, NODE, true, true);
424 i_ext->len = 0;
425 folio_mark_dirty(ifolio);
426 }
427 set_inode_flag(inode, FI_NO_EXTENT);
428 return;
429 }
430
431 et = __grab_extent_tree(inode, EX_READ);
432
433 get_read_extent_info(&ei, i_ext);
434
435 write_lock(&et->lock);
436 if (atomic_read(&et->node_cnt) || !ei.len)
437 goto skip;
438
439 if (IS_DEVICE_ALIASING(inode)) {
440 et->largest = ei;
441 goto skip;
442 }
443
444 en = __attach_extent_node(sbi, et, &ei, NULL,
445 &et->root.rb_root.rb_node, true);
446 if (en) {
447 et->largest = en->ei;
448 et->cached_en = en;
449
450 spin_lock(&eti->extent_lock);
451 list_add_tail(&en->list, &eti->extent_list);
452 spin_unlock(&eti->extent_lock);
453 }
454skip:
455 /* Let's drop, if checkpoint got corrupted. */
456 if (f2fs_cp_error(sbi)) {
457 et->largest.len = 0;
458 et->largest_updated = true;
459 }
460 write_unlock(&et->lock);
461}
462
463void f2fs_init_age_extent_tree(struct inode *inode)
464{
465 if (!__init_may_extent_tree(inode, EX_BLOCK_AGE))
466 return;
467 __grab_extent_tree(inode, EX_BLOCK_AGE);
468}
469
470void f2fs_init_extent_tree(struct inode *inode)
471{
472 /* initialize read cache */
473 if (__init_may_extent_tree(inode, EX_READ))
474 __grab_extent_tree(inode, EX_READ);
475
476 /* initialize block age cache */
477 if (__init_may_extent_tree(inode, EX_BLOCK_AGE))
478 __grab_extent_tree(inode, EX_BLOCK_AGE);
479}
480
481static bool __lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
482 struct extent_info *ei, enum extent_type type)
483{
484 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
485 struct extent_tree_info *eti = &sbi->extent_tree[type];
486 struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
487 struct extent_node *en;
488 bool ret = false;
489
490 if (!et)
491 return false;
492
493 trace_f2fs_lookup_extent_tree_start(inode, pgofs, type);
494
495 read_lock(&et->lock);
496
497 if (type == EX_READ &&
498 et->largest.fofs <= pgofs &&
499 (pgoff_t)et->largest.fofs + et->largest.len > pgofs) {
500 *ei = et->largest;
501 ret = true;
502 stat_inc_largest_node_hit(sbi);
503 goto out;
504 }
505
506 if (IS_DEVICE_ALIASING(inode)) {
507 ret = false;
508 goto out;
509 }
510
511 en = __lookup_extent_node(&et->root, et->cached_en, pgofs);
512 if (!en)
513 goto out;
514
515 if (en == et->cached_en)
516 stat_inc_cached_node_hit(sbi, type);
517 else
518 stat_inc_rbtree_node_hit(sbi, type);
519
520 *ei = en->ei;
521 spin_lock(&eti->extent_lock);
522 if (!list_empty(&en->list)) {
523 list_move_tail(&en->list, &eti->extent_list);
524 et->cached_en = en;
525 }
526 spin_unlock(&eti->extent_lock);
527 ret = true;
528out:
529 stat_inc_total_hit(sbi, type);
530 read_unlock(&et->lock);
531
532 if (type == EX_READ)
533 trace_f2fs_lookup_read_extent_tree_end(inode, pgofs, ei);
534 else if (type == EX_BLOCK_AGE)
535 trace_f2fs_lookup_age_extent_tree_end(inode, pgofs, ei);
536 return ret;
537}
538
539static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
540 struct extent_tree *et, struct extent_info *ei,
541 struct extent_node *prev_ex,
542 struct extent_node *next_ex)
543{
544 struct extent_tree_info *eti = &sbi->extent_tree[et->type];
545 struct extent_node *en = NULL;
546
547 if (prev_ex && __is_back_mergeable(ei, &prev_ex->ei, et->type)) {
548 prev_ex->ei.len += ei->len;
549 ei = &prev_ex->ei;
550 en = prev_ex;
551 }
552
553 if (next_ex && __is_front_mergeable(ei, &next_ex->ei, et->type)) {
554 next_ex->ei.fofs = ei->fofs;
555 next_ex->ei.len += ei->len;
556 if (et->type == EX_READ)
557 next_ex->ei.blk = ei->blk;
558 if (en)
559 __release_extent_node(sbi, et, prev_ex);
560
561 en = next_ex;
562 }
563
564 if (!en)
565 return NULL;
566
567 __try_update_largest_extent(et, en);
568
569 spin_lock(&eti->extent_lock);
570 if (!list_empty(&en->list)) {
571 list_move_tail(&en->list, &eti->extent_list);
572 et->cached_en = en;
573 }
574 spin_unlock(&eti->extent_lock);
575 return en;
576}
577
578static struct extent_node *__insert_extent_tree(struct f2fs_sb_info *sbi,
579 struct extent_tree *et, struct extent_info *ei,
580 struct rb_node **insert_p,
581 struct rb_node *insert_parent,
582 bool leftmost)
583{
584 struct extent_tree_info *eti = &sbi->extent_tree[et->type];
585 struct rb_node **p = &et->root.rb_root.rb_node;
586 struct rb_node *parent = NULL;
587 struct extent_node *en = NULL;
588
589 if (insert_p && insert_parent) {
590 parent = insert_parent;
591 p = insert_p;
592 goto do_insert;
593 }
594
595 leftmost = true;
596
597 /* look up extent_node in the rb tree */
598 while (*p) {
599 parent = *p;
600 en = rb_entry(parent, struct extent_node, rb_node);
601
602 if (ei->fofs < en->ei.fofs) {
603 p = &(*p)->rb_left;
604 } else if (ei->fofs >= en->ei.fofs + en->ei.len) {
605 p = &(*p)->rb_right;
606 leftmost = false;
607 } else {
608 f2fs_err_ratelimited(sbi, "%s: corrupted extent, type: %d, "
609 "extent node in rb tree [%u, %u, %u], age [%llu, %llu], "
610 "extent node to insert [%u, %u, %u], age [%llu, %llu]",
611 __func__, et->type, en->ei.fofs, en->ei.blk, en->ei.len, en->ei.age,
612 en->ei.last_blocks, ei->fofs, ei->blk, ei->len, ei->age, ei->last_blocks);
613 f2fs_bug_on(sbi, 1);
614 return NULL;
615 }
616 }
617
618do_insert:
619 en = __attach_extent_node(sbi, et, ei, parent, p, leftmost);
620 if (!en)
621 return NULL;
622
623 __try_update_largest_extent(et, en);
624
625 /* update in global extent list */
626 spin_lock(&eti->extent_lock);
627 list_add_tail(&en->list, &eti->extent_list);
628 et->cached_en = en;
629 spin_unlock(&eti->extent_lock);
630 return en;
631}
632
633static unsigned int __destroy_extent_node(struct inode *inode,
634 enum extent_type type)
635{
636 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
637 struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
638 unsigned int nr_shrink = type == EX_READ ?
639 READ_EXTENT_CACHE_SHRINK_NUMBER :
640 AGE_EXTENT_CACHE_SHRINK_NUMBER;
641 unsigned int node_cnt = 0;
642
643 if (!et || !atomic_read(&et->node_cnt))
644 return 0;
645
646 while (atomic_read(&et->node_cnt)) {
647 write_lock(&et->lock);
648 if (!is_inode_flag_set(inode, FI_NO_EXTENT))
649 set_inode_flag(inode, FI_NO_EXTENT);
650 node_cnt += __free_extent_tree(sbi, et, nr_shrink);
651 write_unlock(&et->lock);
652 }
653
654 f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
655
656 return node_cnt;
657}
658
659static void __update_extent_tree_range(struct inode *inode,
660 struct extent_info *tei, enum extent_type type)
661{
662 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
663 struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
664 struct extent_node *en = NULL, *en1 = NULL;
665 struct extent_node *prev_en = NULL, *next_en = NULL;
666 struct extent_info ei, dei, prev;
667 struct rb_node **insert_p = NULL, *insert_parent = NULL;
668 unsigned int fofs = tei->fofs, len = tei->len;
669 unsigned int end = fofs + len;
670 bool updated = false;
671 bool leftmost = false;
672
673 if (!et)
674 return;
675
676 if (unlikely(len == 0)) {
677 f2fs_err_ratelimited(sbi, "%s: extent len is zero, type: %d, "
678 "extent [%u, %u, %u], age [%llu, %llu]",
679 __func__, type, tei->fofs, tei->blk, tei->len,
680 tei->age, tei->last_blocks);
681 f2fs_bug_on(sbi, 1);
682 return;
683 }
684
685 if (type == EX_READ)
686 trace_f2fs_update_read_extent_tree_range(inode, fofs, len,
687 tei->blk, 0);
688 else if (type == EX_BLOCK_AGE)
689 trace_f2fs_update_age_extent_tree_range(inode, fofs, len,
690 tei->age, tei->last_blocks);
691
692 write_lock(&et->lock);
693
694 if (is_inode_flag_set(inode, FI_NO_EXTENT)) {
695 write_unlock(&et->lock);
696 return;
697 }
698
699 if (type == EX_READ) {
700 prev = et->largest;
701 dei.len = 0;
702
703 /*
704 * drop largest extent before lookup, in case it's already
705 * been shrunk from extent tree
706 */
707 __drop_largest_extent(et, fofs, len);
708 }
709
710 /* 1. lookup first extent node in range [fofs, fofs + len - 1] */
711 en = __lookup_extent_node_ret(&et->root,
712 et->cached_en, fofs,
713 &prev_en, &next_en,
714 &insert_p, &insert_parent,
715 &leftmost);
716 if (!en)
717 en = next_en;
718
719 /* 2. invalidate all extent nodes in range [fofs, fofs + len - 1] */
720 while (en && en->ei.fofs < end) {
721 unsigned int org_end;
722 int parts = 0; /* # of parts current extent split into */
723
724 next_en = en1 = NULL;
725
726 dei = en->ei;
727 org_end = dei.fofs + dei.len;
728 f2fs_bug_on(sbi, fofs >= org_end);
729
730 if (fofs > dei.fofs && (type != EX_READ ||
731 fofs - dei.fofs >= F2FS_MIN_EXTENT_LEN)) {
732 en->ei.len = fofs - en->ei.fofs;
733 prev_en = en;
734 parts = 1;
735 }
736
737 if (end < org_end && (type != EX_READ ||
738 (org_end - end >= F2FS_MIN_EXTENT_LEN &&
739 atomic_read(&et->node_cnt) <
740 sbi->max_read_extent_count))) {
741 if (parts) {
742 __set_extent_info(&ei,
743 end, org_end - end,
744 end - dei.fofs + dei.blk, false,
745 dei.age, dei.last_blocks,
746 type);
747 en1 = __insert_extent_tree(sbi, et, &ei,
748 NULL, NULL, true);
749 next_en = en1;
750 } else {
751 __set_extent_info(&en->ei,
752 end, en->ei.len - (end - dei.fofs),
753 en->ei.blk + (end - dei.fofs), true,
754 dei.age, dei.last_blocks,
755 type);
756 next_en = en;
757 }
758 parts++;
759 }
760
761 if (!next_en) {
762 struct rb_node *node = rb_next(&en->rb_node);
763
764 next_en = rb_entry_safe(node, struct extent_node,
765 rb_node);
766 }
767
768 if (parts)
769 __try_update_largest_extent(et, en);
770 else
771 __release_extent_node(sbi, et, en);
772
773 /*
774 * if original extent is split into zero or two parts, extent
775 * tree has been altered by deletion or insertion, therefore
776 * invalidate pointers regard to tree.
777 */
778 if (parts != 1) {
779 insert_p = NULL;
780 insert_parent = NULL;
781 }
782 en = next_en;
783 }
784
785 if (type == EX_BLOCK_AGE)
786 goto update_age_extent_cache;
787
788 /* 3. update extent in read extent cache */
789 BUG_ON(type != EX_READ);
790
791 if (tei->blk) {
792 __set_extent_info(&ei, fofs, len, tei->blk, false,
793 0, 0, EX_READ);
794 if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
795 __insert_extent_tree(sbi, et, &ei,
796 insert_p, insert_parent, leftmost);
797
798 /* give up extent_cache, if split and small updates happen */
799 if (dei.len >= 1 &&
800 prev.len < F2FS_MIN_EXTENT_LEN &&
801 et->largest.len < F2FS_MIN_EXTENT_LEN) {
802 et->largest.len = 0;
803 et->largest_updated = true;
804 set_inode_flag(inode, FI_NO_EXTENT);
805 }
806 }
807
808 if (et->largest_updated) {
809 et->largest_updated = false;
810 updated = true;
811 }
812 goto out_read_extent_cache;
813update_age_extent_cache:
814 if (tei->last_blocks == F2FS_EXTENT_AGE_INVALID)
815 goto out_read_extent_cache;
816
817 __set_extent_info(&ei, fofs, len, 0, false,
818 tei->age, tei->last_blocks, EX_BLOCK_AGE);
819 if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
820 __insert_extent_tree(sbi, et, &ei,
821 insert_p, insert_parent, leftmost);
822out_read_extent_cache:
823 write_unlock(&et->lock);
824
825 if (is_inode_flag_set(inode, FI_NO_EXTENT))
826 __destroy_extent_node(inode, EX_READ);
827
828 if (updated)
829 f2fs_mark_inode_dirty_sync(inode, true);
830}
831
#ifdef CONFIG_F2FS_FS_COMPRESSION
/*
 * Record a compressed read extent [@fofs, @fofs + @llen) starting at
 * @blkaddr with compressed length @c_len.
 *
 * Nothing is recorded when FI_NO_EXTENT is set or when a node already
 * covers @fofs; existing nodes are never cut or replaced here.
 */
void f2fs_update_read_extent_tree_range_compressed(struct inode *inode,
				pgoff_t fofs, block_t blkaddr, unsigned int llen,
				unsigned int c_len)
{
	struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
	struct extent_tree *et = F2FS_I(inode)->extent_tree[EX_READ];
	struct extent_node *prev_en = NULL, *next_en = NULL;
	struct rb_node **insert_p = NULL, *insert_parent = NULL;
	struct extent_node *covering;
	struct extent_info ei;
	bool leftmost = false;

	trace_f2fs_update_read_extent_tree_range(inode, fofs, llen,
						blkaddr, c_len);

	/* it is safe here to check FI_NO_EXTENT w/o et->lock in ro image */
	if (is_inode_flag_set(inode, FI_NO_EXTENT))
		return;

	write_lock(&et->lock);

	covering = __lookup_extent_node_ret(&et->root,
					et->cached_en, fofs,
					&prev_en, &next_en,
					&insert_p, &insert_parent,
					&leftmost);
	if (!covering) {
		/* keep_clen is set, then c_len is written explicitly */
		__set_extent_info(&ei, fofs, llen, blkaddr, true, 0, 0,
								EX_READ);
		ei.c_len = c_len;

		if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
			__insert_extent_tree(sbi, et, &ei,
					insert_p, insert_parent, leftmost);
	}

	write_unlock(&et->lock);
}
#endif
872
873static unsigned long long __calculate_block_age(struct f2fs_sb_info *sbi,
874 unsigned long long new,
875 unsigned long long old)
876{
877 unsigned int rem_old, rem_new;
878 unsigned long long res;
879 unsigned int weight = sbi->last_age_weight;
880
881 res = div_u64_rem(new, 100, &rem_new) * (100 - weight)
882 + div_u64_rem(old, 100, &rem_old) * weight;
883
884 if (rem_new)
885 res += rem_new * (100 - weight) / 100;
886 if (rem_old)
887 res += rem_old * weight / 100;
888
889 return res;
890}
891
892/* This returns a new age and allocated blocks in ei */
893static int __get_new_block_age(struct inode *inode, struct extent_info *ei,
894 block_t blkaddr)
895{
896 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
897 loff_t f_size = i_size_read(inode);
898 unsigned long long cur_blocks =
899 atomic64_read(&sbi->allocated_data_blocks);
900 struct extent_info tei = *ei; /* only fofs and len are valid */
901
902 /*
903 * When I/O is not aligned to a PAGE_SIZE, update will happen to the last
904 * file block even in seq write. So don't record age for newly last file
905 * block here.
906 */
907 if ((f_size >> PAGE_SHIFT) == ei->fofs && f_size & (PAGE_SIZE - 1) &&
908 blkaddr == NEW_ADDR)
909 return -EINVAL;
910
911 if (__lookup_extent_tree(inode, ei->fofs, &tei, EX_BLOCK_AGE)) {
912 unsigned long long cur_age;
913
914 if (cur_blocks >= tei.last_blocks)
915 cur_age = cur_blocks - tei.last_blocks;
916 else
917 /* allocated_data_blocks overflow */
918 cur_age = (ULLONG_MAX - 1) - tei.last_blocks + cur_blocks;
919
920 if (tei.age)
921 ei->age = __calculate_block_age(sbi, cur_age, tei.age);
922 else
923 ei->age = cur_age;
924 ei->last_blocks = cur_blocks;
925 WARN_ON(ei->age > cur_blocks);
926 return 0;
927 }
928
929 f2fs_bug_on(sbi, blkaddr == NULL_ADDR);
930
931 /* the data block was allocated for the first time */
932 if (blkaddr == NEW_ADDR)
933 goto out;
934
935 if (__is_valid_data_blkaddr(blkaddr) &&
936 !f2fs_is_valid_blkaddr(sbi, blkaddr, DATA_GENERIC_ENHANCE))
937 return -EINVAL;
938out:
939 /*
940 * init block age with zero, this can happen when the block age extent
941 * was reclaimed due to memory constraint or system reboot
942 */
943 ei->age = 0;
944 ei->last_blocks = cur_blocks;
945 return 0;
946}
947
948static void __update_extent_cache(struct dnode_of_data *dn, enum extent_type type)
949{
950 struct extent_info ei = {};
951
952 if (!__may_extent_tree(dn->inode, type))
953 return;
954
955 ei.fofs = f2fs_start_bidx_of_node(ofs_of_node(dn->node_folio), dn->inode) +
956 dn->ofs_in_node;
957 ei.len = 1;
958
959 if (type == EX_READ) {
960 if (dn->data_blkaddr == NEW_ADDR)
961 ei.blk = NULL_ADDR;
962 else
963 ei.blk = dn->data_blkaddr;
964 } else if (type == EX_BLOCK_AGE) {
965 if (__get_new_block_age(dn->inode, &ei, dn->data_blkaddr))
966 return;
967 }
968 __update_extent_tree_range(dn->inode, &ei, type);
969}
970
971static unsigned int __shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink,
972 enum extent_type type)
973{
974 struct extent_tree_info *eti = &sbi->extent_tree[type];
975 struct extent_tree *et, *next;
976 struct extent_node *en;
977 unsigned int node_cnt = 0, tree_cnt = 0;
978 int remained;
979
980 if (!atomic_read(&eti->total_zombie_tree))
981 goto free_node;
982
983 if (!mutex_trylock(&eti->extent_tree_lock))
984 goto out;
985
986 /* 1. remove unreferenced extent tree */
987 list_for_each_entry_safe(et, next, &eti->zombie_list, list) {
988 if (atomic_read(&et->node_cnt)) {
989 write_lock(&et->lock);
990 node_cnt += __free_extent_tree(sbi, et,
991 nr_shrink - node_cnt - tree_cnt);
992 write_unlock(&et->lock);
993 }
994
995 if (atomic_read(&et->node_cnt))
996 goto unlock_out;
997
998 list_del_init(&et->list);
999 radix_tree_delete(&eti->extent_tree_root, et->ino);
1000 kmem_cache_free(extent_tree_slab, et);
1001 atomic_dec(&eti->total_ext_tree);
1002 atomic_dec(&eti->total_zombie_tree);
1003 tree_cnt++;
1004
1005 if (node_cnt + tree_cnt >= nr_shrink)
1006 goto unlock_out;
1007 cond_resched();
1008 }
1009 mutex_unlock(&eti->extent_tree_lock);
1010
1011free_node:
1012 /* 2. remove LRU extent entries */
1013 if (!mutex_trylock(&eti->extent_tree_lock))
1014 goto out;
1015
1016 remained = nr_shrink - (node_cnt + tree_cnt);
1017
1018 spin_lock(&eti->extent_lock);
1019 for (; remained > 0; remained--) {
1020 if (list_empty(&eti->extent_list))
1021 break;
1022 en = list_first_entry(&eti->extent_list,
1023 struct extent_node, list);
1024 et = en->et;
1025 if (!write_trylock(&et->lock)) {
1026 /* refresh this extent node's position in extent list */
1027 list_move_tail(&en->list, &eti->extent_list);
1028 continue;
1029 }
1030
1031 list_del_init(&en->list);
1032 spin_unlock(&eti->extent_lock);
1033
1034 __detach_extent_node(sbi, et, en);
1035
1036 write_unlock(&et->lock);
1037 node_cnt++;
1038 spin_lock(&eti->extent_lock);
1039 }
1040 spin_unlock(&eti->extent_lock);
1041
1042unlock_out:
1043 mutex_unlock(&eti->extent_tree_lock);
1044out:
1045 trace_f2fs_shrink_extent_tree(sbi, node_cnt, tree_cnt, type);
1046
1047 return node_cnt + tree_cnt;
1048}
1049
1050/* read extent cache operations */
1051bool f2fs_lookup_read_extent_cache(struct inode *inode, pgoff_t pgofs,
1052 struct extent_info *ei)
1053{
1054 if (!__may_extent_tree(inode, EX_READ))
1055 return false;
1056
1057 return __lookup_extent_tree(inode, pgofs, ei, EX_READ);
1058}
1059
1060bool f2fs_lookup_read_extent_cache_block(struct inode *inode, pgoff_t index,
1061 block_t *blkaddr)
1062{
1063 struct extent_info ei = {};
1064
1065 if (!f2fs_lookup_read_extent_cache(inode, index, &ei))
1066 return false;
1067 *blkaddr = ei.blk + index - ei.fofs;
1068 return true;
1069}
1070
1071void f2fs_update_read_extent_cache(struct dnode_of_data *dn)
1072{
1073 return __update_extent_cache(dn, EX_READ);
1074}
1075
1076void f2fs_update_read_extent_cache_range(struct dnode_of_data *dn,
1077 pgoff_t fofs, block_t blkaddr, unsigned int len)
1078{
1079 struct extent_info ei = {
1080 .fofs = fofs,
1081 .len = len,
1082 .blk = blkaddr,
1083 };
1084
1085 if (!__may_extent_tree(dn->inode, EX_READ))
1086 return;
1087
1088 __update_extent_tree_range(dn->inode, &ei, EX_READ);
1089}
1090
1091unsigned int f2fs_shrink_read_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1092{
1093 if (!test_opt(sbi, READ_EXTENT_CACHE))
1094 return 0;
1095
1096 return __shrink_extent_tree(sbi, nr_shrink, EX_READ);
1097}
1098
1099/* block age extent cache operations */
1100bool f2fs_lookup_age_extent_cache(struct inode *inode, pgoff_t pgofs,
1101 struct extent_info *ei)
1102{
1103 if (!__may_extent_tree(inode, EX_BLOCK_AGE))
1104 return false;
1105
1106 return __lookup_extent_tree(inode, pgofs, ei, EX_BLOCK_AGE);
1107}
1108
1109void f2fs_update_age_extent_cache(struct dnode_of_data *dn)
1110{
1111 return __update_extent_cache(dn, EX_BLOCK_AGE);
1112}
1113
1114void f2fs_update_age_extent_cache_range(struct dnode_of_data *dn,
1115 pgoff_t fofs, unsigned int len)
1116{
1117 struct extent_info ei = {
1118 .fofs = fofs,
1119 .len = len,
1120 .last_blocks = F2FS_EXTENT_AGE_INVALID,
1121 };
1122
1123 if (!__may_extent_tree(dn->inode, EX_BLOCK_AGE))
1124 return;
1125
1126 __update_extent_tree_range(dn->inode, &ei, EX_BLOCK_AGE);
1127}
1128
1129unsigned int f2fs_shrink_age_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
1130{
1131 if (!test_opt(sbi, AGE_EXTENT_CACHE))
1132 return 0;
1133
1134 return __shrink_extent_tree(sbi, nr_shrink, EX_BLOCK_AGE);
1135}
1136
1137void f2fs_destroy_extent_node(struct inode *inode)
1138{
1139 __destroy_extent_node(inode, EX_READ);
1140 __destroy_extent_node(inode, EX_BLOCK_AGE);
1141}
1142
1143static void __drop_extent_tree(struct inode *inode, enum extent_type type)
1144{
1145 struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1146 bool updated = false;
1147
1148 if (!__may_extent_tree(inode, type))
1149 return;
1150
1151 write_lock(&et->lock);
1152 if (type == EX_READ) {
1153 set_inode_flag(inode, FI_NO_EXTENT);
1154 if (et->largest.len) {
1155 et->largest.len = 0;
1156 updated = true;
1157 }
1158 }
1159 write_unlock(&et->lock);
1160
1161 __destroy_extent_node(inode, type);
1162
1163 if (updated)
1164 f2fs_mark_inode_dirty_sync(inode, true);
1165}
1166
1167void f2fs_drop_extent_tree(struct inode *inode)
1168{
1169 __drop_extent_tree(inode, EX_READ);
1170 __drop_extent_tree(inode, EX_BLOCK_AGE);
1171}
1172
1173static void __destroy_extent_tree(struct inode *inode, enum extent_type type)
1174{
1175 struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
1176 struct extent_tree_info *eti = &sbi->extent_tree[type];
1177 struct extent_tree *et = F2FS_I(inode)->extent_tree[type];
1178 unsigned int node_cnt = 0;
1179
1180 if (!et)
1181 return;
1182
1183 if (inode->i_nlink && !is_bad_inode(inode) &&
1184 atomic_read(&et->node_cnt)) {
1185 mutex_lock(&eti->extent_tree_lock);
1186 list_add_tail(&et->list, &eti->zombie_list);
1187 atomic_inc(&eti->total_zombie_tree);
1188 mutex_unlock(&eti->extent_tree_lock);
1189 return;
1190 }
1191
1192 /* free all extent info belong to this extent tree */
1193 node_cnt = __destroy_extent_node(inode, type);
1194
1195 /* delete extent tree entry in radix tree */
1196 mutex_lock(&eti->extent_tree_lock);
1197 f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
1198 radix_tree_delete(&eti->extent_tree_root, inode->i_ino);
1199 kmem_cache_free(extent_tree_slab, et);
1200 atomic_dec(&eti->total_ext_tree);
1201 mutex_unlock(&eti->extent_tree_lock);
1202
1203 F2FS_I(inode)->extent_tree[type] = NULL;
1204
1205 trace_f2fs_destroy_extent_tree(inode, node_cnt, type);
1206}
1207
1208void f2fs_destroy_extent_tree(struct inode *inode)
1209{
1210 __destroy_extent_tree(inode, EX_READ);
1211 __destroy_extent_tree(inode, EX_BLOCK_AGE);
1212}
1213
1214static void __init_extent_tree_info(struct extent_tree_info *eti)
1215{
1216 INIT_RADIX_TREE(&eti->extent_tree_root, GFP_NOIO);
1217 mutex_init(&eti->extent_tree_lock);
1218 INIT_LIST_HEAD(&eti->extent_list);
1219 spin_lock_init(&eti->extent_lock);
1220 atomic_set(&eti->total_ext_tree, 0);
1221 INIT_LIST_HEAD(&eti->zombie_list);
1222 atomic_set(&eti->total_zombie_tree, 0);
1223 atomic_set(&eti->total_ext_node, 0);
1224}
1225
1226void f2fs_init_extent_cache_info(struct f2fs_sb_info *sbi)
1227{
1228 __init_extent_tree_info(&sbi->extent_tree[EX_READ]);
1229 __init_extent_tree_info(&sbi->extent_tree[EX_BLOCK_AGE]);
1230
1231 /* initialize for block age extents */
1232 atomic64_set(&sbi->allocated_data_blocks, 0);
1233 sbi->hot_data_age_threshold = DEF_HOT_DATA_AGE_THRESHOLD;
1234 sbi->warm_data_age_threshold = DEF_WARM_DATA_AGE_THRESHOLD;
1235 sbi->last_age_weight = LAST_AGE_WEIGHT;
1236 sbi->max_read_extent_count = DEF_MAX_READ_EXTENT_COUNT;
1237}
1238
1239int __init f2fs_create_extent_cache(void)
1240{
1241 extent_tree_slab = f2fs_kmem_cache_create("f2fs_extent_tree",
1242 sizeof(struct extent_tree));
1243 if (!extent_tree_slab)
1244 return -ENOMEM;
1245 extent_node_slab = f2fs_kmem_cache_create("f2fs_extent_node",
1246 sizeof(struct extent_node));
1247 if (!extent_node_slab) {
1248 kmem_cache_destroy(extent_tree_slab);
1249 return -ENOMEM;
1250 }
1251 return 0;
1252}
1253
1254void f2fs_destroy_extent_cache(void)
1255{
1256 kmem_cache_destroy(extent_node_slab);
1257 kmem_cache_destroy(extent_tree_slab);
1258}