Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

dm-bufio: remove maximum age based eviction

Every 30 seconds, dm-bufio evicts all buffers that were not accessed
within the last max_age_seconds, except those pinned in memory via
retain_bytes. By default max_age_seconds is 300 (i.e. 5 minutes), and
retain_bytes is 262144 (i.e. 256 KiB) per dm-bufio client.

This eviction algorithm is much too eager and is also redundant with the
shrinker based eviction.

Testing on an Android phone shows that about 30 MB of dm-bufio buffers
(from dm-verity Merkle tree blocks) are loaded at boot time, and then
about 90% of them are suddenly thrown away 5 minutes after boot. This
results in unnecessary Merkle tree I/O later.

Meanwhile, if the system actually encounters memory pressure, testing
also shows that the shrinker is effective at evicting the buffers.

Other major Linux kernel caches, such as the page cache, do not enforce
a maximum age, instead relying on the shrinker.

For these reasons, Android is now setting max_age_seconds to 86400
(i.e. 1 day), which mostly disables it; see
https://android.googlesource.com/platform/system/core/+/cadad290a79d5b0a30add935aaadab7c1b1ef5e9%5E%21/

That is a much better default, but really the maximum age based eviction
should not exist at all. Let's remove it.

Note that this also eliminates the need to run work every 30 seconds,
which is beneficial too.

Signed-off-by: Eric Biggers <ebiggers@google.com>
Signed-off-by: Mikulas Patocka <mpatocka@redhat.com>

authored by

Eric Biggers and committed by
Mikulas Patocka
97693781 f9ed3121

+36 -153
+36 -153
drivers/md/dm-bufio.c
··· 41 41 #define DM_BUFIO_LOW_WATERMARK_RATIO 16 42 42 43 43 /* 44 - * Check buffer ages in this interval (seconds) 45 - */ 46 - #define DM_BUFIO_WORK_TIMER_SECS 30 47 - 48 - /* 49 - * Free buffers when they are older than this (seconds) 50 - */ 51 - #define DM_BUFIO_DEFAULT_AGE_SECS 300 52 - 53 - /* 54 44 * The nr of bytes of cached data to keep around. 55 45 */ 56 46 #define DM_BUFIO_DEFAULT_RETAIN_BYTES (256 * 1024) ··· 1045 1055 1046 1056 static DEFINE_SPINLOCK(global_spinlock); 1047 1057 1048 - /* 1049 - * Buffers are freed after this timeout 1050 - */ 1051 - static unsigned int dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS; 1058 + static unsigned int dm_bufio_max_age; /* No longer does anything */ 1059 + 1052 1060 static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES; 1053 1061 1054 1062 static unsigned long dm_bufio_peak_allocated; ··· 1074 1086 static DEFINE_MUTEX(dm_bufio_clients_lock); 1075 1087 1076 1088 static struct workqueue_struct *dm_bufio_wq; 1077 - static struct delayed_work dm_bufio_cleanup_old_work; 1078 1089 static struct work_struct dm_bufio_replacement_work; 1079 1090 1080 1091 ··· 2660 2673 2661 2674 /*--------------------------------------------------------------*/ 2662 2675 2663 - static unsigned int get_max_age_hz(void) 2664 - { 2665 - unsigned int max_age = READ_ONCE(dm_bufio_max_age); 2666 - 2667 - if (max_age > UINT_MAX / HZ) 2668 - max_age = UINT_MAX / HZ; 2669 - 2670 - return max_age * HZ; 2671 - } 2672 - 2673 - static bool older_than(struct dm_buffer *b, unsigned long age_hz) 2674 - { 2675 - return time_after_eq(jiffies, READ_ONCE(b->last_accessed) + age_hz); 2676 - } 2677 - 2678 - struct evict_params { 2679 - gfp_t gfp; 2680 - unsigned long age_hz; 2681 - 2682 - /* 2683 - * This gets updated with the largest last_accessed (ie. most 2684 - * recently used) of the evicted buffers. It will not be reinitialised 2685 - * by __evict_many(), so you can use it across multiple invocations. 
2686 - */ 2687 - unsigned long last_accessed; 2688 - }; 2689 - 2690 - /* 2691 - * We may not be able to evict this buffer if IO pending or the client 2692 - * is still using it. 2693 - * 2694 - * And if GFP_NOFS is used, we must not do any I/O because we hold 2695 - * dm_bufio_clients_lock and we would risk deadlock if the I/O gets 2696 - * rerouted to different bufio client. 2697 - */ 2698 - static enum evict_result select_for_evict(struct dm_buffer *b, void *context) 2699 - { 2700 - struct evict_params *params = context; 2701 - 2702 - if (!(params->gfp & __GFP_FS) || 2703 - (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep)) { 2704 - if (test_bit_acquire(B_READING, &b->state) || 2705 - test_bit(B_WRITING, &b->state) || 2706 - test_bit(B_DIRTY, &b->state)) 2707 - return ER_DONT_EVICT; 2708 - } 2709 - 2710 - return older_than(b, params->age_hz) ? ER_EVICT : ER_STOP; 2711 - } 2712 - 2713 - static unsigned long __evict_many(struct dm_bufio_client *c, 2714 - struct evict_params *params, 2715 - int list_mode, unsigned long max_count) 2716 - { 2717 - unsigned long count; 2718 - unsigned long last_accessed; 2719 - struct dm_buffer *b; 2720 - 2721 - for (count = 0; count < max_count; count++) { 2722 - b = cache_evict(&c->cache, list_mode, select_for_evict, params); 2723 - if (!b) 2724 - break; 2725 - 2726 - last_accessed = READ_ONCE(b->last_accessed); 2727 - if (time_after_eq(params->last_accessed, last_accessed)) 2728 - params->last_accessed = last_accessed; 2729 - 2730 - __make_buffer_clean(b); 2731 - __free_buffer_wake(b); 2732 - 2733 - cond_resched(); 2734 - } 2735 - 2736 - return count; 2737 - } 2738 - 2739 - static void evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz) 2740 - { 2741 - struct evict_params params = {.gfp = 0, .age_hz = age_hz, .last_accessed = 0}; 2742 - unsigned long retain = get_retain_buffers(c); 2743 - unsigned long count; 2744 - LIST_HEAD(write_list); 2745 - 2746 - dm_bufio_lock(c); 2747 - 2748 - __check_watermark(c, 
&write_list); 2749 - if (unlikely(!list_empty(&write_list))) { 2750 - dm_bufio_unlock(c); 2751 - __flush_write_list(&write_list); 2752 - dm_bufio_lock(c); 2753 - } 2754 - 2755 - count = cache_total(&c->cache); 2756 - if (count > retain) 2757 - __evict_many(c, &params, LIST_CLEAN, count - retain); 2758 - 2759 - dm_bufio_unlock(c); 2760 - } 2761 - 2762 - static void cleanup_old_buffers(void) 2763 - { 2764 - unsigned long max_age_hz = get_max_age_hz(); 2765 - struct dm_bufio_client *c; 2766 - 2767 - mutex_lock(&dm_bufio_clients_lock); 2768 - 2769 - __cache_size_refresh(); 2770 - 2771 - list_for_each_entry(c, &dm_bufio_all_clients, client_list) 2772 - evict_old_buffers(c, max_age_hz); 2773 - 2774 - mutex_unlock(&dm_bufio_clients_lock); 2775 - } 2776 - 2777 - static void work_fn(struct work_struct *w) 2778 - { 2779 - cleanup_old_buffers(); 2780 - 2781 - queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work, 2782 - DM_BUFIO_WORK_TIMER_SECS * HZ); 2783 - } 2784 - 2785 - /*--------------------------------------------------------------*/ 2786 - 2787 2676 /* 2788 2677 * Global cleanup tries to evict the oldest buffers from across _all_ 2789 2678 * the clients. It does this by repeatedly evicting a few buffers from ··· 2697 2834 list_add_tail(&new_client->client_list, h); 2698 2835 } 2699 2836 2837 + static enum evict_result select_for_evict(struct dm_buffer *b, void *context) 2838 + { 2839 + /* In no-sleep mode, we cannot wait on IO. 
*/ 2840 + if (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep) { 2841 + if (test_bit_acquire(B_READING, &b->state) || 2842 + test_bit(B_WRITING, &b->state) || 2843 + test_bit(B_DIRTY, &b->state)) 2844 + return ER_DONT_EVICT; 2845 + } 2846 + return ER_EVICT; 2847 + } 2848 + 2700 2849 static unsigned long __evict_a_few(unsigned long nr_buffers) 2701 2850 { 2702 - unsigned long count; 2703 2851 struct dm_bufio_client *c; 2704 - struct evict_params params = { 2705 - .gfp = GFP_KERNEL, 2706 - .age_hz = 0, 2707 - /* set to jiffies in case there are no buffers in this client */ 2708 - .last_accessed = jiffies 2709 - }; 2852 + unsigned long oldest_buffer = jiffies; 2853 + unsigned long last_accessed; 2854 + unsigned long count; 2855 + struct dm_buffer *b; 2710 2856 2711 2857 c = __pop_client(); 2712 2858 if (!c) 2713 2859 return 0; 2714 2860 2715 2861 dm_bufio_lock(c); 2716 - count = __evict_many(c, &params, LIST_CLEAN, nr_buffers); 2862 + 2863 + for (count = 0; count < nr_buffers; count++) { 2864 + b = cache_evict(&c->cache, LIST_CLEAN, select_for_evict, NULL); 2865 + if (!b) 2866 + break; 2867 + 2868 + last_accessed = READ_ONCE(b->last_accessed); 2869 + if (time_after_eq(oldest_buffer, last_accessed)) 2870 + oldest_buffer = last_accessed; 2871 + 2872 + __make_buffer_clean(b); 2873 + __free_buffer_wake(b); 2874 + 2875 + cond_resched(); 2876 + } 2877 + 2717 2878 dm_bufio_unlock(c); 2718 2879 2719 2880 if (count) 2720 - c->oldest_buffer = params.last_accessed; 2881 + c->oldest_buffer = oldest_buffer; 2721 2882 __insert_client(c); 2722 2883 2723 2884 return count; ··· 2824 2937 if (!dm_bufio_wq) 2825 2938 return -ENOMEM; 2826 2939 2827 - INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn); 2828 2940 INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup); 2829 - queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work, 2830 - DM_BUFIO_WORK_TIMER_SECS * HZ); 2831 2941 2832 2942 return 0; 2833 2943 } ··· 2836 2952 { 2837 2953 int bug = 0; 2838 2954 2839 - 
cancel_delayed_work_sync(&dm_bufio_cleanup_old_work); 2840 2955 destroy_workqueue(dm_bufio_wq); 2841 2956 2842 2957 if (dm_bufio_client_count) { ··· 2872 2989 MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache"); 2873 2990 2874 2991 module_param_named(max_age_seconds, dm_bufio_max_age, uint, 0644); 2875 - MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds"); 2992 + MODULE_PARM_DESC(max_age_seconds, "No longer does anything"); 2876 2993 2877 2994 module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, 0644); 2878 2995 MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");