Merge tag 'kvm-x86-dirty_ring-6.17' of https://github.com/kvm-x86/linux into HEAD

+5 -13

include/linux/kvm_dirty_ring.h

··· 49 49 } 50 50 51 51 static inline int kvm_dirty_ring_reset(struct kvm *kvm, 52 - struct kvm_dirty_ring *ring) 52 + struct kvm_dirty_ring *ring, 53 + int *nr_entries_reset) 53 54 { 54 - return 0; 55 + return -ENOENT; 55 56 } 56 57 57 58 static inline void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, ··· 78 77 u32 kvm_dirty_ring_get_rsvd_entries(struct kvm *kvm); 79 78 int kvm_dirty_ring_alloc(struct kvm *kvm, struct kvm_dirty_ring *ring, 80 79 int index, u32 size); 81 - 82 - /* 83 - * called with kvm->slots_lock held, returns the number of 84 - * processed pages. 85 - */ 86 - int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring); 87 - 88 - /* 89 - * returns =0: successfully pushed 90 - * <0: unable to push, need to wait 91 - */ 80 + int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring, 81 + int *nr_entries_reset); 92 82 void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset); 93 83 94 84 bool kvm_dirty_ring_check_request(struct kvm_vcpu *vcpu);

+76 -31

virt/kvm/dirty_ring.c

··· 55 55 struct kvm_memory_slot *memslot; 56 56 int as_id, id; 57 57 58 - if (!mask) 59 - return; 60 - 61 58 as_id = slot >> 16; 62 59 id = (u16)slot; 63 60 ··· 102 105 return smp_load_acquire(&gfn->flags) & KVM_DIRTY_GFN_F_RESET; 103 106 } 104 107 105 - int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring) 108 + int kvm_dirty_ring_reset(struct kvm *kvm, struct kvm_dirty_ring *ring, 109 + int *nr_entries_reset) 106 110 { 111 + /* 112 + * To minimize mmu_lock contention, batch resets for harvested entries 113 + * whose gfns are in the same slot, and are within N frame numbers of 114 + * each other, where N is the number of bits in an unsigned long. For 115 + * simplicity, process the current set of entries when the next entry 116 + * can't be included in the batch. 117 + * 118 + * Track the current batch slot, the gfn offset into the slot for the 119 + * batch, and the bitmask of gfns that need to be reset (relative to 120 + * offset). Note, the offset may be adjusted backwards, e.g. so that 121 + * a sequence of gfns X, X-1, ... X-N-1 can be batched. 122 + */ 107 123 u32 cur_slot, next_slot; 108 124 u64 cur_offset, next_offset; 109 - unsigned long mask; 110 - int count = 0; 125 + unsigned long mask = 0; 111 126 struct kvm_dirty_gfn *entry; 112 - bool first_round = true; 113 127 114 - /* This is only needed to make compilers happy */ 115 - cur_slot = cur_offset = mask = 0; 128 + /* 129 + * Ensure concurrent calls to KVM_RESET_DIRTY_RINGS are serialized, 130 + * e.g. so that KVM fully resets all entries processed by a given call 131 + * before returning to userspace. Holding slots_lock also protects 132 + * the various memslot accesses. 
133 + */ 134 + lockdep_assert_held(&kvm->slots_lock); 116 135 117 - while (true) { 136 + while (likely((*nr_entries_reset) < INT_MAX)) { 137 + if (signal_pending(current)) 138 + return -EINTR; 139 + 118 140 entry = &ring->dirty_gfns[ring->reset_index & (ring->size - 1)]; 119 141 120 142 if (!kvm_dirty_gfn_harvested(entry)) ··· 146 130 kvm_dirty_gfn_set_invalid(entry); 147 131 148 132 ring->reset_index++; 149 - count++; 150 - /* 151 - * Try to coalesce the reset operations when the guest is 152 - * scanning pages in the same slot. 153 - */ 154 - if (!first_round && next_slot == cur_slot) { 155 - s64 delta = next_offset - cur_offset; 133 + (*nr_entries_reset)++; 156 134 157 - if (delta >= 0 && delta < BITS_PER_LONG) { 158 - mask |= 1ull << delta; 159 - continue; 135 + if (mask) { 136 + /* 137 + * While the size of each ring is fixed, it's possible 138 + * for the ring to be constantly re-dirtied/harvested 139 + * while the reset is in-progress (the hard limit exists 140 + * only to guard against the count becoming negative). 141 + */ 142 + cond_resched(); 143 + 144 + /* 145 + * Try to coalesce the reset operations when the guest 146 + * is scanning pages in the same slot. 147 + */ 148 + if (next_slot == cur_slot) { 149 + s64 delta = next_offset - cur_offset; 150 + 151 + if (delta >= 0 && delta < BITS_PER_LONG) { 152 + mask |= 1ull << delta; 153 + continue; 154 + } 155 + 156 + /* Backwards visit, careful about overflows! */ 157 + if (delta > -BITS_PER_LONG && delta < 0 && 158 + (mask << -delta >> -delta) == mask) { 159 + cur_offset = next_offset; 160 + mask = (mask << -delta) | 1; 161 + continue; 162 + } 160 163 } 161 164 162 - /* Backwards visit, careful about overflows! 
*/ 163 - if (delta > -BITS_PER_LONG && delta < 0 && 164 - (mask << -delta >> -delta) == mask) { 165 - cur_offset = next_offset; 166 - mask = (mask << -delta) | 1; 167 - continue; 168 - } 165 + /* 166 + * Reset the slot for all the harvested entries that 167 + * have been gathered, but not yet fully processed. 168 + */ 169 + kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask); 169 170 } 170 - kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask); 171 + 172 + /* 173 + * The current slot was reset or this is the first harvested 174 + * entry, (re)initialize the metadata. 175 + */ 171 176 cur_slot = next_slot; 172 177 cur_offset = next_offset; 173 178 mask = 1; 174 - first_round = false; 175 179 } 176 180 177 - kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask); 181 + /* 182 + * Perform a final reset if there are harvested entries that haven't 183 + * been processed, which is guaranteed if at least one harvested entry was 184 + * found. The loop only performs a reset when the "next" entry can't 185 + * be batched with the "current" entry(s), and that reset processes the 186 + * _current_ entry(s); i.e. the last harvested entry, a.k.a. next, will 187 + * always be left pending. 188 + */ 189 + if (mask) 190 + kvm_reset_dirty_gfn(kvm, cur_slot, cur_offset, mask); 178 191 179 192 /* 180 193 * The request KVM_REQ_DIRTY_RING_SOFT_FULL will be cleared ··· 212 167 213 168 trace_kvm_dirty_ring_reset(ring); 214 169 215 - return count; 170 + return 0; 216 171 } 217 172 218 173 void kvm_dirty_ring_push(struct kvm_vcpu *vcpu, u32 slot, u64 offset)

+6 -3

virt/kvm/kvm_main.c

··· 4967 4967 { 4968 4968 unsigned long i; 4969 4969 struct kvm_vcpu *vcpu; 4970 - int cleared = 0; 4970 + int cleared = 0, r; 4971 4971 4972 4972 if (!kvm->dirty_ring_size) 4973 4973 return -EINVAL; 4974 4974 4975 4975 mutex_lock(&kvm->slots_lock); 4976 4976 4977 - kvm_for_each_vcpu(i, vcpu, kvm) 4978 - cleared += kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring); 4977 + kvm_for_each_vcpu(i, vcpu, kvm) { 4978 + r = kvm_dirty_ring_reset(vcpu->kvm, &vcpu->dirty_ring, &cleared); 4979 + if (r) 4980 + break; 4981 + } 4979 4982 4980 4983 mutex_unlock(&kvm->slots_lock); 4981 4984

Configure Feed

Configure Feed