Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
at ee9dce44362b2d8132c32964656ab6dff7dfbc6a (636 lines, 19 kB)

/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H

/* Avoid a dependency loop by declaring here. */
extern int rcuwait_wake_up(struct rcuwait *w);

#include <linux/lockdep.h>
#include <linux/mm_types.h>
#include <linux/mmdebug.h>
#include <linux/rwsem.h>
#include <linux/tracepoint-defs.h>
#include <linux/types.h>
#include <linux/cleanup.h>
#include <linux/sched/mm.h>

#define MMAP_LOCK_INITIALIZER(name) \
	.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),

DECLARE_TRACEPOINT(mmap_lock_start_locking);
DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
DECLARE_TRACEPOINT(mmap_lock_released);

#ifdef CONFIG_TRACING

void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
					   bool success);
void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
	if (tracepoint_enabled(mmap_lock_start_locking))
		__mmap_lock_do_trace_start_locking(mm, write);
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
	if (tracepoint_enabled(mmap_lock_acquire_returned))
		__mmap_lock_do_trace_acquire_returned(mm, write, success);
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
	if (tracepoint_enabled(mmap_lock_released))
		__mmap_lock_do_trace_released(mm, write);
}

#else /* !CONFIG_TRACING */

static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
						   bool write)
{
}

static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
						      bool write, bool success)
{
}

static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
{
}

#endif /* CONFIG_TRACING */

static inline void mmap_assert_locked(const struct mm_struct *mm)
{
	rwsem_assert_held(&mm->mmap_lock);
}

static inline void mmap_assert_write_locked(const struct mm_struct *mm)
{
	rwsem_assert_held_write(&mm->mmap_lock);
}

#ifdef CONFIG_PER_VMA_LOCK

#ifdef CONFIG_LOCKDEP
#define __vma_lockdep_map(vma) (&vma->vmlock_dep_map)
#else
#define __vma_lockdep_map(vma) NULL
#endif

/*
 * VMA locks do not behave like most ordinary locks found in the kernel, so we
 * cannot quite have full lockdep tracking in the way we would ideally prefer.
 *
 * Read locks act as shared locks which exclude an exclusive lock being
 * taken. We therefore mark these accordingly on read lock acquire/release.
 *
 * Write locks are acquired exclusively per-VMA but released in a shared
 * fashion; that is, upon vma_end_write_all() we update the mmap's seqcount
 * such that the write lock is released.
 *
 * We therefore cannot track write locks per-VMA, nor do we try. Mitigating
 * this is the fact that, of course, we do lockdep-track the mmap lock rwsem
 * which must be held when taking a VMA write lock.
 *
 * We do, however, want to indicate that during either acquisition of a VMA
 * write lock or detachment of a VMA we require the lock held to be exclusive,
 * so we utilise lockdep to do so.
 */
#define __vma_lockdep_acquire_read(vma) \
	lock_acquire_shared(__vma_lockdep_map(vma), 0, 1, NULL, _RET_IP_)
#define __vma_lockdep_release_read(vma) \
	lock_release(__vma_lockdep_map(vma), _RET_IP_)
#define __vma_lockdep_acquire_exclusive(vma) \
	lock_acquire_exclusive(__vma_lockdep_map(vma), 0, 0, NULL, _RET_IP_)
#define __vma_lockdep_release_exclusive(vma) \
	lock_release(__vma_lockdep_map(vma), _RET_IP_)
/* Only meaningful if CONFIG_LOCK_STAT is defined. */
#define __vma_lockdep_stat_mark_acquired(vma) \
	lock_acquired(__vma_lockdep_map(vma), _RET_IP_)

static inline void mm_lock_seqcount_init(struct mm_struct *mm)
{
	seqcount_init(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_begin(struct mm_struct *mm)
{
	do_raw_write_seqcount_begin(&mm->mm_lock_seq);
}

static inline void mm_lock_seqcount_end(struct mm_struct *mm)
{
	ASSERT_EXCLUSIVE_WRITER(mm->mm_lock_seq);
	do_raw_write_seqcount_end(&mm->mm_lock_seq);
}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	/*
	 * Since mmap_lock is a sleeping lock, and waiting for it to become
	 * unlocked is more or less equivalent to taking it ourselves, don't
	 * bother with the speculative path if mmap_lock is already
	 * write-locked; instead take the slow path, which takes the lock.
	 */
	return raw_seqcount_try_begin(&mm->mm_lock_seq, *seq);
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return read_seqcount_retry(&mm->mm_lock_seq, seq);
}
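
/*
 * Example of how the speculative helpers above pair up (an illustrative
 * sketch; read_vma_state() is a hypothetical lockless read, not a real
 * kernel function):
 *
 *	unsigned int seq;
 *
 *	if (mmap_lock_speculate_try_begin(mm, &seq)) {
 *		data = read_vma_state(mm);
 *		if (!mmap_lock_speculate_retry(mm, seq))
 *			return data;	// no mmap write lock raced us
 *	}
 *	// fall back to a slow path which takes mmap_lock
 */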

static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt)
{
#ifdef CONFIG_DEBUG_LOCK_ALLOC
	static struct lock_class_key lockdep_key;

	lockdep_init_map(__vma_lockdep_map(vma), "vm_lock", &lockdep_key, 0);
#endif
	if (reset_refcnt)
		refcount_set(&vma->vm_refcnt, 0);
	vma->vm_lock_seq = UINT_MAX;
}

/*
 * This function determines whether the input VMA reference count describes a
 * VMA which has excluded all VMA read locks.
 *
 * In the case of a detached VMA, we may incorrectly indicate that readers are
 * excluded when one remains, because in that scenario we target a refcount of
 * VM_REFCNT_EXCLUDE_READERS_FLAG, rather than the attached target of
 * VM_REFCNT_EXCLUDE_READERS_FLAG + 1.
 *
 * However, the race window for that is very small so it is unlikely.
 *
 * Returns: true if readers are excluded, false otherwise.
 */
static inline bool __vma_are_readers_excluded(int refcnt)
{
	/*
	 * See the comment describing the vm_area_struct->vm_refcnt field for
	 * details of possible refcnt values.
	 */
	return (refcnt & VM_REFCNT_EXCLUDE_READERS_FLAG) &&
		refcnt <= VM_REFCNT_EXCLUDE_READERS_FLAG + 1;
}

/*
 * Actually decrement the VMA reference count.
 *
 * The function returns the reference count as it was immediately after the
 * decrement took place. If it returns zero, the VMA is now detached.
 */
static inline __must_check unsigned int
__vma_refcount_put_return(struct vm_area_struct *vma)
{
	int oldcnt;

	if (__refcount_dec_and_test(&vma->vm_refcnt, &oldcnt))
		return 0;

	return oldcnt - 1;
}

/**
 * vma_refcount_put() - Drop reference count in VMA vm_refcnt field due to a
 * read-lock being dropped.
 * @vma: The VMA whose reference count we wish to decrement.
 *
 * If we were the last reader, wake up threads waiting to obtain an exclusive
 * lock.
 */
static inline void vma_refcount_put(struct vm_area_struct *vma)
{
	/* Use a copy of vm_mm in case vma is freed after we drop vm_refcnt. */
	struct mm_struct *mm = vma->vm_mm;
	int newcnt;

	__vma_lockdep_release_read(vma);
	newcnt = __vma_refcount_put_return(vma);

	/*
	 * __vma_start_exclude_readers() may be sleeping waiting for readers to
	 * drop their reference count, so wake it up if we were the last reader
	 * blocking it from being acquired.
	 *
	 * We may be raced by other readers temporarily incrementing the
	 * reference count; though the race window is very small, this might
	 * cause spurious wakeups.
	 */
	if (newcnt && __vma_are_readers_excluded(newcnt))
		rcuwait_wake_up(&mm->vma_writer_wait);
}

/*
 * Use only while holding mmap read lock which guarantees that locking will not
 * fail (nobody can concurrently write-lock the vma). vma_start_read() should
 * not be used in such cases because it might fail due to mm_lock_seq overflow.
 * This functionality is used to obtain vma read lock and drop the mmap read lock.
 */
static inline bool vma_start_read_locked_nested(struct vm_area_struct *vma, int subclass)
{
	int oldcnt;

	mmap_assert_locked(vma->vm_mm);
	if (unlikely(!__refcount_inc_not_zero_limited_acquire(&vma->vm_refcnt, &oldcnt,
							      VM_REFCNT_LIMIT)))
		return false;

	__vma_lockdep_acquire_read(vma);
	return true;
}

/*
 * Use only while holding mmap read lock which guarantees that locking will not
 * fail (nobody can concurrently write-lock the vma). vma_start_read() should
 * not be used in such cases because it might fail due to mm_lock_seq overflow.
 * This functionality is used to obtain vma read lock and drop the mmap read lock.
 */
static inline bool vma_start_read_locked(struct vm_area_struct *vma)
{
	return vma_start_read_locked_nested(vma, 0);
}

static inline void vma_end_read(struct vm_area_struct *vma)
{
	vma_refcount_put(vma);
}
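
/*
 * Example of the intended hand-off pattern (an illustrative sketch; error
 * handling is elided, and the find_vma() lookup relies on the mmap lock
 * still being held):
 *
 *	mmap_read_lock(mm);
 *	vma = find_vma(mm, addr);
 *	if (vma && vma_start_read_locked(vma)) {
 *		mmap_read_unlock(mm);	// VMA stays read-locked
 *		... operate on vma ...
 *		vma_end_read(vma);
 *	} else {
 *		mmap_read_unlock(mm);
 *	}
 */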

static inline unsigned int __vma_raw_mm_seqnum(struct vm_area_struct *vma)
{
	const struct mm_struct *mm = vma->vm_mm;

	/* We must hold an exclusive write lock for this access to be valid. */
	mmap_assert_write_locked(vma->vm_mm);
	return mm->mm_lock_seq.sequence;
}

/*
 * Determine whether a VMA is write-locked. Must be invoked ONLY if the mmap
 * write lock is held.
 *
 * Returns true if write-locked, otherwise false.
 */
static inline bool __is_vma_write_locked(struct vm_area_struct *vma)
{
	/*
	 * The current task is holding mmap_write_lock, so neither
	 * vma->vm_lock_seq nor mm->mm_lock_seq can be concurrently modified.
	 */
	return vma->vm_lock_seq == __vma_raw_mm_seqnum(vma);
}

int __vma_start_write(struct vm_area_struct *vma, int state);

/*
 * Begin writing to a VMA.
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 */
static inline void vma_start_write(struct vm_area_struct *vma)
{
	if (__is_vma_write_locked(vma))
		return;

	__vma_start_write(vma, TASK_UNINTERRUPTIBLE);
}

/**
 * vma_start_write_killable - Begin writing to a VMA.
 * @vma: The VMA we are going to modify.
 *
 * Exclude concurrent readers under the per-VMA lock until the currently
 * write-locked mmap_lock is dropped or downgraded.
 *
 * Context: May sleep while waiting for readers to drop the vma read lock.
 * Caller must already hold the mmap_lock for write.
 *
 * Return: 0 for a successful acquisition. -EINTR if a fatal signal was
 * received.
 */
static inline __must_check
int vma_start_write_killable(struct vm_area_struct *vma)
{
	if (__is_vma_write_locked(vma))
		return 0;

	return __vma_start_write(vma, TASK_KILLABLE);
}
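
/*
 * Example of write-locking a VMA before modifying it (an illustrative
 * sketch). Note there is no vma_end_write(): the write lock is dropped
 * for all VMAs at once by vma_end_write_all() when the mmap write lock
 * is released or downgraded:
 *
 *	mmap_write_lock(mm);
 *	vma_start_write(vma);	// sleeps until readers drain
 *	... modify the VMA ...
 *	mmap_write_unlock(mm);	// also releases the VMA write lock
 */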

/**
 * vma_assert_write_locked() - assert that @vma holds a VMA write lock.
 * @vma: The VMA to assert.
 */
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
{
	VM_WARN_ON_ONCE_VMA(!__is_vma_write_locked(vma), vma);
}

/**
 * vma_assert_locked() - assert that @vma holds either a VMA read or a VMA write
 * lock and is not detached.
 * @vma: The VMA to assert.
 */
static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	unsigned int refcnt;

	if (IS_ENABLED(CONFIG_LOCKDEP)) {
		if (!lock_is_held(__vma_lockdep_map(vma)))
			vma_assert_write_locked(vma);
		return;
	}

	/*
	 * See the comment describing the vm_area_struct->vm_refcnt field for
	 * details of possible refcnt values.
	 */
	refcnt = refcount_read(&vma->vm_refcnt);

	/*
	 * In this case we're either read-locked, write-locked with temporary
	 * readers, or in the midst of excluding readers, all of which means
	 * we're locked.
	 */
	if (refcnt > 1)
		return;

	/* It is a bug for the VMA to be detached here. */
	VM_WARN_ON_ONCE_VMA(!refcnt, vma);

	/*
	 * OK, the VMA has a reference count of 1 which means it is either
	 * unlocked and attached or write-locked, so assert that it is
	 * write-locked.
	 */
	vma_assert_write_locked(vma);
}

/**
 * vma_assert_stabilised() - assert that this VMA cannot be changed from
 * underneath us either by having a VMA or mmap lock held.
 * @vma: The VMA whose stability we wish to assess.
 *
 * If lockdep is enabled we can precisely ensure stability via either an mmap
 * lock owned by us or a specific VMA lock.
 *
 * With lockdep disabled we may sometimes race with other threads acquiring the
 * mmap read lock simultaneously with our VMA read lock.
 */
static inline void vma_assert_stabilised(struct vm_area_struct *vma)
{
	/*
	 * If another thread owns an mmap lock, it may go away at any time, and
	 * thus is no guarantee of stability.
	 *
	 * If lockdep is enabled we can accurately determine if an mmap lock is
	 * held and owned by us. Otherwise we must approximate.
	 *
	 * It doesn't necessarily mean we are not stabilised however, as we may
	 * hold a VMA read lock (not a write lock, as this would require an
	 * owned mmap lock).
	 *
	 * If (assuming lockdep is not enabled) we were to assert a VMA read
	 * lock first we may also run into issues, as other threads can hold
	 * VMA read locks simultaneously with us.
	 *
	 * Therefore if lockdep is not enabled we risk a false negative (i.e. no
	 * assert fired). If accurate checking is required, enable lockdep.
	 */
	if (IS_ENABLED(CONFIG_LOCKDEP)) {
		if (lockdep_is_held(&vma->vm_mm->mmap_lock))
			return;
	} else {
		if (rwsem_is_locked(&vma->vm_mm->mmap_lock))
			return;
	}

	/*
	 * We're not stabilised by the mmap lock, so assert that we're
	 * stabilised by a VMA lock.
	 */
	vma_assert_locked(vma);
}

static inline bool vma_is_attached(struct vm_area_struct *vma)
{
	return refcount_read(&vma->vm_refcnt);
}

/*
 * WARNING: to avoid racing with vma_mark_attached()/vma_mark_detached(), these
 * assertions should be made either under mmap_write_lock or when the object
 * has been isolated under mmap_write_lock, ensuring no competing writers.
 */
static inline void vma_assert_attached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(!vma_is_attached(vma));
}

static inline void vma_assert_detached(struct vm_area_struct *vma)
{
	WARN_ON_ONCE(vma_is_attached(vma));
}

static inline void vma_mark_attached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_detached(vma);
	refcount_set_release(&vma->vm_refcnt, 1);
}

void __vma_exclude_readers_for_detach(struct vm_area_struct *vma);

static inline void vma_mark_detached(struct vm_area_struct *vma)
{
	vma_assert_write_locked(vma);
	vma_assert_attached(vma);

	/*
	 * The VMA still being attached (refcnt > 0) is unlikely, because the
	 * vma has already been write-locked and readers can increment
	 * vm_refcnt only temporarily before they check vm_lock_seq, realize
	 * the vma is locked and drop back the vm_refcnt. That is a narrow
	 * window for observing a raised vm_refcnt.
	 *
	 * See the comment describing the vm_area_struct->vm_refcnt field for
	 * details of possible refcnt values.
	 */
	if (likely(!__vma_refcount_put_return(vma)))
		return;

	__vma_exclude_readers_for_detach(vma);
}
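
/*
 * Example of the detach side of the lifecycle (an illustrative sketch;
 * the tree-removal step stands in for whatever mechanism removes the VMA
 * from the VMA tree):
 *
 *	// under mmap_write_lock:
 *	vma_start_write(vma);
 *	... remove vma from the VMA tree ...
 *	vma_mark_detached(vma);	// refcnt 1 -> 0; may wait out readers
 *	... free the vma ...
 */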

struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
					  unsigned long address);

/*
 * Locks the next vma pointed to by the iterator. Confirms the locked vma has
 * not been modified and will retry under mmap_lock protection if modification
 * was detected. Should be called from within an RCU read-side critical
 * section.
 *
 * Returns either a valid locked VMA, NULL if there are no more VMAs, or
 * -EINTR if the process was interrupted.
 */
struct vm_area_struct *lock_next_vma(struct mm_struct *mm,
				     struct vma_iterator *iter,
				     unsigned long address);
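
/*
 * Example of the lockless lookup path built on lock_vma_under_rcu() (an
 * illustrative sketch of the pattern used by page fault handlers; the
 * fallback details are elided):
 *
 *	vma = lock_vma_under_rcu(mm, address);
 *	if (vma) {
 *		... handle the fault against vma ...
 *		vma_end_read(vma);
 *	} else {
 *		// fall back to mmap_read_lock() and a locked lookup
 *	}
 */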

#else /* CONFIG_PER_VMA_LOCK */

static inline void mm_lock_seqcount_init(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_begin(struct mm_struct *mm) {}
static inline void mm_lock_seqcount_end(struct mm_struct *mm) {}

static inline bool mmap_lock_speculate_try_begin(struct mm_struct *mm, unsigned int *seq)
{
	return false;
}

static inline bool mmap_lock_speculate_retry(struct mm_struct *mm, unsigned int seq)
{
	return true;
}
static inline void vma_lock_init(struct vm_area_struct *vma, bool reset_refcnt) {}
static inline void vma_end_read(struct vm_area_struct *vma) {}
static inline void vma_start_write(struct vm_area_struct *vma) {}
static inline __must_check
int vma_start_write_killable(struct vm_area_struct *vma) { return 0; }
static inline void vma_assert_write_locked(struct vm_area_struct *vma)
	{ mmap_assert_write_locked(vma->vm_mm); }
static inline void vma_assert_attached(struct vm_area_struct *vma) {}
static inline void vma_assert_detached(struct vm_area_struct *vma) {}
static inline void vma_mark_attached(struct vm_area_struct *vma) {}
static inline void vma_mark_detached(struct vm_area_struct *vma) {}

static inline struct vm_area_struct *lock_vma_under_rcu(struct mm_struct *mm,
							unsigned long address)
{
	return NULL;
}

static inline void vma_assert_locked(struct vm_area_struct *vma)
{
	mmap_assert_locked(vma->vm_mm);
}

static inline void vma_assert_stabilised(struct vm_area_struct *vma)
{
	/* Without VMA locks, the mmap lock in either mode suffices to stabilise. */
	mmap_assert_locked(vma->vm_mm);
}

#endif /* CONFIG_PER_VMA_LOCK */

static inline void mmap_write_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write(&mm->mmap_lock);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
	__mmap_lock_trace_start_locking(mm, true);
	down_write_nested(&mm->mmap_lock, subclass);
	mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, true);
}

static inline int __must_check mmap_write_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, true);
	ret = down_write_killable(&mm->mmap_lock);
	if (!ret)
		mm_lock_seqcount_begin(mm);
	__mmap_lock_trace_acquire_returned(mm, true, ret == 0);
	return ret;
}

/*
 * Drop all currently-held per-VMA locks.
 * This is called from the mmap_lock implementation directly before releasing
 * a write-locked mmap_lock (or downgrading it to read-locked).
 * This should normally NOT be called manually from other places.
 * If you want to call this manually anyway, keep in mind that this will release
 * *all* VMA write locks, including ones from further up the stack.
 */
static inline void vma_end_write_all(struct mm_struct *mm)
{
	mmap_assert_write_locked(mm);
	mm_lock_seqcount_end(mm);
}

static inline void mmap_write_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, true);
	vma_end_write_all(mm);
	up_write(&mm->mmap_lock);
}

static inline void mmap_write_downgrade(struct mm_struct *mm)
{
	__mmap_lock_trace_acquire_returned(mm, false, true);
	vma_end_write_all(mm);
	downgrade_write(&mm->mmap_lock);
}

static inline void mmap_read_lock(struct mm_struct *mm)
{
	__mmap_lock_trace_start_locking(mm, false);
	down_read(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, true);
}

static inline int __must_check mmap_read_lock_killable(struct mm_struct *mm)
{
	int ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_killable(&mm->mmap_lock);
	__mmap_lock_trace_acquire_returned(mm, false, ret == 0);
	return ret;
}
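
/*
 * Example of using the killable variants (an illustrative sketch): their
 * return values must be checked, since a fatal signal aborts the wait:
 *
 *	int ret = mmap_write_lock_killable(mm);
 *
 *	if (ret)
 *		return ret;
 *	ret = vma_start_write_killable(vma);
 *	if (ret) {
 *		mmap_write_unlock(mm);
 *		return ret;
 *	}
 *	... modify the VMA ...
 *	mmap_write_unlock(mm);
 */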

static inline bool __must_check mmap_read_trylock(struct mm_struct *mm)
{
	bool ret;

	__mmap_lock_trace_start_locking(mm, false);
	ret = down_read_trylock(&mm->mmap_lock) != 0;
	__mmap_lock_trace_acquire_returned(mm, false, ret);
	return ret;
}

static inline void mmap_read_unlock(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read(&mm->mmap_lock);
}

DEFINE_GUARD(mmap_read_lock, struct mm_struct *,
	     mmap_read_lock(_T), mmap_read_unlock(_T))

static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
	__mmap_lock_trace_released(mm, false);
	up_read_non_owner(&mm->mmap_lock);
}

static inline int mmap_lock_is_contended(struct mm_struct *mm)
{
	return rwsem_is_contended(&mm->mmap_lock);
}

#endif /* _LINUX_MMAP_LOCK_H */
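
/*
 * Example of the scope-based guard defined above (an illustrative sketch
 * relying on <linux/cleanup.h>, which this header includes; count_vmas()
 * is a hypothetical helper):
 *
 *	static int count_vmas(struct mm_struct *mm)
 *	{
 *		guard(mmap_read_lock)(mm);	// unlocked on every return path
 *		return mm->map_count;
 *	}
 */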