Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'dlm-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm

Pull dlm updates from David Teigland:
"This set includes the normal collection of minor fixes and cleanups,
new kmem caches for network messaging structs, a start on some basic
tracepoints, and some new debugfs files for inserting test messages"

* tag 'dlm-5.17' of git://git.kernel.org/pub/scm/linux/kernel/git/teigland/linux-dlm: (32 commits)
fs: dlm: print cluster addr if non-cluster node connects
fs: dlm: memory cache for lowcomms hotpath
fs: dlm: memory cache for writequeue_entry
fs: dlm: memory cache for midcomms hotpath
fs: dlm: remove wq_alloc mutex
fs: dlm: use event based wait for pending remove
fs: dlm: check for pending users filling buffers
fs: dlm: use list_empty() to check last iteration
fs: dlm: fix build with CONFIG_IPV6 disabled
fs: dlm: replace use of socket sk_callback_lock with sock_lock
fs: dlm: don't call kernel_getpeername() in error_report()
fs: dlm: fix potential buffer overflow
fs: dlm:Remove unneeded semicolon
fs: dlm: remove double list_first_entry call
fs: dlm: filter user dlm messages for kernel locks
fs: dlm: add lkb waiters debugfs functionality
fs: dlm: add lkb debugfs functionality
fs: dlm: allow create lkb with specific id range
fs: dlm: add debugfs rawmsg send functionality
fs: dlm: let handle callback data as void
...

+770 -173
+12 -4
fs/dlm/ast.c
··· 9 9 ******************************************************************************* 10 10 ******************************************************************************/ 11 11 12 + #include <trace/events/dlm.h> 13 + 12 14 #include "dlm_internal.h" 13 15 #include "lock.h" 14 16 #include "user.h" ··· 256 254 continue; 257 255 } else if (callbacks[i].flags & DLM_CB_BAST) { 258 256 bastfn(lkb->lkb_astparam, callbacks[i].mode); 257 + trace_dlm_bast(ls, lkb, callbacks[i].mode); 259 258 } else if (callbacks[i].flags & DLM_CB_CAST) { 260 259 lkb->lkb_lksb->sb_status = callbacks[i].sb_status; 261 260 lkb->lkb_lksb->sb_flags = callbacks[i].sb_flags; 262 261 castfn(lkb->lkb_astparam); 262 + trace_dlm_ast(ls, lkb, lkb->lkb_lksb); 263 263 } 264 264 } 265 265 ··· 299 295 void dlm_callback_resume(struct dlm_ls *ls) 300 296 { 301 297 struct dlm_lkb *lkb, *safe; 302 - int count = 0; 298 + int count = 0, sum = 0; 299 + bool empty; 303 300 304 301 clear_bit(LSFL_CB_DELAY, &ls->ls_flags); 305 302 ··· 316 311 if (count == MAX_CB_QUEUE) 317 312 break; 318 313 } 314 + empty = list_empty(&ls->ls_cb_delay); 319 315 mutex_unlock(&ls->ls_cb_mutex); 320 316 321 - if (count) 322 - log_rinfo(ls, "dlm_callback_resume %d", count); 323 - if (count == MAX_CB_QUEUE) { 317 + sum += count; 318 + if (!empty) { 324 319 count = 0; 325 320 cond_resched(); 326 321 goto more; 327 322 } 323 + 324 + if (sum) 325 + log_rinfo(ls, "%s %d", __func__, sum); 328 326 } 329 327
+94 -2
fs/dlm/debug_fs.c
··· 635 635 return 0; 636 636 } 637 637 638 + static ssize_t table_write2(struct file *file, const char __user *user_buf, 639 + size_t count, loff_t *ppos) 640 + { 641 + struct seq_file *seq = file->private_data; 642 + int n, len, lkb_nodeid, lkb_status, error; 643 + char name[DLM_RESNAME_MAXLEN + 1] = {}; 644 + struct dlm_ls *ls = seq->private; 645 + unsigned int lkb_flags; 646 + char buf[256] = {}; 647 + uint32_t lkb_id; 648 + 649 + if (copy_from_user(buf, user_buf, 650 + min_t(size_t, sizeof(buf) - 1, count))) 651 + return -EFAULT; 652 + 653 + n = sscanf(buf, "%x %" __stringify(DLM_RESNAME_MAXLEN) "s %x %d %d", 654 + &lkb_id, name, &lkb_flags, &lkb_nodeid, &lkb_status); 655 + if (n != 5) 656 + return -EINVAL; 657 + 658 + len = strnlen(name, DLM_RESNAME_MAXLEN); 659 + error = dlm_debug_add_lkb(ls, lkb_id, name, len, lkb_flags, 660 + lkb_nodeid, lkb_status); 661 + if (error) 662 + return error; 663 + 664 + return count; 665 + } 666 + 638 667 static int table_open3(struct inode *inode, struct file *file) 639 668 { 640 669 struct seq_file *seq; ··· 704 675 .owner = THIS_MODULE, 705 676 .open = table_open2, 706 677 .read = seq_read, 678 + .write = table_write2, 707 679 .llseek = seq_lseek, 708 680 .release = seq_release 709 681 }; ··· 754 724 return rv; 755 725 } 756 726 727 + static ssize_t waiters_write(struct file *file, const char __user *user_buf, 728 + size_t count, loff_t *ppos) 729 + { 730 + struct dlm_ls *ls = file->private_data; 731 + int mstype, to_nodeid; 732 + char buf[128] = {}; 733 + uint32_t lkb_id; 734 + int n, error; 735 + 736 + if (copy_from_user(buf, user_buf, 737 + min_t(size_t, sizeof(buf) - 1, count))) 738 + return -EFAULT; 739 + 740 + n = sscanf(buf, "%x %d %d", &lkb_id, &mstype, &to_nodeid); 741 + if (n != 3) 742 + return -EINVAL; 743 + 744 + error = dlm_debug_add_lkb_to_waiters(ls, lkb_id, mstype, to_nodeid); 745 + if (error) 746 + return error; 747 + 748 + return count; 749 + } 750 + 757 751 static const struct file_operations waiters_fops = { 758 752 .owner = THIS_MODULE, 759 753 .open = simple_open, 760 754 .read = waiters_read, 755 + .write = waiters_write, 761 756 .llseek = default_llseek, 762 757 }; 763 758 ··· 823 768 } 824 769 DEFINE_SHOW_ATTRIBUTE(dlm_version); 825 770 771 + static ssize_t dlm_rawmsg_write(struct file *fp, const char __user *user_buf, 772 + size_t count, loff_t *ppos) 773 + { 774 + void *buf; 775 + int ret; 776 + 777 + if (count > PAGE_SIZE || count < sizeof(struct dlm_header)) 778 + return -EINVAL; 779 + 780 + buf = kmalloc(PAGE_SIZE, GFP_NOFS); 781 + if (!buf) 782 + return -ENOMEM; 783 + 784 + if (copy_from_user(buf, user_buf, count)) { 785 + ret = -EFAULT; 786 + goto out; 787 + } 788 + 789 + ret = dlm_midcomms_rawmsg_send(fp->private_data, buf, count); 790 + if (ret) 791 + goto out; 792 + 793 + kfree(buf); 794 + return count; 795 + 796 + out: 797 + kfree(buf); 798 + return ret; 799 + } 800 + 801 + static const struct file_operations dlm_rawmsg_fops = { 802 + .open = simple_open, 803 + .write = dlm_rawmsg_write, 804 + .llseek = no_llseek, 805 + }; 806 + 826 807 void *dlm_create_debug_comms_file(int nodeid, void *data) 827 808 { 828 809 struct dentry *d_node; ··· 873 782 debugfs_create_file("send_queue_count", 0444, d_node, data, 874 783 &dlm_send_queue_cnt_fops); 875 784 debugfs_create_file("version", 0444, d_node, data, &dlm_version_fops); 785 + debugfs_create_file("rawmsg", 0200, d_node, data, &dlm_rawmsg_fops); 876 786 877 787 return d_node; 878 788 } ··· 901 809 snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_locks", ls->ls_name); 902 810 903 811 ls->ls_debug_locks_dentry = debugfs_create_file(name, 904 - S_IFREG | S_IRUGO, 812 + 0644, 905 813 dlm_root, 906 814 ls, 907 815 &format2_fops); ··· 932 840 snprintf(name, DLM_LOCKSPACE_LEN + 8, "%s_waiters", ls->ls_name); 933 841 934 842 ls->ls_debug_waiters_dentry = debugfs_create_file(name, 935 - S_IFREG | S_IRUGO, 843 + 0644, 936 844 dlm_root, 937 845 ls, 938 846 &waiters_fops);
+1 -2
fs/dlm/dir.c
··· 84 84 85 85 for (;;) { 86 86 int left; 87 - error = dlm_recovery_stopped(ls); 88 - if (error) { 87 + if (dlm_recovery_stopped(ls)) { 89 88 error = -EINTR; 90 89 goto out_free; 91 90 }
+5 -7
fs/dlm/dlm_internal.h
··· 41 41 #include <linux/dlm.h> 42 42 #include "config.h" 43 43 44 - /* Size of the temp buffer midcomms allocates on the stack. 45 - We try to make this large enough so most messages fit. 46 - FIXME: should sctp make this unnecessary? */ 47 - 48 - #define DLM_INBUF_LEN 148 49 - 50 44 struct dlm_ls; 51 45 struct dlm_lkb; 52 46 struct dlm_rsb; ··· 548 554 uint32_t ls_generation; 549 555 uint32_t ls_exflags; 550 556 int ls_lvblen; 551 - int ls_count; /* refcount of processes in 557 + atomic_t ls_count; /* refcount of processes in 552 558 the dlm using this ls */ 559 + wait_queue_head_t ls_count_wait; 553 560 int ls_create_count; /* create/release refcount */ 554 561 unsigned long ls_flags; /* LSFL_ */ 555 562 unsigned long ls_scan_time; ··· 576 581 struct list_head ls_new_rsb; /* new rsb structs */ 577 582 578 583 spinlock_t ls_remove_spin; 584 + wait_queue_head_t ls_remove_wait; 579 585 char ls_remove_name[DLM_RESNAME_MAXLEN+1]; 580 586 char *ls_remove_names[DLM_REMOVE_NAMES_MAX]; 581 587 int ls_remove_len; ··· 628 632 struct rw_semaphore ls_in_recovery; /* block local requests */ 629 633 struct rw_semaphore ls_recv_active; /* block dlm_recv */ 630 634 struct list_head ls_requestqueue;/* queue remote requests */ 635 + atomic_t ls_requestqueue_cnt; 636 + wait_queue_head_t ls_requestqueue_wait; 631 637 struct mutex ls_requestqueue_mutex; 632 638 struct dlm_rcom *ls_recover_buf; 633 639 int ls_recover_nodeid; /* for debugging */
+100 -9
fs/dlm/lock.c
··· 53 53 R: do_xxxx() 54 54 L: receive_xxxx_reply() <- R: send_xxxx_reply() 55 55 */ 56 + #include <trace/events/dlm.h> 57 + 56 58 #include <linux/types.h> 57 59 #include <linux/rbtree.h> 58 60 #include <linux/slab.h> ··· 1180 1178 } 1181 1179 } 1182 1180 1183 - static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) 1181 + static int _create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret, 1182 + int start, int end) 1184 1183 { 1185 1184 struct dlm_lkb *lkb; 1186 1185 int rv; ··· 1202 1199 1203 1200 idr_preload(GFP_NOFS); 1204 1201 spin_lock(&ls->ls_lkbidr_spin); 1205 - rv = idr_alloc(&ls->ls_lkbidr, lkb, 1, 0, GFP_NOWAIT); 1202 + rv = idr_alloc(&ls->ls_lkbidr, lkb, start, end, GFP_NOWAIT); 1206 1203 if (rv >= 0) 1207 1204 lkb->lkb_id = rv; 1208 1205 spin_unlock(&ls->ls_lkbidr_spin); ··· 1216 1213 1217 1214 *lkb_ret = lkb; 1218 1215 return 0; 1216 + } 1217 + 1218 + static int create_lkb(struct dlm_ls *ls, struct dlm_lkb **lkb_ret) 1219 + { 1220 + return _create_lkb(ls, lkb_ret, 1, 0); 1219 1221 } 1220 1222 1221 1223 static int find_lkb(struct dlm_ls *ls, uint32_t lkid, struct dlm_lkb **lkb_ret) ··· 1626 1618 } 1627 1619 1628 1620 /* If there's an rsb for the same resource being removed, ensure 1629 - that the remove message is sent before the new lookup message. 1630 - It should be rare to need a delay here, but if not, then it may 1631 - be worthwhile to add a proper wait mechanism rather than a delay. */ 1621 + * that the remove message is sent before the new lookup message. 1622 + */ 1623 + 1624 + #define DLM_WAIT_PENDING_COND(ls, r) \ 1625 + (ls->ls_remove_len && \ 1626 + !rsb_cmp(r, ls->ls_remove_name, \ 1627 + ls->ls_remove_len)) 1632 1628 1633 1629 static void wait_pending_remove(struct dlm_rsb *r) 1634 1630 { 1635 1631 struct dlm_ls *ls = r->res_ls; 1636 1632 restart: 1637 1633 spin_lock(&ls->ls_remove_spin); 1638 - if (ls->ls_remove_len && 1639 - !rsb_cmp(r, ls->ls_remove_name, ls->ls_remove_len)) { 1634 + if (DLM_WAIT_PENDING_COND(ls, r)) { 1640 1635 log_debug(ls, "delay lookup for remove dir %d %s", 1641 - r->res_dir_nodeid, r->res_name); 1636 + r->res_dir_nodeid, r->res_name); 1642 1637 spin_unlock(&ls->ls_remove_spin); 1643 - msleep(1); 1638 + wait_event(ls->ls_remove_wait, !DLM_WAIT_PENDING_COND(ls, r)); 1644 1639 goto restart; 1645 1640 } 1646 1641 spin_unlock(&ls->ls_remove_spin); ··· 1795 1784 memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN); 1796 1785 spin_unlock(&ls->ls_remove_spin); 1797 1786 spin_unlock(&ls->ls_rsbtbl[b].lock); 1787 + wake_up(&ls->ls_remove_wait); 1798 1788 1799 1789 send_remove(r); 1800 1790 ··· 3449 3437 if (error) 3450 3438 goto out; 3451 3439 3440 + trace_dlm_lock_start(ls, lkb, mode, flags); 3441 + 3452 3442 error = set_lock_args(mode, lksb, flags, namelen, 0, ast, 3453 3443 astarg, bast, &args); 3454 3444 if (error) ··· 3464 3450 if (error == -EINPROGRESS) 3465 3451 error = 0; 3466 3452 out_put: 3453 + trace_dlm_lock_end(ls, lkb, mode, flags, error); 3454 + 3467 3455 if (convert || error) 3468 3456 __put_lkb(ls, lkb); 3469 3457 if (error == -EAGAIN || error == -EDEADLK) ··· 3497 3481 if (error) 3498 3482 goto out; 3499 3483 3484 + trace_dlm_unlock_start(ls, lkb, flags); 3485 + 3500 3486 error = set_unlock_args(flags, astarg, &args); 3501 3487 if (error) 3502 3488 goto out_put; ··· 3513 3495 if (error == -EBUSY && (flags & (DLM_LKF_CANCEL | DLM_LKF_FORCEUNLOCK))) 3514 3496 error = 0; 3515 3497 out_put: 3498 + trace_dlm_unlock_end(ls, lkb, flags, error); 3499 + 3516 3500 dlm_put_lkb(lkb); 3517 3501 out: 3518 3502 dlm_unlock_recovery(ls); ··· 3993 3973 int from = ms->m_header.h_nodeid; 3994 3974 int error = 0; 3995 3975 3976 + /* currently mixing of user/kernel locks are not supported */ 3977 + if (ms->m_flags & DLM_IFL_USER && ~lkb->lkb_flags & DLM_IFL_USER) { 3978 + log_error(lkb->lkb_resource->res_ls, 3979 + "got user dlm message for a kernel lock"); 3980 + error = -EINVAL; 3981 + goto out; 3982 + } 3983 + 3996 3984 switch (ms->m_type) { 3997 3985 case DLM_MSG_CONVERT: 3998 3986 case DLM_MSG_UNLOCK: ··· 4029 4001 error = -EINVAL; 4030 4002 } 4031 4003 4004 + out: 4032 4005 if (error) 4033 4006 log_error(lkb->lkb_resource->res_ls, 4034 4007 "ignore invalid message %d from %d %x %x %x %d", ··· 4079 4050 memcpy(ls->ls_remove_name, name, DLM_RESNAME_MAXLEN); 4080 4051 spin_unlock(&ls->ls_remove_spin); 4081 4052 spin_unlock(&ls->ls_rsbtbl[b].lock); 4053 + wake_up(&ls->ls_remove_wait); 4082 4054 4083 4055 rv = _create_message(ls, sizeof(struct dlm_message) + len, 4084 4056 dir_nodeid, DLM_MSG_REMOVE, &ms, &mh); ··· 6328 6298 do_purge(ls, nodeid, pid); 6329 6299 dlm_unlock_recovery(ls); 6330 6300 } 6301 + return error; 6302 + } 6303 + 6304 + /* debug functionality */ 6305 + int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len, 6306 + int lkb_nodeid, unsigned int lkb_flags, int lkb_status) 6307 + { 6308 + struct dlm_lksb *lksb; 6309 + struct dlm_lkb *lkb; 6310 + struct dlm_rsb *r; 6311 + int error; 6312 + 6313 + /* we currently can't set a valid user lock */ 6314 + if (lkb_flags & DLM_IFL_USER) 6315 + return -EOPNOTSUPP; 6316 + 6317 + lksb = kzalloc(sizeof(*lksb), GFP_NOFS); 6318 + if (!lksb) 6319 + return -ENOMEM; 6320 + 6321 + error = _create_lkb(ls, &lkb, lkb_id, lkb_id + 1); 6322 + if (error) { 6323 + kfree(lksb); 6324 + return error; 6325 + } 6326 + 6327 + lkb->lkb_flags = lkb_flags; 6328 + lkb->lkb_nodeid = lkb_nodeid; 6329 + lkb->lkb_lksb = lksb; 6330 + /* user specific pointer, just don't have it NULL for kernel locks */ 6331 + if (~lkb_flags & DLM_IFL_USER) 6332 + lkb->lkb_astparam = (void *)0xDEADBEEF; 6333 + 6334 + error = find_rsb(ls, name, len, 0, R_REQUEST, &r); 6335 + if (error) { 6336 + kfree(lksb); 6337 + __put_lkb(ls, lkb); 6338 + return error; 6339 + } 6340 + 6341 + lock_rsb(r); 6342 + attach_lkb(r, lkb); 6343 + add_lkb(r, lkb, lkb_status); 6344 + unlock_rsb(r); 6345 + put_rsb(r); 6346 + 6347 + return 0; 6348 + } 6349 + 6350 + int dlm_debug_add_lkb_to_waiters(struct dlm_ls *ls, uint32_t lkb_id, 6351 + int mstype, int to_nodeid) 6352 + { 6353 + struct dlm_lkb *lkb; 6354 + int error; 6355 + 6356 + error = find_lkb(ls, lkb_id, &lkb); 6357 + if (error) 6358 + return error; 6359 + 6360 + error = add_to_waiters(lkb, mstype, to_nodeid); 6361 + dlm_put_lkb(lkb); 6331 6362 return error; 6332 6363 } 6333 6364
+4
fs/dlm/lock.h
··· 58 58 int nodeid, int pid); 59 59 int dlm_user_deadlock(struct dlm_ls *ls, uint32_t flags, uint32_t lkid); 60 60 void dlm_clear_proc_locks(struct dlm_ls *ls, struct dlm_user_proc *proc); 61 + int dlm_debug_add_lkb(struct dlm_ls *ls, uint32_t lkb_id, char *name, int len, 62 + int lkb_nodeid, unsigned int lkb_flags, int lkb_status); 63 + int dlm_debug_add_lkb_to_waiters(struct dlm_ls *ls, uint32_t lkb_id, 64 + int mstype, int to_nodeid); 61 65 62 66 static inline int is_master(struct dlm_rsb *r) 63 67 {
+21 -17
fs/dlm/lockspace.c
··· 314 314 315 315 list_for_each_entry(ls, &lslist, ls_list) { 316 316 if (ls->ls_global_id == id) { 317 - ls->ls_count++; 317 + atomic_inc(&ls->ls_count); 318 318 goto out; 319 319 } 320 320 } ··· 331 331 spin_lock(&lslist_lock); 332 332 list_for_each_entry(ls, &lslist, ls_list) { 333 333 if (ls->ls_local_handle == lockspace) { 334 - ls->ls_count++; 334 + atomic_inc(&ls->ls_count); 335 335 goto out; 336 336 } 337 337 } ··· 348 348 spin_lock(&lslist_lock); 349 349 list_for_each_entry(ls, &lslist, ls_list) { 350 350 if (ls->ls_device.minor == minor) { 351 - ls->ls_count++; 351 + atomic_inc(&ls->ls_count); 352 352 goto out; 353 353 } 354 354 } ··· 360 360 361 361 void dlm_put_lockspace(struct dlm_ls *ls) 362 362 { 363 - spin_lock(&lslist_lock); 364 - ls->ls_count--; 365 - spin_unlock(&lslist_lock); 363 + if (atomic_dec_and_test(&ls->ls_count)) 364 + wake_up(&ls->ls_count_wait); 366 365 } 367 366 368 367 static void remove_lockspace(struct dlm_ls *ls) 369 368 { 370 - for (;;) { 371 - spin_lock(&lslist_lock); 372 - if (ls->ls_count == 0) { 373 - WARN_ON(ls->ls_create_count != 0); 374 - list_del(&ls->ls_list); 375 - spin_unlock(&lslist_lock); 376 - return; 377 - } 369 + retry: 370 + wait_event(ls->ls_count_wait, atomic_read(&ls->ls_count) == 0); 371 + 372 + spin_lock(&lslist_lock); 373 + if (atomic_read(&ls->ls_count) != 0) { 378 374 spin_unlock(&lslist_lock); 379 - ssleep(1); 375 + goto retry; 380 376 } 377 + 378 + WARN_ON(ls->ls_create_count != 0); 379 + list_del(&ls->ls_list); 380 + spin_unlock(&lslist_lock); 381 381 } 382 382 383 383 static int threads_start(void) ··· 481 481 memcpy(ls->ls_name, name, namelen); 482 482 ls->ls_namelen = namelen; 483 483 ls->ls_lvblen = lvblen; 484 - ls->ls_count = 0; 484 + atomic_set(&ls->ls_count, 0); 485 + init_waitqueue_head(&ls->ls_count_wait); 485 486 ls->ls_flags = 0; 486 487 ls->ls_scan_time = jiffies; 487 488 ··· 512 511 } 513 512 514 513 spin_lock_init(&ls->ls_remove_spin); 514 + init_waitqueue_head(&ls->ls_remove_wait); 515 515 516 516 for (i = 0; i < DLM_REMOVE_NAMES_MAX; i++) { 517 517 ls->ls_remove_names[i] = kzalloc(DLM_RESNAME_MAXLEN+1, ··· 566 564 init_rwsem(&ls->ls_in_recovery); 567 565 init_rwsem(&ls->ls_recv_active); 568 566 INIT_LIST_HEAD(&ls->ls_requestqueue); 567 + atomic_set(&ls->ls_requestqueue_cnt, 0); 568 + init_waitqueue_head(&ls->ls_requestqueue_wait); 569 569 mutex_init(&ls->ls_requestqueue_mutex); 570 570 mutex_init(&ls->ls_clear_proc_locks); 571 571 ··· 872 868 * until this returns. 873 869 * 874 870 * Force has 4 possible values: 875 - * 0 - don't destroy locksapce if it has any LKBs 871 + * 0 - don't destroy lockspace if it has any LKBs 876 872 * 1 - destroy lockspace if it has remote LKBs but not if it has local LKBs 877 873 * 2 - destroy lockspace regardless of LKBs 878 874 * 3 - destroy lockspace as part of a forced shutdown
+111 -98
fs/dlm/lowcomms.c
··· 53 53 #include <net/sctp/sctp.h> 54 54 #include <net/ipv6.h> 55 55 56 + #include <trace/events/dlm.h> 57 + 56 58 #include "dlm_internal.h" 57 59 #include "lowcomms.h" 58 60 #include "midcomms.h" 61 + #include "memory.h" 59 62 #include "config.h" 60 63 61 64 #define NEEDED_RMEM (4*1024*1024) ··· 87 84 struct list_head writequeue; /* List of outgoing writequeue_entries */ 88 85 spinlock_t writequeue_lock; 89 86 atomic_t writequeue_cnt; 90 - struct mutex wq_alloc; 91 87 int retries; 92 88 #define MAX_CONNECT_RETRIES 3 93 89 struct hlist_node list; ··· 191 189 static void process_recv_sockets(struct work_struct *work); 192 190 static void process_send_sockets(struct work_struct *work); 193 191 192 + static void writequeue_entry_ctor(void *data) 193 + { 194 + struct writequeue_entry *entry = data; 195 + 196 + INIT_LIST_HEAD(&entry->msgs); 197 + } 198 + 199 + struct kmem_cache *dlm_lowcomms_writequeue_cache_create(void) 200 + { 201 + return kmem_cache_create("dlm_writequeue", sizeof(struct writequeue_entry), 202 + 0, 0, writequeue_entry_ctor); 203 + } 204 + 205 + struct kmem_cache *dlm_lowcomms_msg_cache_create(void) 206 + { 207 + return kmem_cache_create("dlm_msg", sizeof(struct dlm_msg), 0, 0, NULL); 208 + } 209 + 194 210 /* need to held writequeue_lock */ 195 211 static struct writequeue_entry *con_next_wq(struct connection *con) 196 212 { ··· 219 199 220 200 e = list_first_entry(&con->writequeue, struct writequeue_entry, 221 201 list); 222 - if (e->len == 0) 202 + /* if len is zero nothing is to send, if there are users filling 203 + * buffers we wait until the users are done so we can send more. 204 + */ 205 + if (e->users || e->len == 0) 223 206 return NULL; 224 207 225 208 return e; ··· 287 264 kfree(con); 288 265 return NULL; 289 266 } 290 - 291 - mutex_init(&con->wq_alloc); 292 267 293 268 spin_lock(&connections_lock); 294 269 /* Because multiple workqueues/threads calls this function it can ··· 507 486 { 508 487 struct connection *con; 509 488 510 - read_lock_bh(&sk->sk_callback_lock); 511 489 con = sock2con(sk); 512 490 if (con && !test_and_set_bit(CF_READ_PENDING, &con->flags)) 513 491 queue_work(recv_workqueue, &con->rwork); 514 - read_unlock_bh(&sk->sk_callback_lock); 515 492 } 516 493 517 494 static void lowcomms_listen_data_ready(struct sock *sk) ··· 524 505 { 525 506 struct connection *con; 526 507 527 - read_lock_bh(&sk->sk_callback_lock); 528 508 con = sock2con(sk); 529 509 if (!con) 530 - goto out; 510 + return; 531 511 532 512 if (!test_and_set_bit(CF_CONNECTED, &con->flags)) { 533 513 log_print("successful connected to node %d", con->nodeid); 534 514 queue_work(send_workqueue, &con->swork); 535 - goto out; 515 + return; 536 516 } 537 517 538 518 clear_bit(SOCK_NOSPACE, &con->sock->flags); ··· 542 524 } 543 525 544 526 queue_work(send_workqueue, &con->swork); 545 - out: 546 - read_unlock_bh(&sk->sk_callback_lock); 547 527 } 548 528 549 529 static inline void lowcomms_connect_sock(struct connection *con) ··· 608 592 static void lowcomms_error_report(struct sock *sk) 609 593 { 610 594 struct connection *con; 611 - struct sockaddr_storage saddr; 612 595 void (*orig_report)(struct sock *) = NULL; 596 + struct inet_sock *inet; 613 597 614 - read_lock_bh(&sk->sk_callback_lock); 615 598 con = sock2con(sk); 616 599 if (con == NULL) 617 600 goto out; 618 601 619 602 orig_report = listen_sock.sk_error_report; 620 - if (kernel_getpeername(sk->sk_socket, (struct sockaddr *)&saddr) < 0) { 621 - printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 622 - "sending to node %d, port %d, " 623 - "sk_err=%d/%d\n", dlm_our_nodeid(), 624 - con->nodeid, dlm_config.ci_tcp_port, 625 - sk->sk_err, sk->sk_err_soft); 626 - } else if (saddr.ss_family == AF_INET) { 627 - struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr; 628 603 604 + inet = inet_sk(sk); 605 + switch (sk->sk_family) { 606 + case AF_INET: 629 607 printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 630 - "sending to node %d at %pI4, port %d, " 608 + "sending to node %d at %pI4, dport %d, " 631 609 "sk_err=%d/%d\n", dlm_our_nodeid(), 632 - con->nodeid, &sin4->sin_addr.s_addr, 633 - dlm_config.ci_tcp_port, sk->sk_err, 610 + con->nodeid, &inet->inet_daddr, 611 + ntohs(inet->inet_dport), sk->sk_err, 634 612 sk->sk_err_soft); 635 - } else { 636 - struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&saddr; 637 - 613 + break; 614 + #if IS_ENABLED(CONFIG_IPV6) 615 + case AF_INET6: 638 616 printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 639 - "sending to node %d at %u.%u.%u.%u, " 640 - "port %d, sk_err=%d/%d\n", dlm_our_nodeid(), 641 - con->nodeid, sin6->sin6_addr.s6_addr32[0], 642 - sin6->sin6_addr.s6_addr32[1], 643 - sin6->sin6_addr.s6_addr32[2], 644 - sin6->sin6_addr.s6_addr32[3], 645 - dlm_config.ci_tcp_port, sk->sk_err, 617 + "sending to node %d at %pI6c, " 618 + "dport %d, sk_err=%d/%d\n", dlm_our_nodeid(), 619 + con->nodeid, &sk->sk_v6_daddr, 620 + ntohs(inet->inet_dport), sk->sk_err, 646 621 sk->sk_err_soft); 622 + break; 623 + #endif 624 + default: 625 + printk_ratelimited(KERN_ERR "dlm: node %d: socket error " 626 + "invalid socket family %d set, " 627 + "sk_err=%d/%d\n", dlm_our_nodeid(), 628 + sk->sk_family, sk->sk_err, sk->sk_err_soft); 629 + goto out; 647 630 } 648 631 649 632 /* below sendcon only handling */ ··· 661 646 queue_work(send_workqueue, &con->swork); 662 647 663 648 out: 664 - read_unlock_bh(&sk->sk_callback_lock); 665 649 if (orig_report) 666 650 orig_report(sk); 667 651 } ··· 680 666 { 681 667 struct sock *sk = sock->sk; 682 668 683 - write_lock_bh(&sk->sk_callback_lock); 669 + lock_sock(sk); 684 670 sk->sk_user_data = NULL; 685 671 sk->sk_data_ready = listen_sock.sk_data_ready; 686 672 sk->sk_state_change = listen_sock.sk_state_change; 687 673 sk->sk_write_space = listen_sock.sk_write_space; 688 674 sk->sk_error_report = listen_sock.sk_error_report; 689 - write_unlock_bh(&sk->sk_callback_lock); 675 + release_sock(sk); 690 676 } 691 677 692 678 static void add_listen_sock(struct socket *sock, struct listen_connection *con) 693 679 { 694 680 struct sock *sk = sock->sk; 695 681 696 - write_lock_bh(&sk->sk_callback_lock); 682 + lock_sock(sk); 697 683 save_listen_callbacks(sock); 698 684 con->sock = sock; 699 685 ··· 701 687 sk->sk_allocation = GFP_NOFS; 702 688 /* Install a data_ready callback */ 703 689 sk->sk_data_ready = lowcomms_listen_data_ready; 704 - write_unlock_bh(&sk->sk_callback_lock); 690 + release_sock(sk); 705 691 } 706 692 707 693 /* Make a socket active */ ··· 709 695 { 710 696 struct sock *sk = sock->sk; 711 697 712 - write_lock_bh(&sk->sk_callback_lock); 698 + lock_sock(sk); 713 699 con->sock = sock; 714 700 715 701 sk->sk_user_data = con; ··· 719 705 sk->sk_state_change = lowcomms_state_change; 720 706 sk->sk_allocation = GFP_NOFS; 721 707 sk->sk_error_report = lowcomms_error_report; 722 - write_unlock_bh(&sk->sk_callback_lock); 708 + release_sock(sk); 723 709 } 724 710 725 711 /* Add the port number to an IPv6 or 4 sockaddr and return the address ··· 747 733 ref); 748 734 749 735 __free_page(e->page); 750 - kfree(e); 736 + dlm_free_writequeue(e); 751 737 } 752 738 753 739 static void dlm_msg_release(struct kref *kref) ··· 755 741 struct dlm_msg *msg = container_of(kref, struct dlm_msg, ref); 756 742 757 743 kref_put(&msg->entry->ref, dlm_page_release); 758 - kfree(msg); 744 + dlm_free_msg(msg); 759 745 } 760 746 761 747 static void free_entry(struct writequeue_entry *e) ··· 939 925 msg.msg_flags = MSG_DONTWAIT | MSG_NOSIGNAL; 940 926 ret = kernel_recvmsg(con->sock, &msg, &iov, 1, iov.iov_len, 941 927 msg.msg_flags); 928 + trace_dlm_recv(con->nodeid, ret); 942 929 if (ret == -EAGAIN) 943 930 break; 944 931 else if (ret <= 0) ··· 1028 1013 /* Get the new node's NODEID */ 1029 1014 make_sockaddr(&peeraddr, 0, &len); 1030 1015 if (addr_to_nodeid(&peeraddr, &nodeid, &mark)) { 1031 - unsigned char *b=(unsigned char *)&peeraddr; 1032 - log_print("connect from non cluster node"); 1033 - print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE, 1034 - b, sizeof(struct sockaddr_storage)); 1016 + switch (peeraddr.ss_family) { 1017 + case AF_INET: { 1018 + struct sockaddr_in *sin = (struct sockaddr_in *)&peeraddr; 1019 + 1020 + log_print("connect from non cluster IPv4 node %pI4", 1021 + &sin->sin_addr); 1022 + break; 1023 + } 1024 + #if IS_ENABLED(CONFIG_IPV6) 1025 + case AF_INET6: { 1026 + struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *)&peeraddr; 1027 + 1028 + log_print("connect from non cluster IPv6 node %pI6c", 1029 + &sin6->sin6_addr); 1030 + break; 1031 + } 1032 + #endif 1033 + default: 1034 + log_print("invalid family from non cluster node"); 1035 + break; 1036 + } 1037 + 1035 1038 sock_release(newsock); 1036 1039 return -1; 1037 1040 } ··· 1210 1177 kfree(dlm_local_addr[i]); 1211 1178 } 1212 1179 1213 - static struct writequeue_entry *new_writequeue_entry(struct connection *con, 1214 - gfp_t allocation) 1180 + static struct writequeue_entry *new_writequeue_entry(struct connection *con) 1215 1181 { 1216 1182 struct writequeue_entry *entry; 1217 1183 1218 - entry = kzalloc(sizeof(*entry), allocation); 1184 + entry = dlm_allocate_writequeue(); 1219 1185 if (!entry) 1220 1186 return NULL; 1221 1187 1222 - entry->page = alloc_page(allocation | __GFP_ZERO); 1188 + entry->page = alloc_page(GFP_ATOMIC | __GFP_ZERO); 1223 1189 if (!entry->page) { 1224 - kfree(entry); 1190 + dlm_free_writequeue(entry); 1225 1191 return NULL; 1226 1192 } 1227 1193 1194 + entry->offset = 0; 1195 + entry->len = 0; 1196 + entry->end = 0; 1197 + entry->dirty = false; 1228 1198 entry->con = con; 1229 1199 entry->users = 1; 1230 1200 kref_init(&entry->ref); 1231 - INIT_LIST_HEAD(&entry->msgs); 1232 - 1233 1201 return entry; 1234 1202 } 1235 1203 1236 1204 static struct writequeue_entry *new_wq_entry(struct connection *con, int len, 1237 - gfp_t allocation, char **ppc, 1238 - void (*cb)(struct dlm_mhandle *mh), 1239 - struct dlm_mhandle *mh) 1205 + char **ppc, void (*cb)(void *data), 1206 + void *data) 1240 1207 { 1241 1208 struct writequeue_entry *e; 1242 1209 ··· 1248 1215 1249 1216 *ppc = page_address(e->page) + e->end; 1250 1217 if (cb) 1251 - cb(mh); 1218 + cb(data); 1252 1219 1253 1220 e->end += len; 1254 1221 e->users++; 1255 - spin_unlock(&con->writequeue_lock); 1256 - 1257 - return e; 1222 + goto out; 1258 1223 } 1259 1224 } 1260 - spin_unlock(&con->writequeue_lock); 1261 1225 1262 - e = new_writequeue_entry(con, allocation); 1226 + e = new_writequeue_entry(con); 1263 1227 if (!e) 1264 - return NULL; 1228 + goto out; 1265 1229 1266 1230 kref_get(&e->ref); 1267 1231 *ppc = page_address(e->page); 1268 1232 e->end += len; 1269 1233 atomic_inc(&con->writequeue_cnt); 1270 - 1271 - spin_lock(&con->writequeue_lock); 1272 1234 if (cb) 1273 - cb(mh); 1235 + cb(data); 1274 1236 1275 1237 list_add_tail(&e->list, &con->writequeue); 1276 - spin_unlock(&con->writequeue_lock); 1277 1238 1239 + out: 1240 + spin_unlock(&con->writequeue_lock); 1278 1241 return e; 1279 1242 }; 1280 1243 1281 1244 static struct dlm_msg *dlm_lowcomms_new_msg_con(struct connection *con, int len, 1282 1245 gfp_t allocation, char **ppc, 1283 - void (*cb)(struct dlm_mhandle *mh), 1284 - struct dlm_mhandle *mh) 1246 + void (*cb)(void *data), 1247 + void *data) 1285 1248 { 1286 1249 struct writequeue_entry *e; 1287 1250 struct dlm_msg *msg; 1288 - bool sleepable; 1289 1251 1290 - msg = kzalloc(sizeof(*msg), allocation); 1252 + msg = dlm_allocate_msg(allocation); 1291 1253 if (!msg) 1292 1254 return NULL; 1293 1255 1294 - /* this mutex is being used as a wait to avoid multiple "fast" 1295 - * new writequeue page list entry allocs in new_wq_entry in 1296 - * normal operation which is sleepable context. Without it 1297 - * we could end in multiple writequeue entries with one 1298 - * dlm message because multiple callers were waiting at 1299 - * the writequeue_lock in new_wq_entry(). 1300 - */ 1301 - sleepable = gfpflags_normal_context(allocation); 1302 - if (sleepable) 1303 - mutex_lock(&con->wq_alloc); 1304 - 1305 1256 kref_init(&msg->ref); 1306 1257 1307 - e = new_wq_entry(con, len, allocation, ppc, cb, mh); 1258 + e = new_wq_entry(con, len, ppc, cb, data); 1308 1259 if (!e) { 1309 - if (sleepable) 1310 - mutex_unlock(&con->wq_alloc); 1311 - 1312 - kfree(msg); 1260 + dlm_free_msg(msg); 1313 1261 return NULL; 1314 1262 } 1315 1263 1316 - if (sleepable) 1317 - mutex_unlock(&con->wq_alloc); 1318 - 1264 + msg->retransmit = false; 1265 + msg->orig_msg = NULL; 1319 1266 msg->ppc = *ppc; 1320 1267 msg->len = len; 1321 1268 msg->entry = e; ··· 1304 1291 } 1305 1292 1306 1293 struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation, 1307 - char **ppc, void (*cb)(struct dlm_mhandle *mh), 1308 - struct dlm_mhandle *mh) 1294 + char **ppc, void (*cb)(void *data), 1295 + void *data) 1309 1296 { 1310 1297 struct connection *con; 1311 1298 struct dlm_msg *msg; ··· 1326 1313 return NULL; 1327 1314 } 1328 1315 1329 - msg = dlm_lowcomms_new_msg_con(con, len, allocation, ppc, cb, mh); 1316 + msg = dlm_lowcomms_new_msg_con(con, len, allocation, ppc, cb, data); 1330 1317 if (!msg) { 1331 1318 srcu_read_unlock(&connections_srcu, idx); 1332 1319 return NULL; ··· 1416 1403 if (!e) 1417 1404 break; 1418 1405 1419 - e = list_first_entry(&con->writequeue, struct writequeue_entry, list); 1420 1406 len = e->len; 1421 1407 offset = e->offset; 1422 1408 BUG_ON(len == 0 && e->users == 0); ··· 1423 1411 1424 1412 ret = kernel_sendpage(con->sock, e->page, offset, len, 1425 1413 msg_flags); 1414 + trace_dlm_send(con->nodeid, ret); 1426 1415 if (ret == -EAGAIN || ret == 0) { 1427 1416 if (ret == -EAGAIN && 1428 1417 test_bit(SOCKWQ_ASYNC_NOSPACE, &con->sock->flags) && ··· 1693 1680 set_bit(CF_READ_PENDING, &con->flags); 1694 1681 set_bit(CF_WRITE_PENDING, &con->flags); 1695 1682 if (con->sock && con->sock->sk) { 1696 - write_lock_bh(&con->sock->sk->sk_callback_lock); 1683 + lock_sock(con->sock->sk); 1697 1684 con->sock->sk->sk_user_data = NULL; 1698 - write_unlock_bh(&con->sock->sk->sk_callback_lock); 1685 + release_sock(con->sock->sk); 1699 1686 } 1700 1687 if (con->othercon && and_other) 1701 1688 _stop_conn(con->othercon, false); ··· 1788 1775 result = sock_create_kern(&init_net, dlm_local_addr[0]->ss_family, 1789 1776 SOCK_STREAM, dlm_proto_ops->proto, &sock); 1790 1777 if (result < 0) { 1791 - log_print("Can't create comms socket, check SCTP is loaded"); 1778 + log_print("Can't create comms socket: %d", result); 1792 1779 goto out; 1793 1780 } 1794 1781
+4 -2
fs/dlm/lowcomms.h
··· 38 38 void dlm_lowcomms_exit(void); 39 39 int dlm_lowcomms_close(int nodeid); 40 40 struct dlm_msg *dlm_lowcomms_new_msg(int nodeid, int len, gfp_t allocation, 41 - char **ppc, void (*cb)(struct dlm_mhandle *mh), 42 - struct dlm_mhandle *mh); 41 + char **ppc, void (*cb)(void *data), 42 + void *data); 43 43 void dlm_lowcomms_commit_msg(struct dlm_msg *msg); 44 44 void dlm_lowcomms_put_msg(struct dlm_msg *msg); 45 45 int dlm_lowcomms_resend_msg(struct dlm_msg *msg); ··· 47 47 int dlm_lowcomms_nodes_set_mark(int nodeid, unsigned int mark); 48 48 int dlm_lowcomms_addr(int nodeid, struct sockaddr_storage *addr, int len); 49 49 void dlm_midcomms_receive_done(int nodeid); 50 + struct kmem_cache *dlm_lowcomms_writequeue_cache_create(void); 51 + struct kmem_cache *dlm_lowcomms_msg_cache_create(void); 50 52 51 53 #endif /* __LOWCOMMS_DOT_H__ */ 52 54
+3
fs/dlm/main.c
··· 19 19 #include "config.h" 20 20 #include "lowcomms.h" 21 21 22 + #define CREATE_TRACE_POINTS 23 + #include <trace/events/dlm.h> 24 + 22 25 static int __init init_dlm(void) 23 26 { 24 27 int error;
+1 -2
fs/dlm/member.c
··· 442 442 int error = 0; 443 443 444 444 list_for_each_entry(memb, &ls->ls_nodes, list) { 445 - error = dlm_recovery_stopped(ls); 446 - if (error) { 445 + if (dlm_recovery_stopped(ls)) { 447 446 error = -EINTR; 448 447 break; 449 448 }
+63 -5
fs/dlm/memory.c
··· 10 10 ******************************************************************************/ 11 11 12 12 #include "dlm_internal.h" 13 + #include "midcomms.h" 14 + #include "lowcomms.h" 13 15 #include "config.h" 14 16 #include "memory.h" 15 17 18 + static struct kmem_cache *writequeue_cache; 19 + static struct kmem_cache *mhandle_cache; 20 + static struct kmem_cache *msg_cache; 16 21 static struct kmem_cache *lkb_cache; 17 22 static struct kmem_cache *rsb_cache; 18 23 19 24 20 25 int __init dlm_memory_init(void) 21 26 { 27 + writequeue_cache = dlm_lowcomms_writequeue_cache_create(); 28 + if (!writequeue_cache) 29 + goto out; 30 + 31 + mhandle_cache = dlm_midcomms_cache_create(); 32 + if (!mhandle_cache) 33 + goto mhandle; 34 + 22 35 lkb_cache = kmem_cache_create("dlm_lkb", sizeof(struct dlm_lkb), 23 36 __alignof__(struct dlm_lkb), 0, NULL); 24 37 if (!lkb_cache) 25 - return -ENOMEM; 38 + goto lkb; 39 + 40 + msg_cache = dlm_lowcomms_msg_cache_create(); 41 + if (!msg_cache) 42 + goto msg; 26 43 27 44 rsb_cache = kmem_cache_create("dlm_rsb", sizeof(struct dlm_rsb), 28 45 __alignof__(struct dlm_rsb), 0, NULL); 29 - if (!rsb_cache) { 30 - kmem_cache_destroy(lkb_cache); 31 - return -ENOMEM; 32 - } 46 + if (!rsb_cache) 47 + goto rsb; 33 48 34 49 return 0; 50 + 51 + rsb: 52 + kmem_cache_destroy(msg_cache); 53 + msg: 54 + kmem_cache_destroy(lkb_cache); 55 + lkb: 56 + kmem_cache_destroy(mhandle_cache); 57 + mhandle: 58 + kmem_cache_destroy(writequeue_cache); 59 + out: 60 + return -ENOMEM; 35 61 } 36 62 37 63 void dlm_memory_exit(void) 38 64 { 65 + kmem_cache_destroy(writequeue_cache); 66 + kmem_cache_destroy(mhandle_cache); 67 + kmem_cache_destroy(msg_cache); 39 68 kmem_cache_destroy(lkb_cache); 40 69 kmem_cache_destroy(rsb_cache); 41 70 } ··· 118 89 kmem_cache_free(lkb_cache, lkb); 119 90 } 120 91 92 + struct dlm_mhandle *dlm_allocate_mhandle(void) 93 + { 94 + return kmem_cache_alloc(mhandle_cache, GFP_NOFS); 95 + } 96 + 97 + void dlm_free_mhandle(struct dlm_mhandle *mhandle) 98 + { 99 + kmem_cache_free(mhandle_cache, mhandle); 100 + } 101 + 102 + struct writequeue_entry *dlm_allocate_writequeue(void) 103 + { 104 + return kmem_cache_alloc(writequeue_cache, GFP_ATOMIC); 105 + } 106 + 107 + void dlm_free_writequeue(struct writequeue_entry *writequeue) 108 + { 109 + kmem_cache_free(writequeue_cache, writequeue); 110 + } 111 + 112 + struct dlm_msg *dlm_allocate_msg(gfp_t allocation) 113 + { 114 + return kmem_cache_alloc(msg_cache, allocation); 115 + } 116 + 117 + void dlm_free_msg(struct dlm_msg *msg) 118 + { 119 + kmem_cache_free(msg_cache, msg); 120 + }
+6
fs/dlm/memory.h
··· 20 20 void dlm_free_lkb(struct dlm_lkb *l); 21 21 char *dlm_allocate_lvb(struct dlm_ls *ls); 22 22 void dlm_free_lvb(char *l); 23 + struct dlm_mhandle *dlm_allocate_mhandle(void); 24 + void dlm_free_mhandle(struct dlm_mhandle *mhandle); 25 + struct writequeue_entry *dlm_allocate_writequeue(void); 26 + void dlm_free_writequeue(struct writequeue_entry *writequeue); 27 + struct dlm_msg *dlm_allocate_msg(gfp_t allocation); 28 + void dlm_free_msg(struct dlm_msg *msg); 23 29 24 30 #endif /* __MEMORY_DOT_H__ */ 25 31
+72 -13
fs/dlm/midcomms.c
··· 137 137 #include "dlm_internal.h" 138 138 #include "lowcomms.h" 139 139 #include "config.h" 140 + #include "memory.h" 140 141 #include "lock.h" 141 142 #include "util.h" 142 143 #include "midcomms.h" ··· 221 220 */ 222 221 static DEFINE_MUTEX(close_lock); 223 222 223 + struct kmem_cache *dlm_midcomms_cache_create(void) 224 + { 225 + return kmem_cache_create("dlm_mhandle", sizeof(struct dlm_mhandle), 226 + 0, 0, NULL); 227 + } 228 + 224 229 static inline const char *dlm_state_str(int state) 225 230 { 226 231 switch (state) { ··· 286 279 struct dlm_mhandle *mh = container_of(rcu, struct dlm_mhandle, rcu); 287 280 288 281 dlm_lowcomms_put_msg(mh->msg); 289 - kfree(mh); 282 + dlm_free_mhandle(mh); 290 283 } 291 284 292 285 static void dlm_mhandle_delete(struct midcomms_node *node, ··· 916 909 if (msglen > len) 917 910 break; 918 911 919 - switch (le32_to_cpu(hd->h_version)) { 920 - case DLM_VERSION_3_1: 912 + switch (hd->h_version) { 913 + case cpu_to_le32(DLM_VERSION_3_1): 921 914 dlm_midcomms_receive_buffer_3_1((union dlm_packet *)ptr, nodeid); 922 915 break; 923 - case DLM_VERSION_3_2: 916 + case cpu_to_le32(DLM_VERSION_3_2): 924 917 dlm_midcomms_receive_buffer_3_2((union dlm_packet *)ptr, nodeid); 925 918 break; 926 919 default: ··· 976 969 spin_unlock(&node->state_lock); 977 970 /* do nothing FIN has it's own ack send */ 978 971 break; 979 - }; 972 + } 980 973 srcu_read_unlock(&nodes_srcu, idx); 981 974 } 982 975 ··· 1027 1020 header_out(&opts->o_header); 1028 1021 } 1029 1022 1030 - static void midcomms_new_msg_cb(struct dlm_mhandle *mh) 1023 + static void midcomms_new_msg_cb(void *data) 1031 1024 { 1025 + struct dlm_mhandle *mh = data; 1026 + 1032 1027 atomic_inc(&mh->node->send_queue_cnt); 1033 1028 1034 1029 spin_lock(&mh->node->send_queue_lock); ··· 1080 1071 /* this is a bug, however we going on and hope it will be resolved */ 1081 1072 WARN_ON(test_bit(DLM_NODE_FLAG_STOP_TX, &node->flags)); 1082 1073 1083 - mh = kzalloc(sizeof(*mh), GFP_NOFS); 1074 + mh = dlm_allocate_mhandle(); 1084 1075 if (!mh) 1085 1076 goto err; 1086 1077 1078 + mh->committed = false; 1079 + mh->ack_rcv = NULL; 1087 1080 mh->idx = idx; 1088 1081 mh->node = node; 1089 1082 ··· 1094 1083 msg = dlm_lowcomms_new_msg(nodeid, len, allocation, ppc, 1095 1084 NULL, NULL); 1096 1085 if (!msg) { 1097 - kfree(mh); 1086 + dlm_free_mhandle(mh); 1098 1087 goto err; 1099 1088 } 1100 1089 ··· 1103 1092 msg = dlm_midcomms_get_msg_3_2(mh, nodeid, len, allocation, 1104 1093 ppc); 1105 1094 if (!msg) { 1106 - kfree(mh); 1095 + dlm_free_mhandle(mh); 1107 1096 goto err; 1108 1097 } 1109 1098 1110 1099 break; 1111 1100 default: 1112 - kfree(mh); 1101 + dlm_free_mhandle(mh); 1113 1102 WARN_ON(1); 1114 1103 goto err; 1115 1104 } ··· 1145 1134 dlm_lowcomms_commit_msg(mh->msg); 1146 1135 dlm_lowcomms_put_msg(mh->msg); 1147 1136 /* mh is not part of rcu list in this case */ 1148 - kfree(mh); 1137 + dlm_free_mhandle(mh); 1149 1138 break; 1150 1139 case DLM_VERSION_3_2: 1151 1140 dlm_midcomms_commit_msg_3_2(mh); ··· 1242 1231 } 1243 1232 1244 1233 node->users++; 1245 - pr_debug("users inc count %d\n", node->users); 1234 + pr_debug("node %d users inc count %d\n", nodeid, node->users); 1246 1235 spin_unlock(&node->state_lock); 1247 1236 1248 1237 srcu_read_unlock(&nodes_srcu, idx); ··· 1265 1254 1266 1255 spin_lock(&node->state_lock); 1267 1256 node->users--; 1268 - pr_debug("users dec count %d\n", node->users); 1257 + pr_debug("node %d users dec count %d\n", nodeid, node->users); 1269 1258 1270 1259 /* hitting users count to zero means the 1271 1260 * other side is running dlm_midcomms_stop() ··· 1436 1425 1437 1426 return ret; 1438 1427 } 1428 + 1429 + /* debug functionality to send raw dlm msg from user space */ 1430 + struct dlm_rawmsg_data { 1431 + struct midcomms_node *node; 1432 + void *buf; 1433 + }; 1434 + 1435 + static void midcomms_new_rawmsg_cb(void *data) 1436 + { 1437 + struct dlm_rawmsg_data *rd = data; 1438 + struct dlm_header *h = rd->buf; 1439 + 1440 + switch (h->h_version) { 1441 + case cpu_to_le32(DLM_VERSION_3_1): 1442 + break; 1443 + default: 1444 + switch (h->h_cmd) { 1445 + case DLM_OPTS: 1446 + if (!h->u.h_seq) 1447 + h->u.h_seq = rd->node->seq_send++; 1448 + break; 1449 + default: 1450 + break; 1451 + } 1452 + break; 1453 + } 1454 + } 1455 + 1456 + int dlm_midcomms_rawmsg_send(struct midcomms_node *node, void *buf, 1457 + int buflen) 1458 + { 1459 + struct dlm_rawmsg_data rd; 1460 + struct dlm_msg *msg; 1461 + char *msgbuf; 1462 + 1463 + rd.node = node; 1464 + rd.buf = buf; 1465 + 1466 + msg = dlm_lowcomms_new_msg(node->nodeid, buflen, GFP_NOFS, 1467 + &msgbuf, midcomms_new_rawmsg_cb, &rd); 1468 + if (!msg) 1469 + return -ENOMEM; 1470 + 1471 + memcpy(msgbuf, buf, buflen); 1472 + dlm_lowcomms_commit_msg(msg); 1473 + return 0; 1474 + } 1475 +
+3
fs/dlm/midcomms.h
··· 28 28 unsigned long dlm_midcomms_flags(struct midcomms_node *node); 29 29 int dlm_midcomms_send_queue_cnt(struct midcomms_node *node); 30 30 uint32_t dlm_midcomms_version(struct midcomms_node *node); 31 + int dlm_midcomms_rawmsg_send(struct midcomms_node *node, void *buf, 32 + int buflen); 33 + struct kmem_cache *dlm_midcomms_cache_create(void); 31 34 32 35 #endif /* __MIDCOMMS_DOT_H__ */ 33 36
+1 -1
fs/dlm/rcom.c
··· 601 601 602 602 spin_lock(&ls->ls_recover_lock); 603 603 status = ls->ls_recover_status; 604 - stop = test_bit(LSFL_RECOVER_STOP, &ls->ls_flags); 604 + stop = dlm_recovery_stopped(ls); 605 605 seq = ls->ls_recover_seq; 606 606 spin_unlock(&ls->ls_recover_lock); 607 607
+1 -2
fs/dlm/recoverd.c
··· 124 124 125 125 dlm_recover_waiters_pre(ls); 126 126 127 - error = dlm_recovery_stopped(ls); 128 - if (error) { 127 + if (dlm_recovery_stopped(ls)) { 129 128 error = -EINTR; 130 129 goto fail; 131 130 }
+8 -9
fs/dlm/requestqueue.c
··· 44 44 e->nodeid = nodeid; 45 45 memcpy(&e->request, ms, ms->m_header.h_length); 46 46 47 + atomic_inc(&ls->ls_requestqueue_cnt); 47 48 mutex_lock(&ls->ls_requestqueue_mutex); 48 49 list_add_tail(&e->list, &ls->ls_requestqueue); 49 50 mutex_unlock(&ls->ls_requestqueue_mutex); ··· 90 89 91 90 mutex_lock(&ls->ls_requestqueue_mutex); 92 91 list_del(&e->list); 92 + if (atomic_dec_and_test(&ls->ls_requestqueue_cnt)) 93 + wake_up(&ls->ls_requestqueue_wait); 93 94 kfree(e); 94 95 95 96 if (dlm_locking_stopped(ls)) { ··· 118 115 119 116 void dlm_wait_requestqueue(struct dlm_ls *ls) 120 117 { 121 - for (;;) { 122 - mutex_lock(&ls->ls_requestqueue_mutex); 123 - if (list_empty(&ls->ls_requestqueue)) 124 - break; 125 - mutex_unlock(&ls->ls_requestqueue_mutex); 126 - schedule(); 127 - } 128 - mutex_unlock(&ls->ls_requestqueue_mutex); 118 + wait_event(ls->ls_requestqueue_wait, 119 + atomic_read(&ls->ls_requestqueue_cnt) == 0); 129 120 } 130 121 131 122 static int purge_request(struct dlm_ls *ls, struct dlm_message *ms, int nodeid) ··· 127 130 uint32_t type = ms->m_type; 128 131 129 132 /* the ls is being cleaned up and freed by release_lockspace */ 130 - if (!ls->ls_count) 133 + if (!atomic_read(&ls->ls_count)) 131 134 return 1; 132 135 133 136 if (dlm_is_removed(ls, nodeid)) ··· 158 161 159 162 if (purge_request(ls, ms, e->nodeid)) { 160 163 list_del(&e->list); 164 + if (atomic_dec_and_test(&ls->ls_requestqueue_cnt)) 165 + wake_up(&ls->ls_requestqueue_wait); 161 166 kfree(e); 162 167 } 163 168 }
+260
include/trace/events/dlm.h
··· 1 + /* SPDX-License-Identifier: GPL-2.0 */ 2 + #undef TRACE_SYSTEM 3 + #define TRACE_SYSTEM dlm 4 + 5 + #if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ) 6 + #define _TRACE_DLM_H 7 + 8 + #include <linux/dlm.h> 9 + #include <linux/dlmconstants.h> 10 + #include <linux/tracepoint.h> 11 + 12 + #include "../../../fs/dlm/dlm_internal.h" 13 + 14 + #define show_lock_flags(flags) __print_flags(flags, "|", \ 15 + { DLM_LKF_NOQUEUE, "NOQUEUE" }, \ 16 + { DLM_LKF_CANCEL, "CANCEL" }, \ 17 + { DLM_LKF_CONVERT, "CONVERT" }, \ 18 + { DLM_LKF_VALBLK, "VALBLK" }, \ 19 + { DLM_LKF_QUECVT, "QUECVT" }, \ 20 + { DLM_LKF_IVVALBLK, "IVVALBLK" }, \ 21 + { DLM_LKF_CONVDEADLK, "CONVDEADLK" }, \ 22 + { DLM_LKF_PERSISTENT, "PERSISTENT" }, \ 23 + { DLM_LKF_NODLCKWT, "NODLCKWT" }, \ 24 + { DLM_LKF_NODLCKBLK, "NODLCKBLK" }, \ 25 + { DLM_LKF_EXPEDITE, "EXPEDITE" }, \ 26 + { DLM_LKF_NOQUEUEBAST, "NOQUEUEBAST" }, \ 27 + { DLM_LKF_HEADQUE, "HEADQUE" }, \ 28 + { DLM_LKF_NOORDER, "NOORDER" }, \ 29 + { DLM_LKF_ORPHAN, "ORPHAN" }, \ 30 + { DLM_LKF_ALTPR, "ALTPR" }, \ 31 + { DLM_LKF_ALTCW, "ALTCW" }, \ 32 + { DLM_LKF_FORCEUNLOCK, "FORCEUNLOCK" }, \ 33 + { DLM_LKF_TIMEOUT, "TIMEOUT" }) 34 + 35 + #define show_lock_mode(mode) __print_symbolic(mode, \ 36 + { DLM_LOCK_IV, "IV"}, \ 37 + { DLM_LOCK_NL, "NL"}, \ 38 + { DLM_LOCK_CR, "CR"}, \ 39 + { DLM_LOCK_CW, "CW"}, \ 40 + { DLM_LOCK_PR, "PR"}, \ 41 + { DLM_LOCK_PW, "PW"}, \ 42 + { DLM_LOCK_EX, "EX"}) 43 + 44 + #define show_dlm_sb_flags(flags) __print_flags(flags, "|", \ 45 + { DLM_SBF_DEMOTED, "DEMOTED" }, \ 46 + { DLM_SBF_VALNOTVALID, "VALNOTVALID" }, \ 47 + { DLM_SBF_ALTMODE, "ALTMODE" }) 48 + 49 + /* note: we begin tracing dlm_lock_start() only if ls and lkb are found */ 50 + TRACE_EVENT(dlm_lock_start, 51 + 52 + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode, 53 + __u32 flags), 54 + 55 + TP_ARGS(ls, lkb, mode, flags), 56 + 57 + TP_STRUCT__entry( 58 + __field(__u32, ls_id) 59 + __field(__u32, lkb_id) 60 + __field(int, mode) 61 + __field(__u32, flags) 62 + ), 63 + 64 + TP_fast_assign( 65 + __entry->ls_id = ls->ls_global_id; 66 + __entry->lkb_id = lkb->lkb_id; 67 + __entry->mode = mode; 68 + __entry->flags = flags; 69 + ), 70 + 71 + TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s", 72 + __entry->ls_id, __entry->lkb_id, 73 + show_lock_mode(__entry->mode), 74 + show_lock_flags(__entry->flags)) 75 + 76 + ); 77 + 78 + TRACE_EVENT(dlm_lock_end, 79 + 80 + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode, __u32 flags, 81 + int error), 82 + 83 + TP_ARGS(ls, lkb, mode, flags, error), 84 + 85 + TP_STRUCT__entry( 86 + __field(__u32, ls_id) 87 + __field(__u32, lkb_id) 88 + __field(int, mode) 89 + __field(__u32, flags) 90 + __field(int, error) 91 + ), 92 + 93 + TP_fast_assign( 94 + __entry->ls_id = ls->ls_global_id; 95 + __entry->lkb_id = lkb->lkb_id; 96 + __entry->mode = mode; 97 + __entry->flags = flags; 98 + 99 + /* return value will be zeroed in those cases by dlm_lock() 100 + * we do it here again to not introduce more overhead if 101 + * trace isn't running and error reflects the return value. 102 + */ 103 + if (error == -EAGAIN || error == -EDEADLK) 104 + __entry->error = 0; 105 + else 106 + __entry->error = error; 107 + ), 108 + 109 + TP_printk("ls_id=%u lkb_id=%x mode=%s flags=%s error=%d", 110 + __entry->ls_id, __entry->lkb_id, 111 + show_lock_mode(__entry->mode), 112 + show_lock_flags(__entry->flags), __entry->error) 113 + 114 + ); 115 + 116 + TRACE_EVENT(dlm_bast, 117 + 118 + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, int mode), 119 + 120 + TP_ARGS(ls, lkb, mode), 121 + 122 + TP_STRUCT__entry( 123 + __field(__u32, ls_id) 124 + __field(__u32, lkb_id) 125 + __field(int, mode) 126 + ), 127 + 128 + TP_fast_assign( 129 + __entry->ls_id = ls->ls_global_id; 130 + __entry->lkb_id = lkb->lkb_id; 131 + __entry->mode = mode; 132 + ), 133 + 134 + TP_printk("ls_id=%u lkb_id=%x mode=%s", __entry->ls_id, 135 + __entry->lkb_id, show_lock_mode(__entry->mode)) 136 + 137 + ); 138 + 139 + TRACE_EVENT(dlm_ast, 140 + 141 + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, struct dlm_lksb *lksb), 142 + 143 + TP_ARGS(ls, lkb, lksb), 144 + 145 + TP_STRUCT__entry( 146 + __field(__u32, ls_id) 147 + __field(__u32, lkb_id) 148 + __field(u8, sb_flags) 149 + __field(int, sb_status) 150 + ), 151 + 152 + TP_fast_assign( 153 + __entry->ls_id = ls->ls_global_id; 154 + __entry->lkb_id = lkb->lkb_id; 155 + __entry->sb_flags = lksb->sb_flags; 156 + __entry->sb_status = lksb->sb_status; 157 + ), 158 + 159 + TP_printk("ls_id=%u lkb_id=%x sb_flags=%s sb_status=%d", 160 + __entry->ls_id, __entry->lkb_id, 161 + show_dlm_sb_flags(__entry->sb_flags), __entry->sb_status) 162 + 163 + ); 164 + 165 + /* note: we begin tracing dlm_unlock_start() only if ls and lkb are found */ 166 + TRACE_EVENT(dlm_unlock_start, 167 + 168 + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, __u32 flags), 169 + 170 + TP_ARGS(ls, lkb, flags), 171 + 172 + TP_STRUCT__entry( 173 + __field(__u32, ls_id) 174 + __field(__u32, lkb_id) 175 + __field(__u32, flags) 176 + ), 177 + 178 + TP_fast_assign( 179 + __entry->ls_id = ls->ls_global_id; 180 + __entry->lkb_id = lkb->lkb_id; 181 + __entry->flags = flags; 182 + ), 183 + 184 + TP_printk("ls_id=%u lkb_id=%x flags=%s", 185 + __entry->ls_id, __entry->lkb_id, 186 + show_lock_flags(__entry->flags)) 187 + 188 + ); 189 + 190 + TRACE_EVENT(dlm_unlock_end, 191 + 192 + TP_PROTO(struct dlm_ls *ls, struct dlm_lkb *lkb, __u32 flags, 193 + int error), 194 + 195 + TP_ARGS(ls, lkb, flags, error), 196 + 197 + TP_STRUCT__entry( 198 + __field(__u32, ls_id) 199 + __field(__u32, lkb_id) 200 + __field(__u32, flags) 201 + __field(int, error) 202 + ), 203 + 204 + TP_fast_assign( 205 + __entry->ls_id = ls->ls_global_id; 206 + __entry->lkb_id = lkb->lkb_id; 207 + __entry->flags = flags; 208 + __entry->error = error; 209 + ), 210 + 211 + TP_printk("ls_id=%u lkb_id=%x flags=%s error=%d", 212 + __entry->ls_id, __entry->lkb_id, 213 + show_lock_flags(__entry->flags), __entry->error) 214 + 215 + ); 216 + 217 + TRACE_EVENT(dlm_send, 218 + 219 + TP_PROTO(int nodeid, int ret), 220 + 221 + TP_ARGS(nodeid, ret), 222 + 223 + TP_STRUCT__entry( 224 + __field(int, nodeid) 225 + __field(int, ret) 226 + ), 227 + 228 + TP_fast_assign( 229 + __entry->nodeid = nodeid; 230 + __entry->ret = ret; 231 + ), 232 + 233 + TP_printk("nodeid=%d ret=%d", __entry->nodeid, __entry->ret) 234 + 235 + ); 236 + 237 + TRACE_EVENT(dlm_recv, 238 + 239 + TP_PROTO(int nodeid, int ret), 240 + 241 + TP_ARGS(nodeid, ret), 242 + 243 + TP_STRUCT__entry( 244 + __field(int, nodeid) 245 + __field(int, ret) 246 + ), 247 + 248 + TP_fast_assign( 249 + __entry->nodeid = nodeid; 250 + __entry->ret = ret; 251 + ), 252 + 253 + TP_printk("nodeid=%d ret=%d", __entry->nodeid, __entry->ret) 254 + 255 + ); 256 + 257 + #endif /* if !defined(_TRACE_DLM_H) || defined(TRACE_HEADER_MULTI_READ) */ 258 + 259 + /* This part must be outside protection */ 260 + #include <trace/define_trace.h>