Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge branch 'for-linus' of git://git.kernel.dk/linux-block

Pull block layer updates from Jens Axboe:
"I've got a few bits pending for 3.8 final, that I better get sent out.
It's all been sitting for a while, I consider it safe.

It contains:

- Two bug fixes for mtip32xx, fixing a driver hang and a crash.

- A few-liner protocol error fix for drbd.

- A few fixes for the xen block front/back driver, fixing a potential
data corruption issue.

- A fix for a race in disk_clear_events() that caused spurious
warnings. Out of the Chrome OS base.

- A deadlock fix for disk_clear_events(), moving it to an
unfreezable workqueue. Also from the Chrome OS base."

* 'for-linus' of git://git.kernel.dk/linux-block:
drbd: fix potential protocol error and resulting disconnect/reconnect
mtip32xx: fix for crash when the device surprise removed during rebuild
mtip32xx: fix for driver hang after a command timeout
block: prevent race/cleanup
block: remove deadlock in disk_clear_events
xen-blkfront: handle bvecs with partial data
llist/xen-blkfront: implement safe version of llist_for_each_entry
xen-blkback: implement safe iterator for the list of persistent grants

+101 -28
+32 -10
block/genhd.c
··· 35 35 36 36 static struct device_type disk_type; 37 37 38 + static void disk_check_events(struct disk_events *ev, 39 + unsigned int *clearing_ptr); 38 40 static void disk_alloc_events(struct gendisk *disk); 39 41 static void disk_add_events(struct gendisk *disk); 40 42 static void disk_del_events(struct gendisk *disk); ··· 1551 1549 const struct block_device_operations *bdops = disk->fops; 1552 1550 struct disk_events *ev = disk->ev; 1553 1551 unsigned int pending; 1552 + unsigned int clearing = mask; 1554 1553 1555 1554 if (!ev) { 1556 1555 /* for drivers still using the old ->media_changed method */ ··· 1561 1558 return 0; 1562 1559 } 1563 1560 1564 - /* tell the workfn about the events being cleared */ 1561 + disk_block_events(disk); 1562 + 1563 + /* 1564 + * store the union of mask and ev->clearing on the stack so that the 1565 + * race with disk_flush_events does not cause ambiguity (ev->clearing 1566 + * can still be modified even if events are blocked). 1567 + */ 1565 1568 spin_lock_irq(&ev->lock); 1566 - ev->clearing |= mask; 1569 + clearing |= ev->clearing; 1570 + ev->clearing = 0; 1567 1571 spin_unlock_irq(&ev->lock); 1568 1572 1569 - /* uncondtionally schedule event check and wait for it to finish */ 1570 - disk_block_events(disk); 1571 - queue_delayed_work(system_freezable_wq, &ev->dwork, 0); 1572 - flush_delayed_work(&ev->dwork); 1573 - __disk_unblock_events(disk, false); 1573 + disk_check_events(ev, &clearing); 1574 + /* 1575 + * if ev->clearing is not 0, the disk_flush_events got called in the 1576 + * middle of this function, so we want to run the workfn without delay. 1577 + */ 1578 + __disk_unblock_events(disk, ev->clearing ? 
true : false); 1574 1579 1575 1580 /* then, fetch and clear pending events */ 1576 1581 spin_lock_irq(&ev->lock); 1577 - WARN_ON_ONCE(ev->clearing & mask); /* cleared by workfn */ 1578 1582 pending = ev->pending & mask; 1579 1583 ev->pending &= ~mask; 1580 1584 spin_unlock_irq(&ev->lock); 1585 + WARN_ON_ONCE(clearing & mask); 1581 1586 1582 1587 return pending; 1583 1588 } 1584 1589 1590 + /* 1591 + * Separate this part out so that a different pointer for clearing_ptr can be 1592 + * passed in for disk_clear_events. 1593 + */ 1585 1594 static void disk_events_workfn(struct work_struct *work) 1586 1595 { 1587 1596 struct delayed_work *dwork = to_delayed_work(work); 1588 1597 struct disk_events *ev = container_of(dwork, struct disk_events, dwork); 1598 + 1599 + disk_check_events(ev, &ev->clearing); 1600 + } 1601 + 1602 + static void disk_check_events(struct disk_events *ev, 1603 + unsigned int *clearing_ptr) 1604 + { 1589 1605 struct gendisk *disk = ev->disk; 1590 1606 char *envp[ARRAY_SIZE(disk_uevents) + 1] = { }; 1591 - unsigned int clearing = ev->clearing; 1607 + unsigned int clearing = *clearing_ptr; 1592 1608 unsigned int events; 1593 1609 unsigned long intv; 1594 1610 int nr_events = 0, i; ··· 1620 1598 1621 1599 events &= ~ev->pending; 1622 1600 ev->pending |= events; 1623 - ev->clearing &= ~clearing; 1601 + *clearing_ptr &= ~clearing; 1624 1602 1625 1603 intv = disk_events_poll_jiffies(disk); 1626 1604 if (!ev->block && intv)
+1 -1
drivers/block/drbd/drbd_req.c
··· 168 168 } 169 169 170 170 /* must hold resource->req_lock */ 171 - static void start_new_tl_epoch(struct drbd_tconn *tconn) 171 + void start_new_tl_epoch(struct drbd_tconn *tconn) 172 172 { 173 173 /* no point closing an epoch, if it is empty, anyways. */ 174 174 if (tconn->current_tle_writes == 0)
+1
drivers/block/drbd/drbd_req.h
··· 267 267 int error; 268 268 }; 269 269 270 + extern void start_new_tl_epoch(struct drbd_tconn *tconn); 270 271 extern void drbd_req_destroy(struct kref *kref); 271 272 extern void _req_may_be_done(struct drbd_request *req, 272 273 struct bio_and_error *m);
+7
drivers/block/drbd/drbd_state.c
··· 931 931 enum drbd_state_rv rv = SS_SUCCESS; 932 932 enum sanitize_state_warnings ssw; 933 933 struct after_state_chg_work *ascw; 934 + bool did_remote, should_do_remote; 934 935 935 936 os = drbd_read_state(mdev); 936 937 ··· 982 981 (os.disk != D_DISKLESS && ns.disk == D_DISKLESS)) 983 982 atomic_inc(&mdev->local_cnt); 984 983 984 + did_remote = drbd_should_do_remote(mdev->state); 985 985 mdev->state.i = ns.i; 986 + should_do_remote = drbd_should_do_remote(mdev->state); 986 987 mdev->tconn->susp = ns.susp; 987 988 mdev->tconn->susp_nod = ns.susp_nod; 988 989 mdev->tconn->susp_fen = ns.susp_fen; 990 + 991 + /* put replicated vs not-replicated requests in seperate epochs */ 992 + if (did_remote != should_do_remote) 993 + start_new_tl_epoch(mdev->tconn); 989 994 990 995 if (os.disk == D_ATTACHING && ns.disk >= D_NEGOTIATING) 991 996 drbd_print_uuids(mdev, "attached to UUIDs");
+18 -6
drivers/block/mtip32xx/mtip32xx.c
··· 626 626 } 627 627 } 628 628 629 - if (cmdto_cnt && !test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { 629 + if (cmdto_cnt) { 630 630 print_tags(port->dd, "timed out", tagaccum, cmdto_cnt); 631 - 632 - mtip_restart_port(port); 631 + if (!test_bit(MTIP_PF_IC_ACTIVE_BIT, &port->flags)) { 632 + mtip_restart_port(port); 633 + wake_up_interruptible(&port->svc_wait); 634 + } 633 635 clear_bit(MTIP_PF_EH_ACTIVE_BIT, &port->flags); 634 - wake_up_interruptible(&port->svc_wait); 635 636 } 636 637 637 638 if (port->ic_pause_timer) { ··· 3888 3887 * Delete our gendisk structure. This also removes the device 3889 3888 * from /dev 3890 3889 */ 3891 - del_gendisk(dd->disk); 3890 + if (dd->disk) { 3891 + if (dd->disk->queue) 3892 + del_gendisk(dd->disk); 3893 + else 3894 + put_disk(dd->disk); 3895 + } 3892 3896 3893 3897 spin_lock(&rssd_index_lock); 3894 3898 ida_remove(&rssd_index_ida, dd->index); ··· 3927 3921 "Shutting down %s ...\n", dd->disk->disk_name); 3928 3922 3929 3923 /* Delete our gendisk structure, and cleanup the blk queue. */ 3930 - del_gendisk(dd->disk); 3924 + if (dd->disk) { 3925 + if (dd->disk->queue) 3926 + del_gendisk(dd->disk); 3927 + else 3928 + put_disk(dd->disk); 3929 + } 3930 + 3931 3931 3932 3932 spin_lock(&rssd_index_lock); 3933 3933 ida_remove(&rssd_index_ida, dd->index);
+11 -7
drivers/block/xen-blkback/blkback.c
··· 161 161 static void make_response(struct xen_blkif *blkif, u64 id, 162 162 unsigned short op, int st); 163 163 164 - #define foreach_grant(pos, rbtree, node) \ 165 - for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node); \ 164 + #define foreach_grant_safe(pos, n, rbtree, node) \ 165 + for ((pos) = container_of(rb_first((rbtree)), typeof(*(pos)), node), \ 166 + (n) = rb_next(&(pos)->node); \ 166 167 &(pos)->node != NULL; \ 167 - (pos) = container_of(rb_next(&(pos)->node), typeof(*(pos)), node)) 168 + (pos) = container_of(n, typeof(*(pos)), node), \ 169 + (n) = (&(pos)->node != NULL) ? rb_next(&(pos)->node) : NULL) 168 170 169 171 170 172 static void add_persistent_gnt(struct rb_root *root, ··· 219 217 struct gnttab_unmap_grant_ref unmap[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 220 218 struct page *pages[BLKIF_MAX_SEGMENTS_PER_REQUEST]; 221 219 struct persistent_gnt *persistent_gnt; 220 + struct rb_node *n; 222 221 int ret = 0; 223 222 int segs_to_unmap = 0; 224 223 225 - foreach_grant(persistent_gnt, root, node) { 224 + foreach_grant_safe(persistent_gnt, n, root, node) { 226 225 BUG_ON(persistent_gnt->handle == 227 226 BLKBACK_INVALID_HANDLE); 228 227 gnttab_set_unmap_op(&unmap[segs_to_unmap], ··· 233 230 persistent_gnt->handle); 234 231 235 232 pages[segs_to_unmap] = persistent_gnt->page; 236 - rb_erase(&persistent_gnt->node, root); 237 - kfree(persistent_gnt); 238 - num--; 239 233 240 234 if (++segs_to_unmap == BLKIF_MAX_SEGMENTS_PER_REQUEST || 241 235 !rb_next(&persistent_gnt->node)) { ··· 241 241 BUG_ON(ret); 242 242 segs_to_unmap = 0; 243 243 } 244 + 245 + rb_erase(&persistent_gnt->node, root); 246 + kfree(persistent_gnt); 247 + num--; 244 248 } 245 249 BUG_ON(num != 0); 246 250 }
+6 -4
drivers/block/xen-blkfront.c
··· 792 792 { 793 793 struct llist_node *all_gnts; 794 794 struct grant *persistent_gnt; 795 + struct llist_node *n; 795 796 796 797 /* Prevent new requests being issued until we fix things up. */ 797 798 spin_lock_irq(&info->io_lock); ··· 805 804 /* Remove all persistent grants */ 806 805 if (info->persistent_gnts_c) { 807 806 all_gnts = llist_del_all(&info->persistent_gnts); 808 - llist_for_each_entry(persistent_gnt, all_gnts, node) { 807 + llist_for_each_entry_safe(persistent_gnt, n, all_gnts, node) { 809 808 gnttab_end_foreign_access(persistent_gnt->gref, 0, 0UL); 810 809 __free_page(pfn_to_page(persistent_gnt->pfn)); 811 810 kfree(persistent_gnt); ··· 836 835 static void blkif_completion(struct blk_shadow *s, struct blkfront_info *info, 837 836 struct blkif_response *bret) 838 837 { 839 - int i; 838 + int i = 0; 840 839 struct bio_vec *bvec; 841 840 struct req_iterator iter; 842 841 unsigned long flags; ··· 853 852 */ 854 853 rq_for_each_segment(bvec, s->request, iter) { 855 854 BUG_ON((bvec->bv_offset + bvec->bv_len) > PAGE_SIZE); 856 - i = offset >> PAGE_SHIFT; 855 + if (bvec->bv_offset < offset) 856 + i++; 857 857 BUG_ON(i >= s->req.u.rw.nr_segments); 858 858 shared_data = kmap_atomic( 859 859 pfn_to_page(s->grants_used[i]->pfn)); ··· 863 861 bvec->bv_len); 864 862 bvec_kunmap_irq(bvec_data, &flags); 865 863 kunmap_atomic(shared_data); 866 - offset += bvec->bv_len; 864 + offset = bvec->bv_offset + bvec->bv_len; 867 865 } 868 866 } 869 867 /* Add the persistent grant into the list of free grants */
+25
include/linux/llist.h
··· 125 125 (pos) = llist_entry((pos)->member.next, typeof(*(pos)), member)) 126 126 127 127 /** 128 + * llist_for_each_entry_safe - iterate safely against remove over some entries 129 + * of lock-less list of given type. 130 + * @pos: the type * to use as a loop cursor. 131 + * @n: another type * to use as a temporary storage. 132 + * @node: the fist entry of deleted list entries. 133 + * @member: the name of the llist_node with the struct. 134 + * 135 + * In general, some entries of the lock-less list can be traversed 136 + * safely only after being removed from list, so start with an entry 137 + * instead of list head. This variant allows removal of entries 138 + * as we iterate. 139 + * 140 + * If being used on entries deleted from lock-less list directly, the 141 + * traverse order is from the newest to the oldest added entry. If 142 + * you want to traverse from the oldest to the newest, you must 143 + * reverse the order by yourself before traversing. 144 + */ 145 + #define llist_for_each_entry_safe(pos, n, node, member) \ 146 + for ((pos) = llist_entry((node), typeof(*(pos)), member), \ 147 + (n) = (pos)->member.next; \ 148 + &(pos)->member != NULL; \ 149 + (pos) = llist_entry(n, typeof(*(pos)), member), \ 150 + (n) = (&(pos)->member != NULL) ? (pos)->member.next : NULL) 151 + 152 + /** 128 153 * llist_empty - tests whether a lock-less list is empty 129 154 * @head: the list to test 130 155 *