Linux kernel mirror (for testing) git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux.git
kernel os linux
1
fork

Configure Feed

Select the types of activity you want to include in your feed.

Merge tag 'stable/for-linus-3.6-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen

Pull three xen bug-fixes from Konrad Rzeszutek Wilk:
- Revert the kexec fix, which caused a race on non-kexec shutdowns.
- Reuse existing P2M leaves instead of requiring the allocation of a
large area of boot-time virtual address space.
- Fix an off-by-one error when adding PFNs for balloon pages.

* tag 'stable/for-linus-3.6-rc3-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/konrad/xen:
xen/setup: Fix one-off error when adding for-balloon PFNs to the P2M.
xen/p2m: Reuse existing P2M leafs if they are filled with 1:1 PFNs or INVALID.
Revert "xen PVonHVM: move shared_info to MMIO before kexec"

+113 -130
+11 -107
arch/x86/xen/enlighten.c
··· 31 31 #include <linux/pci.h> 32 32 #include <linux/gfp.h> 33 33 #include <linux/memblock.h> 34 - #include <linux/syscore_ops.h> 35 34 36 35 #include <xen/xen.h> 37 36 #include <xen/interface/xen.h> ··· 1469 1470 #endif 1470 1471 } 1471 1472 1472 - #ifdef CONFIG_XEN_PVHVM 1473 - /* 1474 - * The pfn containing the shared_info is located somewhere in RAM. This 1475 - * will cause trouble if the current kernel is doing a kexec boot into a 1476 - * new kernel. The new kernel (and its startup code) can not know where 1477 - * the pfn is, so it can not reserve the page. The hypervisor will 1478 - * continue to update the pfn, and as a result memory corruption occours 1479 - * in the new kernel. 1480 - * 1481 - * One way to work around this issue is to allocate a page in the 1482 - * xen-platform pci device's BAR memory range. But pci init is done very 1483 - * late and the shared_info page is already in use very early to read 1484 - * the pvclock. So moving the pfn from RAM to MMIO is racy because some 1485 - * code paths on other vcpus could access the pfn during the small 1486 - * window when the old pfn is moved to the new pfn. There is even a 1487 - * small window were the old pfn is not backed by a mfn, and during that 1488 - * time all reads return -1. 1489 - * 1490 - * Because it is not known upfront where the MMIO region is located it 1491 - * can not be used right from the start in xen_hvm_init_shared_info. 1492 - * 1493 - * To minimise trouble the move of the pfn is done shortly before kexec. 1494 - * This does not eliminate the race because all vcpus are still online 1495 - * when the syscore_ops will be called. But hopefully there is no work 1496 - * pending at this point in time. Also the syscore_op is run last which 1497 - * reduces the risk further. 
1498 - */ 1499 - 1500 - static struct shared_info *xen_hvm_shared_info; 1501 - 1502 - static void xen_hvm_connect_shared_info(unsigned long pfn) 1473 + void __ref xen_hvm_init_shared_info(void) 1503 1474 { 1475 + int cpu; 1504 1476 struct xen_add_to_physmap xatp; 1477 + static struct shared_info *shared_info_page = 0; 1505 1478 1479 + if (!shared_info_page) 1480 + shared_info_page = (struct shared_info *) 1481 + extend_brk(PAGE_SIZE, PAGE_SIZE); 1506 1482 xatp.domid = DOMID_SELF; 1507 1483 xatp.idx = 0; 1508 1484 xatp.space = XENMAPSPACE_shared_info; 1509 - xatp.gpfn = pfn; 1485 + xatp.gpfn = __pa(shared_info_page) >> PAGE_SHIFT; 1510 1486 if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) 1511 1487 BUG(); 1512 1488 1513 - } 1514 - static void xen_hvm_set_shared_info(struct shared_info *sip) 1515 - { 1516 - int cpu; 1517 - 1518 - HYPERVISOR_shared_info = sip; 1489 + HYPERVISOR_shared_info = (struct shared_info *)shared_info_page; 1519 1490 1520 1491 /* xen_vcpu is a pointer to the vcpu_info struct in the shared_info 1521 1492 * page, we use it in the event channel upcall and in some pvclock 1522 1493 * related functions. We don't need the vcpu_info placement 1523 1494 * optimizations because we don't use any pv_mmu or pv_irq op on 1524 1495 * HVM. 1525 - * When xen_hvm_set_shared_info is run at boot time only vcpu 0 is 1526 - * online but xen_hvm_set_shared_info is run at resume time too and 1496 + * When xen_hvm_init_shared_info is run at boot time only vcpu 0 is 1497 + * online but xen_hvm_init_shared_info is run at resume time too and 1527 1498 * in that case multiple vcpus might be online. 
*/ 1528 1499 for_each_online_cpu(cpu) { 1529 1500 per_cpu(xen_vcpu, cpu) = &HYPERVISOR_shared_info->vcpu_info[cpu]; 1530 1501 } 1531 1502 } 1532 1503 1533 - /* Reconnect the shared_info pfn to a mfn */ 1534 - void xen_hvm_resume_shared_info(void) 1535 - { 1536 - xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); 1537 - } 1538 - 1539 - #ifdef CONFIG_KEXEC 1540 - static struct shared_info *xen_hvm_shared_info_kexec; 1541 - static unsigned long xen_hvm_shared_info_pfn_kexec; 1542 - 1543 - /* Remember a pfn in MMIO space for kexec reboot */ 1544 - void __devinit xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn) 1545 - { 1546 - xen_hvm_shared_info_kexec = sip; 1547 - xen_hvm_shared_info_pfn_kexec = pfn; 1548 - } 1549 - 1550 - static void xen_hvm_syscore_shutdown(void) 1551 - { 1552 - struct xen_memory_reservation reservation = { 1553 - .domid = DOMID_SELF, 1554 - .nr_extents = 1, 1555 - }; 1556 - unsigned long prev_pfn; 1557 - int rc; 1558 - 1559 - if (!xen_hvm_shared_info_kexec) 1560 - return; 1561 - 1562 - prev_pfn = __pa(xen_hvm_shared_info) >> PAGE_SHIFT; 1563 - set_xen_guest_handle(reservation.extent_start, &prev_pfn); 1564 - 1565 - /* Move pfn to MMIO, disconnects previous pfn from mfn */ 1566 - xen_hvm_connect_shared_info(xen_hvm_shared_info_pfn_kexec); 1567 - 1568 - /* Update pointers, following hypercall is also a memory barrier */ 1569 - xen_hvm_set_shared_info(xen_hvm_shared_info_kexec); 1570 - 1571 - /* Allocate new mfn for previous pfn */ 1572 - do { 1573 - rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); 1574 - if (rc == 0) 1575 - msleep(123); 1576 - } while (rc == 0); 1577 - 1578 - /* Make sure the previous pfn is really connected to a (new) mfn */ 1579 - BUG_ON(rc != 1); 1580 - } 1581 - 1582 - static struct syscore_ops xen_hvm_syscore_ops = { 1583 - .shutdown = xen_hvm_syscore_shutdown, 1584 - }; 1585 - #endif 1586 - 1587 - /* Use a pfn in RAM, may move to MMIO before kexec. 
*/ 1588 - static void __init xen_hvm_init_shared_info(void) 1589 - { 1590 - /* Remember pointer for resume */ 1591 - xen_hvm_shared_info = extend_brk(PAGE_SIZE, PAGE_SIZE); 1592 - xen_hvm_connect_shared_info(__pa(xen_hvm_shared_info) >> PAGE_SHIFT); 1593 - xen_hvm_set_shared_info(xen_hvm_shared_info); 1594 - } 1595 - 1504 + #ifdef CONFIG_XEN_PVHVM 1596 1505 static void __init init_hvm_pv_info(void) 1597 1506 { 1598 1507 int major, minor; ··· 1551 1644 init_hvm_pv_info(); 1552 1645 1553 1646 xen_hvm_init_shared_info(); 1554 - #ifdef CONFIG_KEXEC 1555 - register_syscore_ops(&xen_hvm_syscore_ops); 1556 - #endif 1557 1647 1558 1648 if (xen_feature(XENFEAT_hvm_callback_vector)) 1559 1649 xen_have_vector_callback = 1;
+92 -3
arch/x86/xen/p2m.c
··· 196 196 197 197 /* When we populate back during bootup, the amount of pages can vary. The 198 198 * max we have is seen is 395979, but that does not mean it can't be more. 199 - * But some machines can have 3GB I/O holes even. So lets reserve enough 200 - * for 4GB of I/O and E820 holes. */ 201 - RESERVE_BRK(p2m_populated, PMD_SIZE * 4); 199 + * Some machines can have 3GB I/O holes even. With early_can_reuse_p2m_middle 200 + * it can re-use Xen provided mfn_list array, so we only need to allocate at 201 + * most three P2M top nodes. */ 202 + RESERVE_BRK(p2m_populated, PAGE_SIZE * 3); 203 + 202 204 static inline unsigned p2m_top_index(unsigned long pfn) 203 205 { 204 206 BUG_ON(pfn >= MAX_P2M_PFN); ··· 577 575 } 578 576 return true; 579 577 } 578 + 579 + /* 580 + * Skim over the P2M tree looking at pages that are either filled with 581 + * INVALID_P2M_ENTRY or with 1:1 PFNs. If found, re-use that page and 582 + * replace the P2M leaf with a p2m_missing or p2m_identity. 583 + * Stick the old page in the new P2M tree location. 
584 + */ 585 + bool __init early_can_reuse_p2m_middle(unsigned long set_pfn, unsigned long set_mfn) 586 + { 587 + unsigned topidx; 588 + unsigned mididx; 589 + unsigned ident_pfns; 590 + unsigned inv_pfns; 591 + unsigned long *p2m; 592 + unsigned long *mid_mfn_p; 593 + unsigned idx; 594 + unsigned long pfn; 595 + 596 + /* We only look when this entails a P2M middle layer */ 597 + if (p2m_index(set_pfn)) 598 + return false; 599 + 600 + for (pfn = 0; pfn <= MAX_DOMAIN_PAGES; pfn += P2M_PER_PAGE) { 601 + topidx = p2m_top_index(pfn); 602 + 603 + if (!p2m_top[topidx]) 604 + continue; 605 + 606 + if (p2m_top[topidx] == p2m_mid_missing) 607 + continue; 608 + 609 + mididx = p2m_mid_index(pfn); 610 + p2m = p2m_top[topidx][mididx]; 611 + if (!p2m) 612 + continue; 613 + 614 + if ((p2m == p2m_missing) || (p2m == p2m_identity)) 615 + continue; 616 + 617 + if ((unsigned long)p2m == INVALID_P2M_ENTRY) 618 + continue; 619 + 620 + ident_pfns = 0; 621 + inv_pfns = 0; 622 + for (idx = 0; idx < P2M_PER_PAGE; idx++) { 623 + /* IDENTITY_PFNs are 1:1 */ 624 + if (p2m[idx] == IDENTITY_FRAME(pfn + idx)) 625 + ident_pfns++; 626 + else if (p2m[idx] == INVALID_P2M_ENTRY) 627 + inv_pfns++; 628 + else 629 + break; 630 + } 631 + if ((ident_pfns == P2M_PER_PAGE) || (inv_pfns == P2M_PER_PAGE)) 632 + goto found; 633 + } 634 + return false; 635 + found: 636 + /* Found one, replace old with p2m_identity or p2m_missing */ 637 + p2m_top[topidx][mididx] = (ident_pfns ? p2m_identity : p2m_missing); 638 + /* And the other for save/restore.. */ 639 + mid_mfn_p = p2m_top_mfn_p[topidx]; 640 + /* NOTE: Even if it is a p2m_identity it should still be point to 641 + * a page filled with INVALID_P2M_ENTRY entries. */ 642 + mid_mfn_p[mididx] = virt_to_mfn(p2m_missing); 643 + 644 + /* Reset where we want to stick the old page in. 
*/ 645 + topidx = p2m_top_index(set_pfn); 646 + mididx = p2m_mid_index(set_pfn); 647 + 648 + /* This shouldn't happen */ 649 + if (WARN_ON(p2m_top[topidx] == p2m_mid_missing)) 650 + early_alloc_p2m(set_pfn); 651 + 652 + if (WARN_ON(p2m_top[topidx][mididx] != p2m_missing)) 653 + return false; 654 + 655 + p2m_init(p2m); 656 + p2m_top[topidx][mididx] = p2m; 657 + mid_mfn_p = p2m_top_mfn_p[topidx]; 658 + mid_mfn_p[mididx] = virt_to_mfn(p2m); 659 + 660 + return true; 661 + } 580 662 bool __init early_set_phys_to_machine(unsigned long pfn, unsigned long mfn) 581 663 { 582 664 if (unlikely(!__set_phys_to_machine(pfn, mfn))) { 583 665 if (!early_alloc_p2m(pfn)) 584 666 return false; 667 + 668 + if (early_can_reuse_p2m_middle(pfn, mfn)) 669 + return __set_phys_to_machine(pfn, mfn); 585 670 586 671 if (!early_alloc_p2m_middle(pfn, false /* boundary crossover OK!*/)) 587 672 return false;
+8 -1
arch/x86/xen/setup.c
··· 78 78 memblock_reserve(start, size); 79 79 80 80 xen_max_p2m_pfn = PFN_DOWN(start + size); 81 + for (pfn = PFN_DOWN(start); pfn < xen_max_p2m_pfn; pfn++) { 82 + unsigned long mfn = pfn_to_mfn(pfn); 81 83 82 - for (pfn = PFN_DOWN(start); pfn <= xen_max_p2m_pfn; pfn++) 84 + if (WARN(mfn == pfn, "Trying to over-write 1-1 mapping (pfn: %lx)\n", pfn)) 85 + continue; 86 + WARN(mfn != INVALID_P2M_ENTRY, "Trying to remove %lx which has %lx mfn!\n", 87 + pfn, mfn); 88 + 83 89 __set_phys_to_machine(pfn, INVALID_P2M_ENTRY); 90 + } 84 91 } 85 92 86 93 static unsigned long __init xen_do_chunk(unsigned long start,
+1 -1
arch/x86/xen/suspend.c
··· 30 30 { 31 31 #ifdef CONFIG_XEN_PVHVM 32 32 int cpu; 33 - xen_hvm_resume_shared_info(); 33 + xen_hvm_init_shared_info(); 34 34 xen_callback_vector(); 35 35 xen_unplug_emulated_devices(); 36 36 if (xen_feature(XENFEAT_hvm_safe_pvclock)) {
+1 -1
arch/x86/xen/xen-ops.h
··· 41 41 void xen_vcpu_restore(void); 42 42 43 43 void xen_callback_vector(void); 44 - void xen_hvm_resume_shared_info(void); 44 + void xen_hvm_init_shared_info(void); 45 45 void xen_unplug_emulated_devices(void); 46 46 47 47 void __init xen_build_dynamic_phys_to_machine(void);
-15
drivers/xen/platform-pci.c
··· 101 101 return 0; 102 102 } 103 103 104 - static void __devinit prepare_shared_info(void) 105 - { 106 - #ifdef CONFIG_KEXEC 107 - unsigned long addr; 108 - struct shared_info *hvm_shared_info; 109 - 110 - addr = alloc_xen_mmio(PAGE_SIZE); 111 - hvm_shared_info = ioremap(addr, PAGE_SIZE); 112 - memset(hvm_shared_info, 0, PAGE_SIZE); 113 - xen_hvm_prepare_kexec(hvm_shared_info, addr >> PAGE_SHIFT); 114 - #endif 115 - } 116 - 117 104 static int __devinit platform_pci_init(struct pci_dev *pdev, 118 105 const struct pci_device_id *ent) 119 106 { ··· 137 150 138 151 platform_mmio = mmio_addr; 139 152 platform_mmiolen = mmio_len; 140 - 141 - prepare_shared_info(); 142 153 143 154 if (!xen_have_vector_callback) { 144 155 ret = xen_allocate_irq(pdev);
-2
include/xen/events.h
··· 58 58 59 59 void xen_irq_resume(void); 60 60 61 - void xen_hvm_prepare_kexec(struct shared_info *sip, unsigned long pfn); 62 - 63 61 /* Clear an irq's pending state, in preparation for polling on it */ 64 62 void xen_clear_irq_pending(int irq); 65 63 void xen_set_irq_pending(int irq);