svn commit: r485430 - in head/emulators/xen-kernel411: . files
Roger Pau Monné
royger at FreeBSD.org
Tue Nov 20 14:17:11 UTC 2018
Author: royger (src committer)
Date: Tue Nov 20 14:17:07 2018
New Revision: 485430
URL: https://svnweb.freebsd.org/changeset/ports/485430
Log:
xen: add XSA patches
Fixes for XSA-{275,276,277,279,280,282}
Sponsored by: Citrix Systems R&D
Added:
head/emulators/xen-kernel411/files/0001-x86-hvm-ioreq-fix-page-referencing.patch (contents, props changed)
head/emulators/xen-kernel411/files/0002-x86-hvm-ioreq-use-ref-counted-target-assigned-shared.patch (contents, props changed)
head/emulators/xen-kernel411/files/xsa275-4.11-1.patch (contents, props changed)
head/emulators/xen-kernel411/files/xsa275-4.11-2.patch (contents, props changed)
head/emulators/xen-kernel411/files/xsa277.patch (contents, props changed)
head/emulators/xen-kernel411/files/xsa279.patch (contents, props changed)
head/emulators/xen-kernel411/files/xsa280-1.patch (contents, props changed)
head/emulators/xen-kernel411/files/xsa280-4.11-2.patch (contents, props changed)
head/emulators/xen-kernel411/files/xsa282-2.patch (contents, props changed)
head/emulators/xen-kernel411/files/xsa282-4.11-1.patch (contents, props changed)
Modified:
head/emulators/xen-kernel411/Makefile
Modified: head/emulators/xen-kernel411/Makefile
==============================================================================
--- head/emulators/xen-kernel411/Makefile Tue Nov 20 14:05:01 2018 (r485429)
+++ head/emulators/xen-kernel411/Makefile Tue Nov 20 14:17:07 2018 (r485430)
@@ -2,7 +2,7 @@
PORTNAME= xen
PORTVERSION= 4.11.0
-PORTREVISION= 2
+PORTREVISION= 3
CATEGORIES= emulators
MASTER_SITES= http://downloads.xenproject.org/release/xen/${PORTVERSION}/
PKGNAMESUFFIX= -kernel411
@@ -92,6 +92,17 @@ EXTRA_PATCHES+= ${FILESDIR}/0001-xen-Port-the-array_in
${FILESDIR}/0042-x86-write-to-correct-variable-in-parse_pv_l1tf.patch:-p1
# XSA-278: x86: Nested VT-x usable even when disabled
EXTRA_PATCHES+= ${FILESDIR}/xsa278-4.11.patch:-p1
+# XSA-{275,276,277,279,280,282}
+EXTRA_PATCHES+= ${FILESDIR}/xsa275-4.11-1.patch:-p1 \
+ ${FILESDIR}/xsa275-4.11-2.patch:-p1 \
+ ${FILESDIR}/0001-x86-hvm-ioreq-fix-page-referencing.patch:-p1 \
+ ${FILESDIR}/0002-x86-hvm-ioreq-use-ref-counted-target-assigned-shared.patch:-p1 \
+ ${FILESDIR}/xsa277.patch:-p1 \
+ ${FILESDIR}/xsa279.patch:-p1 \
+ ${FILESDIR}/xsa280-1.patch:-p1 \
+ ${FILESDIR}/xsa280-4.11-2.patch:-p1 \
+ ${FILESDIR}/xsa282-4.11-1.patch:-p1 \
+ ${FILESDIR}/xsa282-2.patch:-p1
.include <bsd.port.options.mk>
Added: head/emulators/xen-kernel411/files/0001-x86-hvm-ioreq-fix-page-referencing.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0001-x86-hvm-ioreq-fix-page-referencing.patch Tue Nov 20 14:17:07 2018 (r485430)
@@ -0,0 +1,120 @@
+From bcc115ba39d2985dcf356ba8a9ac291e314f1f0f Mon Sep 17 00:00:00 2001
+From: Jan Beulich <JBeulich at suse.com>
+Date: Thu, 11 Oct 2018 04:00:26 -0600
+Subject: [PATCH 1/2] x86/hvm/ioreq: fix page referencing
+
+The code does not take a page reference in hvm_alloc_ioreq_mfn(), only a
+type reference. This can lead to a situation where a malicious domain with
+XSM_DM_PRIV can engineer a sequence as follows:
+
+- create IOREQ server: no pages as yet.
+- acquire resource: page allocated, total 0.
+- decrease reservation: -1 ref, total -1.
+
+This will cause Xen to hit a BUG_ON() in free_domheap_pages().
+
+This patch fixes the issue by changing the call to get_page_type() in
+hvm_alloc_ioreq_mfn() to a call to get_page_and_type(). This change
+in turn requires an extra put_page() in hvm_free_ioreq_mfn() in the case
+that _PGC_allocated is still set (i.e. a decrease reservation has not
+occurred) to avoid the page being leaked.
+
+This is part of XSA-276.
+
+Reported-by: Julien Grall <julien.grall at arm.com>
+Reported-by: Julien Grall <julien.grall at arm.com>
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+---
+ xen/arch/x86/hvm/ioreq.c | 46 +++++++++++++++++++++++++++-------------
+ 1 file changed, 31 insertions(+), 15 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
+index f39f391929..bdc2687014 100644
+--- a/xen/arch/x86/hvm/ioreq.c
++++ b/xen/arch/x86/hvm/ioreq.c
+@@ -327,6 +327,7 @@ static int hvm_map_ioreq_gfn(struct hvm_ioreq_server *s, bool buf)
+ static int hvm_alloc_ioreq_mfn(struct hvm_ioreq_server *s, bool buf)
+ {
+ struct hvm_ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
++ struct page_info *page;
+
+ if ( iorp->page )
+ {
+@@ -349,27 +350,33 @@ static int hvm_alloc_ioreq_mfn(struct hvm_ioreq_server *s, bool buf)
+ * could fail if the emulating domain has already reached its
+ * maximum allocation.
+ */
+- iorp->page = alloc_domheap_page(s->emulator, MEMF_no_refcount);
++ page = alloc_domheap_page(s->emulator, MEMF_no_refcount);
+
+- if ( !iorp->page )
++ if ( !page )
+ return -ENOMEM;
+
+- if ( !get_page_type(iorp->page, PGT_writable_page) )
+- goto fail1;
++ if ( !get_page_and_type(page, s->emulator, PGT_writable_page) )
++ {
++ /*
++ * The domain can't possibly know about this page yet, so failure
++ * here is a clear indication of something fishy going on.
++ */
++ domain_crash(s->emulator);
++ return -ENODATA;
++ }
+
+- iorp->va = __map_domain_page_global(iorp->page);
++ iorp->va = __map_domain_page_global(page);
+ if ( !iorp->va )
+- goto fail2;
++ goto fail;
+
++ iorp->page = page;
+ clear_page(iorp->va);
+ return 0;
+
+- fail2:
+- put_page_type(iorp->page);
+-
+- fail1:
+- put_page(iorp->page);
+- iorp->page = NULL;
++ fail:
++ if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
++ put_page(page);
++ put_page_and_type(page);
+
+ return -ENOMEM;
+ }
+@@ -377,15 +384,24 @@ static int hvm_alloc_ioreq_mfn(struct hvm_ioreq_server *s, bool buf)
+ static void hvm_free_ioreq_mfn(struct hvm_ioreq_server *s, bool buf)
+ {
+ struct hvm_ioreq_page *iorp = buf ? &s->bufioreq : &s->ioreq;
++ struct page_info *page = iorp->page;
+
+- if ( !iorp->page )
++ if ( !page )
+ return;
+
++ iorp->page = NULL;
++
+ unmap_domain_page_global(iorp->va);
+ iorp->va = NULL;
+
+- put_page_and_type(iorp->page);
+- iorp->page = NULL;
++ /*
++ * Check whether we need to clear the allocation reference before
++ * dropping the explicit references taken by get_page_and_type().
++ */
++ if ( test_and_clear_bit(_PGC_allocated, &page->count_info) )
++ put_page(page);
++
++ put_page_and_type(page);
+ }
+
+ bool is_ioreq_server_page(struct domain *d, const struct page_info *page)
+--
+2.19.1
+
Added: head/emulators/xen-kernel411/files/0002-x86-hvm-ioreq-use-ref-counted-target-assigned-shared.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0002-x86-hvm-ioreq-use-ref-counted-target-assigned-shared.patch Tue Nov 20 14:17:07 2018 (r485430)
@@ -0,0 +1,83 @@
+From 0bb2969630fbc92a0510bf120578b58efb74cdab Mon Sep 17 00:00:00 2001
+From: Paul Durrant <Paul.Durrant at citrix.com>
+Date: Thu, 1 Nov 2018 17:30:20 +0000
+Subject: [PATCH 2/2] x86/hvm/ioreq: use ref-counted target-assigned shared
+ pages
+
+Passing MEMF_no_refcount to alloc_domheap_pages() will allocate, as
+expected, a page that is assigned to the specified domain but is not
+accounted for in tot_pages. Unfortunately there is no logic for tracking
+such allocations and avoiding any adjustment to tot_pages when the page
+is freed.
+
+The only caller of alloc_domheap_pages() that passes MEMF_no_refcount is
+hvm_alloc_ioreq_mfn() so this patch removes use of the flag from that
+call-site to avoid the possibility of a domain using an ioreq server as
+a means to adjust its tot_pages and hence allocate more memory than it
+should be able to.
+
+However, the reason for using the flag in the first place was to avoid
+the allocation failing if the emulator domain is already at its maximum
+memory limit. Hence this patch switches to allocating memory from the
+target domain instead of the emulator domain. There is already an extra
+memory allowance of 2MB (LIBXL_HVM_EXTRA_MEMORY) applied to HVM guests,
+which is sufficient to cover the pages required by the supported
+configuration of a single IOREQ server for QEMU. (Stub-domains do not,
+so far, use resource mapping). It is also the case that QEMU will have
+mapped the IOREQ server pages before the guest boots, hence it is not
+possible for the guest to inflate its balloon to consume these pages.
+
+Reported-by: Julien Grall <julien.grall at arm.com>
+Signed-off-by: Paul Durrant <paul.durrant at citrix.com>
+---
+ xen/arch/x86/hvm/ioreq.c | 12 ++----------
+ xen/arch/x86/mm.c | 6 ------
+ 2 files changed, 2 insertions(+), 16 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/ioreq.c b/xen/arch/x86/hvm/ioreq.c
+index bdc2687014..fd10ee6146 100644
+--- a/xen/arch/x86/hvm/ioreq.c
++++ b/xen/arch/x86/hvm/ioreq.c
+@@ -342,20 +342,12 @@ static int hvm_alloc_ioreq_mfn(struct hvm_ioreq_server *s, bool buf)
+ return 0;
+ }
+
+- /*
+- * Allocated IOREQ server pages are assigned to the emulating
+- * domain, not the target domain. This is safe because the emulating
+- * domain cannot be destroyed until the ioreq server is destroyed.
+- * Also we must use MEMF_no_refcount otherwise page allocation
+- * could fail if the emulating domain has already reached its
+- * maximum allocation.
+- */
+- page = alloc_domheap_page(s->emulator, MEMF_no_refcount);
++ page = alloc_domheap_page(s->target, 0);
+
+ if ( !page )
+ return -ENOMEM;
+
+- if ( !get_page_and_type(page, s->emulator, PGT_writable_page) )
++ if ( !get_page_and_type(page, s->target, PGT_writable_page) )
+ {
+ /*
+ * The domain can't possibly know about this page yet, so failure
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 7d4871b791..24b215d785 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -4396,12 +4396,6 @@ int arch_acquire_resource(struct domain *d, unsigned int type,
+
+ mfn_list[i] = mfn_x(mfn);
+ }
+-
+- /*
+- * The frames will have been assigned to the domain that created
+- * the ioreq server.
+- */
+- *flags |= XENMEM_rsrc_acq_caller_owned;
+ break;
+ }
+
+--
+2.19.1
+
Added: head/emulators/xen-kernel411/files/xsa275-4.11-1.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa275-4.11-1.patch Tue Nov 20 14:17:07 2018 (r485430)
@@ -0,0 +1,104 @@
+From: Roger Pau Monné <roger.pau at citrix.com>
+Subject: amd/iommu: fix flush checks
+
+Flush checking for AMD IOMMU didn't check whether the previous entry
+was present, or whether the flags (writable/readable) changed in order
+to decide whether a flush should be executed.
+
+Fix this by taking the writable/readable/next-level fields into account,
+together with the present bit.
+
+Along these lines the flushing in amd_iommu_map_page() must not be
+omitted for PV domains. The comment there was simply wrong: Mappings may
+very well change, both their addresses and their permissions. Ultimately
+this should honor iommu_dont_flush_iotlb, but to achieve this
+amd_iommu_ops first needs to gain an .iotlb_flush hook.
+
+Also make clear_iommu_pte_present() static, to demonstrate there's no
+caller omitting the (subsequent) flush.
+
+This is part of XSA-275.
+
+Reported-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Roger Pau Monné <roger.pau at citrix.com>
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+
+--- a/xen/drivers/passthrough/amd/iommu_map.c
++++ b/xen/drivers/passthrough/amd/iommu_map.c
+@@ -35,7 +35,7 @@ static unsigned int pfn_to_pde_idx(unsig
+ return idx;
+ }
+
+-void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
++static void clear_iommu_pte_present(unsigned long l1_mfn, unsigned long gfn)
+ {
+ u64 *table, *pte;
+
+@@ -49,23 +49,42 @@ static bool_t set_iommu_pde_present(u32
+ unsigned int next_level,
+ bool_t iw, bool_t ir)
+ {
+- u64 addr_lo, addr_hi, maddr_old, maddr_next;
++ uint64_t addr_lo, addr_hi, maddr_next;
+ u32 entry;
+- bool_t need_flush = 0;
++ bool need_flush = false, old_present;
+
+ maddr_next = (u64)next_mfn << PAGE_SHIFT;
+
+- addr_hi = get_field_from_reg_u32(pde[1],
+- IOMMU_PTE_ADDR_HIGH_MASK,
+- IOMMU_PTE_ADDR_HIGH_SHIFT);
+- addr_lo = get_field_from_reg_u32(pde[0],
+- IOMMU_PTE_ADDR_LOW_MASK,
+- IOMMU_PTE_ADDR_LOW_SHIFT);
+-
+- maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
+-
+- if ( maddr_old != maddr_next )
+- need_flush = 1;
++ old_present = get_field_from_reg_u32(pde[0], IOMMU_PTE_PRESENT_MASK,
++ IOMMU_PTE_PRESENT_SHIFT);
++ if ( old_present )
++ {
++ bool old_r, old_w;
++ unsigned int old_level;
++ uint64_t maddr_old;
++
++ addr_hi = get_field_from_reg_u32(pde[1],
++ IOMMU_PTE_ADDR_HIGH_MASK,
++ IOMMU_PTE_ADDR_HIGH_SHIFT);
++ addr_lo = get_field_from_reg_u32(pde[0],
++ IOMMU_PTE_ADDR_LOW_MASK,
++ IOMMU_PTE_ADDR_LOW_SHIFT);
++ old_level = get_field_from_reg_u32(pde[0],
++ IOMMU_PDE_NEXT_LEVEL_MASK,
++ IOMMU_PDE_NEXT_LEVEL_SHIFT);
++ old_w = get_field_from_reg_u32(pde[1],
++ IOMMU_PTE_IO_WRITE_PERMISSION_MASK,
++ IOMMU_PTE_IO_WRITE_PERMISSION_SHIFT);
++ old_r = get_field_from_reg_u32(pde[1],
++ IOMMU_PTE_IO_READ_PERMISSION_MASK,
++ IOMMU_PTE_IO_READ_PERMISSION_SHIFT);
++
++ maddr_old = (addr_hi << 32) | (addr_lo << PAGE_SHIFT);
++
++ if ( maddr_old != maddr_next || iw != old_w || ir != old_r ||
++ old_level != next_level )
++ need_flush = true;
++ }
+
+ addr_lo = maddr_next & DMA_32BIT_MASK;
+ addr_hi = maddr_next >> 32;
+@@ -687,10 +706,7 @@ int amd_iommu_map_page(struct domain *d,
+ if ( !need_flush )
+ goto out;
+
+- /* 4K mapping for PV guests never changes,
+- * no need to flush if we trust non-present bits */
+- if ( is_hvm_domain(d) )
+- amd_iommu_flush_pages(d, gfn, 0);
++ amd_iommu_flush_pages(d, gfn, 0);
+
+ for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2;
+ merge_level <= hd->arch.paging_mode; merge_level++ )
Added: head/emulators/xen-kernel411/files/xsa275-4.11-2.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa275-4.11-2.patch Tue Nov 20 14:17:07 2018 (r485430)
@@ -0,0 +1,68 @@
+From: Jan Beulich <jbeulich at suse.com>
+Subject: AMD/IOMMU: suppress PTE merging after initial table creation
+
+The logic is not fit for this purpose, so simply disable its use until
+it can be fixed / replaced. Note that this re-enables merging for the
+table creation case, which was disabled as a (perhaps unintended) side
+effect of the earlier "amd/iommu: fix flush checks". It relies on no
+page getting mapped more than once (with different properties) in this
+process, as that would still be beyond what the merging logic can cope
+with. But arch_iommu_populate_page_table() guarantees this afaict.
+
+This is part of XSA-275.
+
+Reported-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+
+--- a/xen/drivers/passthrough/amd/iommu_map.c
++++ b/xen/drivers/passthrough/amd/iommu_map.c
+@@ -702,11 +702,24 @@ int amd_iommu_map_page(struct domain *d,
+ !!(flags & IOMMUF_writable),
+ !!(flags & IOMMUF_readable));
+
+- /* Do not increase pde count if io mapping has not been changed */
+- if ( !need_flush )
+- goto out;
++ if ( need_flush )
++ {
++ amd_iommu_flush_pages(d, gfn, 0);
++ /* No further merging, as the logic doesn't cope. */
++ hd->arch.no_merge = true;
++ }
+
+- amd_iommu_flush_pages(d, gfn, 0);
++ /*
++ * Suppress merging of non-R/W mappings or after initial table creation,
++ * as the merge logic does not cope with this.
++ */
++ if ( hd->arch.no_merge || flags != (IOMMUF_writable | IOMMUF_readable) )
++ goto out;
++ if ( d->creation_finished )
++ {
++ hd->arch.no_merge = true;
++ goto out;
++ }
+
+ for ( merge_level = IOMMU_PAGING_MODE_LEVEL_2;
+ merge_level <= hd->arch.paging_mode; merge_level++ )
+@@ -780,6 +793,10 @@ int amd_iommu_unmap_page(struct domain *
+
+ /* mark PTE as 'page not present' */
+ clear_iommu_pte_present(pt_mfn[1], gfn);
++
++ /* No further merging in amd_iommu_map_page(), as the logic doesn't cope. */
++ hd->arch.no_merge = true;
++
+ spin_unlock(&hd->arch.mapping_lock);
+
+ amd_iommu_flush_pages(d, gfn, 0);
+--- a/xen/include/asm-x86/iommu.h
++++ b/xen/include/asm-x86/iommu.h
+@@ -40,6 +40,7 @@ struct arch_iommu
+
+ /* amd iommu support */
+ int paging_mode;
++ bool no_merge;
+ struct page_info *root_table;
+ struct guest_iommu *g_iommu;
+ };
Added: head/emulators/xen-kernel411/files/xsa277.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa277.patch Tue Nov 20 14:17:07 2018 (r485430)
@@ -0,0 +1,47 @@
+From: Andrew Cooper <andrew.cooper3 at citrix.com>
+Subject: x86/mm: Put the gfn on all paths after get_gfn_query()
+
+c/s 7867181b2 "x86/PoD: correctly handle non-order-0 decrease-reservation
+requests" introduced an early exit in guest_remove_page() for unexpected p2m
+types. However, get_gfn_query() internally takes the p2m lock, and must be
+matched with a put_gfn() call later.
+
+Fix the erroneous comment beside the declaration of get_gfn_query().
+
+This is XSA-277.
+
+Reported-by: Paul Durrant <paul.durrant at citrix.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3 at citrix.com>
+
+diff --git a/xen/common/memory.c b/xen/common/memory.c
+index 987395f..26b7123 100644
+--- a/xen/common/memory.c
++++ b/xen/common/memory.c
+@@ -305,7 +305,11 @@ int guest_remove_page(struct domain *d, unsigned long gmfn)
+ #ifdef CONFIG_X86
+ mfn = get_gfn_query(d, gmfn, &p2mt);
+ if ( unlikely(p2mt == p2m_invalid) || unlikely(p2mt == p2m_mmio_dm) )
++ {
++ put_gfn(d, gmfn);
++
+ return -ENOENT;
++ }
+
+ if ( unlikely(p2m_is_paging(p2mt)) )
+ {
+diff --git a/xen/include/asm-x86/p2m.h b/xen/include/asm-x86/p2m.h
+index ac33f50..6d849a5 100644
+--- a/xen/include/asm-x86/p2m.h
++++ b/xen/include/asm-x86/p2m.h
+@@ -448,10 +448,7 @@ static inline mfn_t __nonnull(3) get_gfn_type(
+ return get_gfn_type_access(p2m_get_hostp2m(d), gfn, t, &a, q, NULL);
+ }
+
+-/* Syntactic sugar: most callers will use one of these.
+- * N.B. get_gfn_query() is the _only_ one guaranteed not to take the
+- * p2m lock; none of the others can be called with the p2m or paging
+- * lock held. */
++/* Syntactic sugar: most callers will use one of these. */
+ #define get_gfn(d, g, t) get_gfn_type((d), (g), (t), P2M_ALLOC)
+ #define get_gfn_query(d, g, t) get_gfn_type((d), (g), (t), 0)
+ #define get_gfn_unshare(d, g, t) get_gfn_type((d), (g), (t), \
Added: head/emulators/xen-kernel411/files/xsa279.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa279.patch Tue Nov 20 14:17:07 2018 (r485430)
@@ -0,0 +1,37 @@
+From: Andrew Cooper <andrew.cooper3 at citrix.com>
+Subject: x86/mm: Don't perform flush after failing to update a guests L1e
+
+If the L1e update hasn't occurred, the flush cannot do anything useful. This
+skips the potentially expensive vcpumask_to_pcpumask() conversion, and
+broadcast TLB shootdown.
+
+More importantly however, we might be in the error path due to a bad va
+parameter from the guest, and this should not propagate into the TLB flushing
+logic. The INVPCID instruction for example raises #GP for a non-canonical
+address.
+
+This is XSA-279.
+
+Reported-by: Matthew Daley <mattd at bugfuzz.com>
+Signed-off-by: Andrew Cooper <andrew.cooper3 at citrix.com>
+Reviewed-by: Jan Beulich <jbeulich at suse.com>
+
+diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c
+index 703f330..75663c6 100644
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -4155,6 +4155,14 @@ static int __do_update_va_mapping(
+ if ( pl1e )
+ unmap_domain_page(pl1e);
+
++ /*
++ * Any error at this point means that we haven't change the L1e. Skip the
++ * flush, as it won't do anything useful. Furthermore, va is guest
++ * controlled and not necesserily audited by this point.
++ */
++ if ( rc )
++ return rc;
++
+ switch ( flags & UVMF_FLUSHTYPE_MASK )
+ {
+ case UVMF_TLB_FLUSH:
Added: head/emulators/xen-kernel411/files/xsa280-1.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa280-1.patch Tue Nov 20 14:17:07 2018 (r485430)
@@ -0,0 +1,116 @@
+From: Jan Beulich <jbeulich at suse.com>
+Subject: x86/shadow: move OOS flag bit positions
+
+In preparation of reducing struct page_info's shadow_flags field to 16
+bits, lower the bit positions used for SHF_out_of_sync and
+SHF_oos_may_write.
+
+Instead of also adjusting the open coded use in _get_page_type(),
+introduce shadow_prepare_page_type_change() to contain knowledge of the
+bit positions to shadow code.
+
+This is part of XSA-280.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Tim Deegan <tim at xen.org>
+---
+v2: Rename function and pass full type.
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -2712,17 +2712,8 @@ static int _get_page_type(struct page_in
+ {
+ struct domain *d = page_get_owner(page);
+
+- /*
+- * Normally we should never let a page go from type count 0
+- * to type count 1 when it is shadowed. One exception:
+- * out-of-sync shadowed pages are allowed to become
+- * writeable.
+- */
+- if ( d && shadow_mode_enabled(d)
+- && (page->count_info & PGC_page_table)
+- && !((page->shadow_flags & (1u<<29))
+- && type == PGT_writable_page) )
+- shadow_remove_all_shadows(d, page_to_mfn(page));
++ if ( d && shadow_mode_enabled(d) )
++ shadow_prepare_page_type_change(d, page, type);
+
+ ASSERT(!(x & PGT_pae_xen_l2));
+ if ( (x & PGT_type_mask) != type )
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -749,6 +749,9 @@ int sh_unsync(struct vcpu *v, mfn_t gmfn
+ || !v->domain->arch.paging.shadow.oos_active )
+ return 0;
+
++ BUILD_BUG_ON(!(typeof(pg->shadow_flags))SHF_out_of_sync);
++ BUILD_BUG_ON(!(typeof(pg->shadow_flags))SHF_oos_may_write);
++
+ pg->shadow_flags |= SHF_out_of_sync|SHF_oos_may_write;
+ oos_hash_add(v, gmfn);
+ perfc_incr(shadow_unsync);
+@@ -2413,6 +2416,26 @@ void sh_remove_shadows(struct domain *d,
+ paging_unlock(d);
+ }
+
++void shadow_prepare_page_type_change(struct domain *d, struct page_info *page,
++ unsigned long new_type)
++{
++ if ( !(page->count_info & PGC_page_table) )
++ return;
++
++#if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC)
++ /*
++ * Normally we should never let a page go from type count 0 to type
++ * count 1 when it is shadowed. One exception: out-of-sync shadowed
++ * pages are allowed to become writeable.
++ */
++ if ( (page->shadow_flags & SHF_oos_may_write) &&
++ new_type == PGT_writable_page )
++ return;
++#endif
++
++ shadow_remove_all_shadows(d, page_to_mfn(page));
++}
++
+ static void
+ sh_remove_all_shadows_and_parents(struct domain *d, mfn_t gmfn)
+ /* Even harsher: this is a HVM page that we thing is no longer a pagetable.
+--- a/xen/arch/x86/mm/shadow/private.h
++++ b/xen/arch/x86/mm/shadow/private.h
+@@ -285,8 +285,8 @@ static inline void sh_terminate_list(str
+ * codepath is called during that time and is sensitive to oos issues, it may
+ * need to use the second flag.
+ */
+-#define SHF_out_of_sync (1u<<30)
+-#define SHF_oos_may_write (1u<<29)
++#define SHF_out_of_sync (1u << (SH_type_max_shadow + 1))
++#define SHF_oos_may_write (1u << (SH_type_max_shadow + 2))
+
+ #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
+
+--- a/xen/include/asm-x86/shadow.h
++++ b/xen/include/asm-x86/shadow.h
+@@ -81,6 +81,10 @@ void shadow_final_teardown(struct domain
+
+ void sh_remove_shadows(struct domain *d, mfn_t gmfn, int fast, int all);
+
++/* Adjust shadows ready for a guest page to change its type. */
++void shadow_prepare_page_type_change(struct domain *d, struct page_info *page,
++ unsigned long new_type);
++
+ /* Discard _all_ mappings from the domain's shadows. */
+ void shadow_blow_tables_per_domain(struct domain *d);
+
+@@ -105,6 +109,10 @@ int shadow_set_allocation(struct domain
+ static inline void sh_remove_shadows(struct domain *d, mfn_t gmfn,
+ int fast, int all) {}
+
++static inline void shadow_prepare_page_type_change(struct domain *d,
++ struct page_info *page,
++ unsigned long new_type) {}
++
+ static inline void shadow_blow_tables_per_domain(struct domain *d) {}
+
+ static inline int shadow_domctl(struct domain *d,
Added: head/emulators/xen-kernel411/files/xsa280-4.11-2.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa280-4.11-2.patch Tue Nov 20 14:17:07 2018 (r485430)
@@ -0,0 +1,141 @@
+From: Jan Beulich <jbeulich at suse.com>
+Subject: x86/shadow: shrink struct page_info's shadow_flags to 16 bits
+
+This is to avoid it overlapping the linear_pt_count field needed for PV
+domains. Introduce a separate, HVM-only pagetable_dying field to replace
+the sole one left in the upper 16 bits.
+
+Note that the accesses to ->shadow_flags in shadow_{pro,de}mote() get
+switched to non-atomic, non-bitops operations, as {test,set,clear}_bit()
+are not allowed on uint16_t fields and hence their use would have
+required ugly casts. This is fine because all updates of the field ought
+to occur with the paging lock held, and other updates of it use |= and
+&= as well (i.e. using atomic operations here didn't really guard
+against potentially racing updates elsewhere).
+
+This is part of XSA-280.
+
+Reported-by: Prgmr.com Security <security at prgmr.com>
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Tim Deegan <tim at xen.org>
+
+--- a/xen/arch/x86/mm/shadow/common.c
++++ b/xen/arch/x86/mm/shadow/common.c
+@@ -1028,10 +1028,14 @@ void shadow_promote(struct domain *d, mf
+
+ /* Is the page already shadowed? */
+ if ( !test_and_set_bit(_PGC_page_table, &page->count_info) )
++ {
+ page->shadow_flags = 0;
++ if ( is_hvm_domain(d) )
++ page->pagetable_dying = false;
++ }
+
+- ASSERT(!test_bit(type, &page->shadow_flags));
+- set_bit(type, &page->shadow_flags);
++ ASSERT(!(page->shadow_flags & (1u << type)));
++ page->shadow_flags |= 1u << type;
+ TRACE_SHADOW_PATH_FLAG(TRCE_SFLAG_PROMOTE);
+ }
+
+@@ -1040,9 +1044,9 @@ void shadow_demote(struct domain *d, mfn
+ struct page_info *page = mfn_to_page(gmfn);
+
+ ASSERT(test_bit(_PGC_page_table, &page->count_info));
+- ASSERT(test_bit(type, &page->shadow_flags));
++ ASSERT(page->shadow_flags & (1u << type));
+
+- clear_bit(type, &page->shadow_flags);
++ page->shadow_flags &= ~(1u << type);
+
+ if ( (page->shadow_flags & SHF_page_type_mask) == 0 )
+ {
+@@ -2921,7 +2925,7 @@ void sh_remove_shadows(struct domain *d,
+ if ( !fast && all && (pg->count_info & PGC_page_table) )
+ {
+ SHADOW_ERROR("can't find all shadows of mfn %"PRI_mfn" "
+- "(shadow_flags=%08x)\n",
++ "(shadow_flags=%04x)\n",
+ mfn_x(gmfn), pg->shadow_flags);
+ domain_crash(d);
+ }
+--- a/xen/arch/x86/mm/shadow/multi.c
++++ b/xen/arch/x86/mm/shadow/multi.c
+@@ -3299,8 +3299,8 @@ static int sh_page_fault(struct vcpu *v,
+
+ /* Unshadow if we are writing to a toplevel pagetable that is
+ * flagged as a dying process, and that is not currently used. */
+- if ( sh_mfn_is_a_page_table(gmfn)
+- && (mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying) )
++ if ( sh_mfn_is_a_page_table(gmfn) && is_hvm_domain(d) &&
++ mfn_to_page(gmfn)->pagetable_dying )
+ {
+ int used = 0;
+ struct vcpu *tmp;
+@@ -4254,9 +4254,9 @@ int sh_rm_write_access_from_sl1p(struct
+ ASSERT(mfn_valid(smfn));
+
+ /* Remember if we've been told that this process is being torn down */
+- if ( curr->domain == d )
++ if ( curr->domain == d && is_hvm_domain(d) )
+ curr->arch.paging.shadow.pagetable_dying
+- = !!(mfn_to_page(gmfn)->shadow_flags & SHF_pagetable_dying);
++ = mfn_to_page(gmfn)->pagetable_dying;
+
+ sp = mfn_to_page(smfn);
+
+@@ -4572,10 +4572,10 @@ static void sh_pagetable_dying(struct vc
+ : shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l2_pae_shadow);
+ }
+
+- if ( mfn_valid(smfn) )
++ if ( mfn_valid(smfn) && is_hvm_domain(d) )
+ {
+ gmfn = _mfn(mfn_to_page(smfn)->v.sh.back);
+- mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying;
++ mfn_to_page(gmfn)->pagetable_dying = true;
+ shadow_unhook_mappings(d, smfn, 1/* user pages only */);
+ flush = 1;
+ }
+@@ -4612,9 +4612,9 @@ static void sh_pagetable_dying(struct vc
+ smfn = shadow_hash_lookup(d, mfn_x(gmfn), SH_type_l4_64_shadow);
+ #endif
+
+- if ( mfn_valid(smfn) )
++ if ( mfn_valid(smfn) && is_hvm_domain(d) )
+ {
+- mfn_to_page(gmfn)->shadow_flags |= SHF_pagetable_dying;
++ mfn_to_page(gmfn)->pagetable_dying = true;
+ shadow_unhook_mappings(d, smfn, 1/* user pages only */);
+ /* Now flush the TLB: we removed toplevel mappings. */
+ flush_tlb_mask(d->dirty_cpumask);
+--- a/xen/arch/x86/mm/shadow/private.h
++++ b/xen/arch/x86/mm/shadow/private.h
+@@ -292,8 +292,6 @@ static inline void sh_terminate_list(str
+
+ #endif /* (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) */
+
+-#define SHF_pagetable_dying (1u<<31)
+-
+ static inline int sh_page_has_multiple_shadows(struct page_info *pg)
+ {
+ u32 shadows;
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -259,8 +259,15 @@ struct page_info
+ * Guest pages with a shadow. This does not conflict with
+ * tlbflush_timestamp since page table pages are explicitly not
+ * tracked for TLB-flush avoidance when a guest runs in shadow mode.
++ *
++ * pagetable_dying is used for HVM domains only. The layout here has
++ * to avoid re-use of the space used by linear_pt_count, which (only)
++ * PV guests use.
+ */
+- u32 shadow_flags;
++ struct {
++ uint16_t shadow_flags;
++ bool pagetable_dying;
++ };
+
+ /* When in use as a shadow, next shadow in this hash chain. */
+ __pdx_t next_shadow;
Added: head/emulators/xen-kernel411/files/xsa282-2.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa282-2.patch Tue Nov 20 14:17:07 2018 (r485430)
@@ -0,0 +1,42 @@
+From: Jan Beulich <jbeulich at suse.com>
+Subject: x86: work around HLE host lockup erratum
+
+XACQUIRE prefixed accesses to the 4Mb range of memory starting at 1Gb
+are liable to lock up the processor. Disallow use of this memory range.
+
+Unfortunately the available Core Gen7 and Gen8 spec updates are pretty
+old, so I can only guess that they're similarly affected when Core Gen6
+is and the Xeon counterparts are, too.
+
+This is part of XSA-282.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3 at citrix.com>
+---
+v2: Don't apply the workaround when running ourselves virtualized.
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -5853,6 +5853,22 @@ const struct platform_bad_page *__init g
+ { .mfn = 0x20138000 >> PAGE_SHIFT },
+ { .mfn = 0x40004000 >> PAGE_SHIFT },
+ };
++ static const struct platform_bad_page __initconst hle_bad_page = {
++ .mfn = 0x40000000 >> PAGE_SHIFT, .order = 10
++ };
++
++ switch ( cpuid_eax(1) & 0x000f3ff0 )
++ {
++ case 0x000406e0: /* erratum SKL167 */
++ case 0x00050650: /* erratum SKZ63 */
++ case 0x000506e0: /* errata SKL167 / SKW159 */
++ case 0x000806e0: /* erratum KBL??? */
++ case 0x000906e0: /* errata KBL??? / KBW114 / CFW103 */
++ *array_size = (cpuid_eax(0) >= 7 &&
++ !(cpuid_ecx(1) & cpufeat_mask(X86_FEATURE_HYPERVISOR)) &&
++ (cpuid_count_ebx(7, 0) & cpufeat_mask(X86_FEATURE_HLE)));
++ return &hle_bad_page;
++ }
+
+ *array_size = ARRAY_SIZE(snb_bad_pages);
+ igd_id = pci_conf_read32(0, 0, 2, 0, 0);
Added: head/emulators/xen-kernel411/files/xsa282-4.11-1.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/xsa282-4.11-1.patch Tue Nov 20 14:17:07 2018 (r485430)
@@ -0,0 +1,147 @@
+From: Jan Beulich <jbeulich at suse.com>
+Subject: x86: extend get_platform_badpages() interface
+
+Use a structure so along with an address (now frame number) an order can
+also be specified.
+
+This is part of XSA-282.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3 at citrix.com>
+
+--- a/xen/arch/x86/guest/xen.c
++++ b/xen/arch/x86/guest/xen.c
+@@ -40,7 +40,7 @@ bool __read_mostly xen_guest;
+ static __read_mostly uint32_t xen_cpuid_base;
+ extern char hypercall_page[];
+ static struct rangeset *mem;
+-static unsigned long __initdata reserved_pages[2];
++static struct platform_bad_page __initdata reserved_pages[2];
+
+ DEFINE_PER_CPU(unsigned int, vcpu_id);
+
+@@ -326,7 +326,7 @@ void __init hypervisor_fixup_e820(struct
+ panic("Unable to get " #p); \
+ mark_pfn_as_ram(e820, pfn); \
+ ASSERT(i < ARRAY_SIZE(reserved_pages)); \
+- reserved_pages[i++] = pfn << PAGE_SHIFT; \
++ reserved_pages[i++].mfn = pfn; \
+ })
+ MARK_PARAM_RAM(HVM_PARAM_STORE_PFN);
+ if ( !pv_console )
+@@ -334,7 +334,7 @@ void __init hypervisor_fixup_e820(struct
+ #undef MARK_PARAM_RAM
+ }
+
+-const unsigned long *__init hypervisor_reserved_pages(unsigned int *size)
++const struct platform_bad_page *__init hypervisor_reserved_pages(unsigned int *size)
+ {
+ ASSERT(xen_guest);
+
+--- a/xen/arch/x86/mm.c
++++ b/xen/arch/x86/mm.c
+@@ -5768,23 +5768,23 @@ void arch_dump_shared_mem_info(void)
+ mem_sharing_get_nr_saved_mfns());
+ }
+
+-const unsigned long *__init get_platform_badpages(unsigned int *array_size)
++const struct platform_bad_page *__init get_platform_badpages(unsigned int *array_size)
+ {
+ u32 igd_id;
+- static unsigned long __initdata bad_pages[] = {
+- 0x20050000,
+- 0x20110000,
+- 0x20130000,
+- 0x20138000,
+- 0x40004000,
++ static const struct platform_bad_page __initconst snb_bad_pages[] = {
++ { .mfn = 0x20050000 >> PAGE_SHIFT },
++ { .mfn = 0x20110000 >> PAGE_SHIFT },
++ { .mfn = 0x20130000 >> PAGE_SHIFT },
++ { .mfn = 0x20138000 >> PAGE_SHIFT },
++ { .mfn = 0x40004000 >> PAGE_SHIFT },
+ };
+
+- *array_size = ARRAY_SIZE(bad_pages);
++ *array_size = ARRAY_SIZE(snb_bad_pages);
+ igd_id = pci_conf_read32(0, 0, 2, 0, 0);
+- if ( !IS_SNB_GFX(igd_id) )
+- return NULL;
++ if ( IS_SNB_GFX(igd_id) )
++ return snb_bad_pages;
+
+- return bad_pages;
++ return NULL;
+ }
+
+ void paging_invlpg(struct vcpu *v, unsigned long va)
+--- a/xen/common/page_alloc.c
++++ b/xen/common/page_alloc.c
+@@ -270,7 +270,7 @@ void __init init_boot_pages(paddr_t ps,
+ unsigned long bad_spfn, bad_epfn;
+ const char *p;
+ #ifdef CONFIG_X86
+- const unsigned long *badpage = NULL;
++ const struct platform_bad_page *badpage;
+ unsigned int i, array_size;
+
+ BUILD_BUG_ON(8 * sizeof(frame_table->u.free.first_dirty) <
+@@ -299,8 +299,8 @@ void __init init_boot_pages(paddr_t ps,
+ {
+ for ( i = 0; i < array_size; i++ )
+ {
+- bootmem_region_zap(*badpage >> PAGE_SHIFT,
+- (*badpage >> PAGE_SHIFT) + 1);
++ bootmem_region_zap(badpage->mfn,
++ badpage->mfn + (1U << badpage->order));
+ badpage++;
+ }
+ }
+@@ -312,8 +312,8 @@ void __init init_boot_pages(paddr_t ps,
+ {
+ for ( i = 0; i < array_size; i++ )
+ {
+- bootmem_region_zap(*badpage >> PAGE_SHIFT,
+- (*badpage >> PAGE_SHIFT) + 1);
++ bootmem_region_zap(badpage->mfn,
++ badpage->mfn + (1U << badpage->order));
+ badpage++;
+ }
+ }
+--- a/xen/include/asm-x86/guest/xen.h
++++ b/xen/include/asm-x86/guest/xen.h
+@@ -37,7 +37,7 @@ void hypervisor_ap_setup(void);
+ int hypervisor_alloc_unused_page(mfn_t *mfn);
+ int hypervisor_free_unused_page(mfn_t mfn);
+ void hypervisor_fixup_e820(struct e820map *e820);
+-const unsigned long *hypervisor_reserved_pages(unsigned int *size);
++const struct platform_bad_page *hypervisor_reserved_pages(unsigned int *size);
+ uint32_t hypervisor_cpuid_base(void);
+ void hypervisor_resume(void);
+
+@@ -65,7 +65,7 @@ static inline void hypervisor_fixup_e820
+ ASSERT_UNREACHABLE();
+ }
+
+-static inline const unsigned long *hypervisor_reserved_pages(unsigned int *size)
++static inline const struct platform_bad_page *hypervisor_reserved_pages(unsigned int *size)
+ {
+ ASSERT_UNREACHABLE();
+ return NULL;
+--- a/xen/include/asm-x86/mm.h
++++ b/xen/include/asm-x86/mm.h
+@@ -348,7 +348,13 @@ void zap_ro_mpt(mfn_t mfn);
+
+ bool is_iomem_page(mfn_t mfn);
+
+-const unsigned long *get_platform_badpages(unsigned int *array_size);
++struct platform_bad_page {
++ unsigned long mfn;
++ unsigned int order;
++};
++
++const struct platform_bad_page *get_platform_badpages(unsigned int *array_size);
++
+ /* Per page locks:
+ * page_lock() is used for two purposes: pte serialization, and memory sharing.
+ *
More information about the svn-ports-head
mailing list