svn commit: r477316 - in head: emulators/xen-kernel411 emulators/xen-kernel411/files sysutils/xen-tools411 sysutils/xen-tools411/files
Roger Pau Monné
royger at FreeBSD.org
Thu Aug 16 09:02:05 UTC 2018
Author: royger (src committer)
Date: Thu Aug 16 09:02:02 2018
New Revision: 477316
URL: https://svnweb.freebsd.org/changeset/ports/477316
Log:
xen411: apply fixes for XSA-269, XSA-272 and XSA-273
Added:
head/emulators/xen-kernel411/files/0001-xen-Port-the-array_index_nospec-infrastructure-from-.patch (contents, props changed)
head/emulators/xen-kernel411/files/0002-x86-correctly-set-nonlazy_xstate_used-when-loading-f.patch (contents, props changed)
head/emulators/xen-kernel411/files/0003-x86-spec-ctrl-command-line-handling-adjustments.patch (contents, props changed)
head/emulators/xen-kernel411/files/0005-mm-page_alloc-correct-first_dirty-calculations-durin.patch (contents, props changed)
head/emulators/xen-kernel411/files/0006-allow-cpu_down-to-be-called-earlier.patch (contents, props changed)
head/emulators/xen-kernel411/files/0007-x86-svm-Fixes-and-cleanup-to-svm_inject_event.patch (contents, props changed)
head/emulators/xen-kernel411/files/0008-cpupools-fix-state-when-downing-a-CPU-failed.patch (contents, props changed)
head/emulators/xen-kernel411/files/0009-x86-AMD-distinguish-compute-units-from-hyper-threads.patch (contents, props changed)
head/emulators/xen-kernel411/files/0010-x86-distinguish-CPU-offlining-from-CPU-removal.patch (contents, props changed)
head/emulators/xen-kernel411/files/0011-x86-possibly-bring-up-all-CPUs-even-if-not-all-are-s.patch (contents, props changed)
head/emulators/xen-kernel411/files/0012-x86-command-line-option-to-avoid-use-of-secondary-hy.patch (contents, props changed)
head/emulators/xen-kernel411/files/0013-x86-vmx-Don-t-clobber-dr6-while-debugging-state-is-l.patch (contents, props changed)
head/emulators/xen-kernel411/files/0014-x86-xstate-Use-a-guests-CPUID-policy-rather-than-all.patch (contents, props changed)
head/emulators/xen-kernel411/files/0015-x86-xstate-Make-errors-in-xstate-calculations-more-o.patch (contents, props changed)
head/emulators/xen-kernel411/files/0016-x86-hvm-Disallow-unknown-MSR_EFER-bits.patch (contents, props changed)
head/emulators/xen-kernel411/files/0017-x86-spec-ctrl-Fix-the-parsing-of-xpti-on-fixed-Intel.patch (contents, props changed)
head/emulators/xen-kernel411/files/0018-x86-spec-ctrl-Yet-more-fixes-for-xpti-parsing.patch (contents, props changed)
head/emulators/xen-kernel411/files/0019-x86-vmx-Fix-handing-of-MSR_DEBUGCTL-on-VMExit.patch (contents, props changed)
head/emulators/xen-kernel411/files/0020-x86-vmx-Defer-vmx_vmcs_exit-as-long-as-possible-in-c.patch (contents, props changed)
head/emulators/xen-kernel411/files/0021-x86-vmx-API-improvements-for-MSR-load-save-infrastru.patch (contents, props changed)
head/emulators/xen-kernel411/files/0022-x86-vmx-Internal-cleanup-for-MSR-load-save-infrastru.patch (contents, props changed)
head/emulators/xen-kernel411/files/0023-x86-vmx-Factor-locate_msr_entry-out-of-vmx_find_msr-.patch (contents, props changed)
head/emulators/xen-kernel411/files/0024-x86-vmx-Support-remote-access-to-the-MSR-lists.patch (contents, props changed)
head/emulators/xen-kernel411/files/0025-x86-vmx-Improvements-to-LBR-MSR-handling.patch (contents, props changed)
head/emulators/xen-kernel411/files/0026-x86-vmx-Pass-an-MSR-value-into-vmx_msr_add.patch (contents, props changed)
head/emulators/xen-kernel411/files/0027-x86-vmx-Support-load-only-guest-MSR-list-entries.patch (contents, props changed)
head/emulators/xen-kernel411/files/0028-VMX-fix-vmx_-find-del-_msr-build.patch (contents, props changed)
head/emulators/xen-kernel411/files/0029-ARM-disable-grant-table-v2.patch (contents, props changed)
head/emulators/xen-kernel411/files/0030-x86-vtx-Fix-the-checking-for-unknown-invalid-MSR_DEB.patch (contents, props changed)
head/emulators/xen-kernel411/files/0032-x86-spec-ctrl-Calculate-safe-PTE-addresses-for-L1TF-.patch (contents, props changed)
head/emulators/xen-kernel411/files/0033-x86-spec-ctrl-Introduce-an-option-to-control-L1TF-mi.patch (contents, props changed)
head/emulators/xen-kernel411/files/0034-x86-shadow-Infrastructure-to-force-a-PV-guest-into-s.patch (contents, props changed)
head/emulators/xen-kernel411/files/0035-x86-mm-Plumbing-to-allow-any-PTE-update-to-fail-with.patch (contents, props changed)
head/emulators/xen-kernel411/files/0036-x86-pv-Force-a-guest-into-shadow-mode-when-it-writes.patch (contents, props changed)
head/emulators/xen-kernel411/files/0037-x86-spec-ctrl-CPUID-MSR-definitions-for-L1D_FLUSH.patch (contents, props changed)
head/emulators/xen-kernel411/files/0038-x86-msr-Virtualise-MSR_FLUSH_CMD-for-guests.patch (contents, props changed)
head/emulators/xen-kernel411/files/0039-x86-spec-ctrl-Introduce-an-option-to-control-L1D_FLU.patch (contents, props changed)
head/emulators/xen-kernel411/files/0040-x86-Make-spec-ctrl-no-a-global-disable-of-all-mitiga.patch (contents, props changed)
head/emulators/xen-kernel411/files/0042-x86-write-to-correct-variable-in-parse_pv_l1tf.patch (contents, props changed)
head/sysutils/xen-tools411/files/0031-tools-oxenstored-Make-evaluation-order-explicit.patch (contents, props changed)
head/sysutils/xen-tools411/files/0041-xl.conf-Add-global-affinity-masks.patch (contents, props changed)
Modified:
head/emulators/xen-kernel411/Makefile
head/sysutils/xen-tools411/Makefile
Modified: head/emulators/xen-kernel411/Makefile
==============================================================================
--- head/emulators/xen-kernel411/Makefile Thu Aug 16 08:56:17 2018 (r477315)
+++ head/emulators/xen-kernel411/Makefile Thu Aug 16 09:02:02 2018 (r477316)
@@ -2,7 +2,7 @@
PORTNAME= xen
PORTVERSION= 4.11.0
-PORTREVISION= 0
+PORTREVISION= 1
CATEGORIES= emulators
MASTER_SITES= http://downloads.xenproject.org/release/xen/${PORTVERSION}/
PKGNAMESUFFIX= -kernel411
@@ -47,6 +47,49 @@ EXTRA_PATCHES+= ${FILESDIR}/0001-x86-replace-usage-in-
${FILESDIR}/0002-x86-efi-split-compiler-vs-linker-support.patch:-p1
# Fix PVH Dom0 build with shadow paging
EXTRA_PATCHES+= ${FILESDIR}/0001-x86-pvh-change-the-order-of-the-iommu-initialization.patch:-p1
+# XSA-269 (MSR_DEBUGCTL handling) and XSA-273 (L1TF)
+# Note that due to the high number of patches needed to fix L1TF the package is
+# brought up to the state of the staging-4.11 branch. This can be removed when
+# 4.11.1 is released.
+EXTRA_PATCHES+= ${FILESDIR}/0001-xen-Port-the-array_index_nospec-infrastructure-from-.patch:-p1 \
+ ${FILESDIR}/0002-x86-correctly-set-nonlazy_xstate_used-when-loading-f.patch:-p1 \
+ ${FILESDIR}/0003-x86-spec-ctrl-command-line-handling-adjustments.patch:-p1 \
+ ${FILESDIR}/0005-mm-page_alloc-correct-first_dirty-calculations-durin.patch:-p1 \
+ ${FILESDIR}/0006-allow-cpu_down-to-be-called-earlier.patch:-p1 \
+ ${FILESDIR}/0007-x86-svm-Fixes-and-cleanup-to-svm_inject_event.patch:-p1 \
+ ${FILESDIR}/0008-cpupools-fix-state-when-downing-a-CPU-failed.patch:-p1 \
+ ${FILESDIR}/0009-x86-AMD-distinguish-compute-units-from-hyper-threads.patch:-p1 \
+ ${FILESDIR}/0010-x86-distinguish-CPU-offlining-from-CPU-removal.patch:-p1 \
+ ${FILESDIR}/0011-x86-possibly-bring-up-all-CPUs-even-if-not-all-are-s.patch:-p1 \
+ ${FILESDIR}/0012-x86-command-line-option-to-avoid-use-of-secondary-hy.patch:-p1 \
+ ${FILESDIR}/0013-x86-vmx-Don-t-clobber-dr6-while-debugging-state-is-l.patch:-p1 \
+ ${FILESDIR}/0014-x86-xstate-Use-a-guests-CPUID-policy-rather-than-all.patch:-p1 \
+ ${FILESDIR}/0015-x86-xstate-Make-errors-in-xstate-calculations-more-o.patch:-p1 \
+ ${FILESDIR}/0016-x86-hvm-Disallow-unknown-MSR_EFER-bits.patch:-p1 \
+ ${FILESDIR}/0017-x86-spec-ctrl-Fix-the-parsing-of-xpti-on-fixed-Intel.patch:-p1 \
+ ${FILESDIR}/0018-x86-spec-ctrl-Yet-more-fixes-for-xpti-parsing.patch:-p1 \
+ ${FILESDIR}/0019-x86-vmx-Fix-handing-of-MSR_DEBUGCTL-on-VMExit.patch:-p1 \
+ ${FILESDIR}/0020-x86-vmx-Defer-vmx_vmcs_exit-as-long-as-possible-in-c.patch:-p1 \
+ ${FILESDIR}/0021-x86-vmx-API-improvements-for-MSR-load-save-infrastru.patch:-p1 \
+ ${FILESDIR}/0022-x86-vmx-Internal-cleanup-for-MSR-load-save-infrastru.patch:-p1 \
+ ${FILESDIR}/0023-x86-vmx-Factor-locate_msr_entry-out-of-vmx_find_msr-.patch:-p1 \
+ ${FILESDIR}/0024-x86-vmx-Support-remote-access-to-the-MSR-lists.patch:-p1 \
+ ${FILESDIR}/0025-x86-vmx-Improvements-to-LBR-MSR-handling.patch:-p1 \
+ ${FILESDIR}/0026-x86-vmx-Pass-an-MSR-value-into-vmx_msr_add.patch:-p1 \
+ ${FILESDIR}/0027-x86-vmx-Support-load-only-guest-MSR-list-entries.patch:-p1 \
+ ${FILESDIR}/0028-VMX-fix-vmx_-find-del-_msr-build.patch:-p1 \
+ ${FILESDIR}/0029-ARM-disable-grant-table-v2.patch:-p1 \
+ ${FILESDIR}/0030-x86-vtx-Fix-the-checking-for-unknown-invalid-MSR_DEB.patch:-p1 \
+ ${FILESDIR}/0032-x86-spec-ctrl-Calculate-safe-PTE-addresses-for-L1TF-.patch:-p1 \
+ ${FILESDIR}/0033-x86-spec-ctrl-Introduce-an-option-to-control-L1TF-mi.patch:-p1 \
+ ${FILESDIR}/0034-x86-shadow-Infrastructure-to-force-a-PV-guest-into-s.patch:-p1 \
+ ${FILESDIR}/0035-x86-mm-Plumbing-to-allow-any-PTE-update-to-fail-with.patch:-p1 \
+ ${FILESDIR}/0036-x86-pv-Force-a-guest-into-shadow-mode-when-it-writes.patch:-p1 \
+ ${FILESDIR}/0037-x86-spec-ctrl-CPUID-MSR-definitions-for-L1D_FLUSH.patch:-p1 \
+ ${FILESDIR}/0038-x86-msr-Virtualise-MSR_FLUSH_CMD-for-guests.patch:-p1 \
+ ${FILESDIR}/0039-x86-spec-ctrl-Introduce-an-option-to-control-L1D_FLU.patch:-p1 \
+ ${FILESDIR}/0040-x86-Make-spec-ctrl-no-a-global-disable-of-all-mitiga.patch:-p1 \
+ ${FILESDIR}/0042-x86-write-to-correct-variable-in-parse_pv_l1tf.patch:-p1
.include <bsd.port.options.mk>
Added: head/emulators/xen-kernel411/files/0001-xen-Port-the-array_index_nospec-infrastructure-from-.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0001-xen-Port-the-array_index_nospec-infrastructure-from-.patch Thu Aug 16 09:02:02 2018 (r477316)
@@ -0,0 +1,213 @@
+From e932371d6ae0f69b89abb2dce725483c75356de2 Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3 at citrix.com>
+Date: Mon, 30 Jul 2018 11:17:27 +0200
+Subject: [PATCH 01/42] xen: Port the array_index_nospec() infrastructure from
+ Linux
+
+This is as the infrastructure appeared in Linux 4.17, adapted slightly for
+Xen.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3 at citrix.com>
+Signed-off-by: Julien Grall <julien.grall at arm.com>
+Acked-by: Jan Beulich <jbeulich at suse.com>
+master commit: 2ddfae51d8b1d7b8cd33a4f6ad4d16d27cb869ae
+master date: 2018-07-06 16:49:57 +0100
+---
+ xen/include/asm-arm/arm32/system.h | 18 ++++++++
+ xen/include/asm-arm/arm64/system.h | 22 ++++++++++
+ xen/include/asm-x86/system.h | 24 ++++++++++
+ xen/include/xen/compiler.h | 3 ++
+ xen/include/xen/nospec.h | 70 ++++++++++++++++++++++++++++++
+ 5 files changed, 137 insertions(+)
+ create mode 100644 xen/include/xen/nospec.h
+
+diff --git a/xen/include/asm-arm/arm32/system.h b/xen/include/asm-arm/arm32/system.h
+index c617b40438..ab57abfbc5 100644
+--- a/xen/include/asm-arm/arm32/system.h
++++ b/xen/include/asm-arm/arm32/system.h
+@@ -48,6 +48,24 @@ static inline int local_fiq_is_enabled(void)
+ return !(flags & PSR_FIQ_MASK);
+ }
+
++#define CSDB ".inst 0xe320f014"
++
++static inline unsigned long array_index_mask_nospec(unsigned long idx,
++ unsigned long sz)
++{
++ unsigned long mask;
++
++ asm volatile( "cmp %1, %2\n"
++ "sbc %0, %1, %1\n"
++ CSDB
++ : "=r" (mask)
++ : "r" (idx), "Ir" (sz)
++ : "cc" );
++
++ return mask;
++}
++#define array_index_mask_nospec array_index_mask_nospec
++
+ #endif
+ /*
+ * Local variables:
+diff --git a/xen/include/asm-arm/arm64/system.h b/xen/include/asm-arm/arm64/system.h
+index 2e2ee212a1..2e36573ac6 100644
+--- a/xen/include/asm-arm/arm64/system.h
++++ b/xen/include/asm-arm/arm64/system.h
+@@ -58,6 +58,28 @@ static inline int local_fiq_is_enabled(void)
+ return !(flags & PSR_FIQ_MASK);
+ }
+
++#define csdb() asm volatile ( "hint #20" : : : "memory" )
++
++/*
++ * Generate a mask for array_index_nospec() that is ~0UL when 0 <= idx < sz
++ * and 0 otherwise.
++ */
++static inline unsigned long array_index_mask_nospec(unsigned long idx,
++ unsigned long sz)
++{
++ unsigned long mask;
++
++ asm volatile ( "cmp %1, %2\n"
++ "sbc %0, xzr, xzr\n"
++ : "=r" (mask)
++ : "r" (idx), "Ir" (sz)
++ : "cc" );
++ csdb();
++
++ return mask;
++}
++#define array_index_mask_nospec array_index_mask_nospec
++
+ #endif
+ /*
+ * Local variables:
+diff --git a/xen/include/asm-x86/system.h b/xen/include/asm-x86/system.h
+index 43fb6fe489..483cd20afd 100644
+--- a/xen/include/asm-x86/system.h
++++ b/xen/include/asm-x86/system.h
+@@ -221,6 +221,30 @@ static always_inline unsigned long __xadd(
+ #define set_mb(var, value) do { xchg(&var, value); } while (0)
+ #define set_wmb(var, value) do { var = value; smp_wmb(); } while (0)
+
++/**
++ * array_index_mask_nospec() - generate a mask that is ~0UL when the
++ * bounds check succeeds and 0 otherwise
++ * @index: array element index
++ * @size: number of elements in array
++ *
++ * Returns:
++ * 0 - (index < size)
++ */
++static inline unsigned long array_index_mask_nospec(unsigned long index,
++ unsigned long size)
++{
++ unsigned long mask;
++
++ asm volatile ( "cmp %[size], %[index]; sbb %[mask], %[mask];"
++ : [mask] "=r" (mask)
++ : [size] "g" (size), [index] "r" (index) );
++
++ return mask;
++}
++
++/* Override default implementation in nospec.h. */
++#define array_index_mask_nospec array_index_mask_nospec
++
+ #define local_irq_disable() asm volatile ( "cli" : : : "memory" )
+ #define local_irq_enable() asm volatile ( "sti" : : : "memory" )
+
+diff --git a/xen/include/xen/compiler.h b/xen/include/xen/compiler.h
+index 533a8ea0f3..a7e05681c9 100644
+--- a/xen/include/xen/compiler.h
++++ b/xen/include/xen/compiler.h
+@@ -81,6 +81,9 @@
+ #pragma GCC visibility push(hidden)
+ #endif
+
++/* Make the optimizer believe the variable can be manipulated arbitrarily. */
++#define OPTIMIZER_HIDE_VAR(var) __asm__ ( "" : "+g" (var) )
++
+ /* This macro obfuscates arithmetic on a variable address so that gcc
+ shouldn't recognize the original var, and make assumptions about it */
+ /*
+diff --git a/xen/include/xen/nospec.h b/xen/include/xen/nospec.h
+new file mode 100644
+index 0000000000..48793996e8
+--- /dev/null
++++ b/xen/include/xen/nospec.h
+@@ -0,0 +1,70 @@
++/* SPDX-License-Identifier: GPL-2.0 */
++/* Copyright(c) 2018 Linus Torvalds. All rights reserved. */
++/* Copyright(c) 2018 Alexei Starovoitov. All rights reserved. */
++/* Copyright(c) 2018 Intel Corporation. All rights reserved. */
++/* Copyright(c) 2018 Citrix Systems R&D Ltd. All rights reserved. */
++
++#ifndef XEN_NOSPEC_H
++#define XEN_NOSPEC_H
++
++#include <asm/system.h>
++
++/**
++ * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise
++ * @index: array element index
++ * @size: number of elements in array
++ *
++ * When @index is out of bounds (@index >= @size), the sign bit will be
++ * set. Extend the sign bit to all bits and invert, giving a result of
++ * zero for an out of bounds index, or ~0 if within bounds [0, @size).
++ */
++#ifndef array_index_mask_nospec
++static inline unsigned long array_index_mask_nospec(unsigned long index,
++ unsigned long size)
++{
++ /*
++ * Always calculate and emit the mask even if the compiler
++ * thinks the mask is not needed. The compiler does not take
++ * into account the value of @index under speculation.
++ */
++ OPTIMIZER_HIDE_VAR(index);
++ return ~(long)(index | (size - 1UL - index)) >> (BITS_PER_LONG - 1);
++}
++#endif
++
++/*
++ * array_index_nospec - sanitize an array index after a bounds check
++ *
++ * For a code sequence like:
++ *
++ * if (index < size) {
++ * index = array_index_nospec(index, size);
++ * val = array[index];
++ * }
++ *
++ * ...if the CPU speculates past the bounds check then
++ * array_index_nospec() will clamp the index within the range of [0,
++ * size).
++ */
++#define array_index_nospec(index, size) \
++({ \
++ typeof(index) _i = (index); \
++ typeof(size) _s = (size); \
++ unsigned long _mask = array_index_mask_nospec(_i, _s); \
++ \
++ BUILD_BUG_ON(sizeof(_i) > sizeof(long)); \
++ BUILD_BUG_ON(sizeof(_s) > sizeof(long)); \
++ \
++ (typeof(_i)) (_i & _mask); \
++})
++
++#endif /* XEN_NOSPEC_H */
++
++/*
++ * Local variables:
++ * mode: C
++ * c-file-style: "BSD"
++ * c-basic-offset: 4
++ * indent-tabs-mode: nil
++ * End:
++ */
+--
+2.18.0
+
Added: head/emulators/xen-kernel411/files/0002-x86-correctly-set-nonlazy_xstate_used-when-loading-f.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0002-x86-correctly-set-nonlazy_xstate_used-when-loading-f.patch Thu Aug 16 09:02:02 2018 (r477316)
@@ -0,0 +1,51 @@
+From da33530ab393dcc04d3e35424956277669b8d8ce Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich at suse.com>
+Date: Mon, 30 Jul 2018 11:18:54 +0200
+Subject: [PATCH 02/42] x86: correctly set nonlazy_xstate_used when loading
+ full state
+
+In this case, just like xcr0_accum, nonlazy_xstate_used should always be
+set to the intended new value, rather than possibly leaving the flag set
+from a prior state load.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Wei Liu <wei.liu2 at citrix.com>
+Acked-by: Andrew Cooper <andrew.cooper3 at citrix.com>
+master commit: f46bf0e101ca63118b9db2616e8f51e972d7f563
+master date: 2018-07-09 10:51:02 +0200
+---
+ xen/arch/x86/domctl.c | 3 +--
+ xen/arch/x86/hvm/hvm.c | 3 +--
+ 2 files changed, 2 insertions(+), 4 deletions(-)
+
+diff --git a/xen/arch/x86/domctl.c b/xen/arch/x86/domctl.c
+index 8fbbf3aeb3..b04388d663 100644
+--- a/xen/arch/x86/domctl.c
++++ b/xen/arch/x86/domctl.c
+@@ -1187,8 +1187,7 @@ long arch_do_domctl(
+ vcpu_pause(v);
+ v->arch.xcr0 = _xcr0;
+ v->arch.xcr0_accum = _xcr0_accum;
+- if ( _xcr0_accum & XSTATE_NONLAZY )
+- v->arch.nonlazy_xstate_used = 1;
++ v->arch.nonlazy_xstate_used = _xcr0_accum & XSTATE_NONLAZY;
+ compress_xsave_states(v, _xsave_area,
+ evc->size - PV_XSAVE_HDR_SIZE);
+ vcpu_unpause(v);
+diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c
+index c23983cdff..279cb88e45 100644
+--- a/xen/arch/x86/hvm/hvm.c
++++ b/xen/arch/x86/hvm/hvm.c
+@@ -1324,8 +1324,7 @@ static int hvm_load_cpu_xsave_states(struct domain *d, hvm_domain_context_t *h)
+
+ v->arch.xcr0 = ctxt->xcr0;
+ v->arch.xcr0_accum = ctxt->xcr0_accum;
+- if ( ctxt->xcr0_accum & XSTATE_NONLAZY )
+- v->arch.nonlazy_xstate_used = 1;
++ v->arch.nonlazy_xstate_used = ctxt->xcr0_accum & XSTATE_NONLAZY;
+ compress_xsave_states(v, &ctxt->save_area,
+ size - offsetof(struct hvm_hw_cpu_xsave, save_area));
+
+--
+2.18.0
+
Added: head/emulators/xen-kernel411/files/0003-x86-spec-ctrl-command-line-handling-adjustments.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0003-x86-spec-ctrl-command-line-handling-adjustments.patch Thu Aug 16 09:02:02 2018 (r477316)
@@ -0,0 +1,45 @@
+From 4bdeedbd611c59f07878eb22955f655a81452835 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich at suse.com>
+Date: Mon, 30 Jul 2018 11:19:41 +0200
+Subject: [PATCH 03/42] x86/spec-ctrl: command line handling adjustments
+
+For one, "no-xen" should not imply "no-eager-fpu", as "eager FPU" mode
+is to guard guests, not Xen itself, which is also expressed so by
+print_details().
+
+And then opt_ssbd, despite being off by default, should also be cleared
+by the "no" and "no-xen" sub-options.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3 at citrix.com>
+master commit: ac3f9a72141a48d40fabfff561d5a7dc0e1b810d
+master date: 2018-07-10 12:22:31 +0200
+---
+ xen/arch/x86/spec_ctrl.c | 4 +++-
+ 1 file changed, 3 insertions(+), 1 deletion(-)
+
+diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c
+index 08e6784c4c..73dc7170c7 100644
+--- a/xen/arch/x86/spec_ctrl.c
++++ b/xen/arch/x86/spec_ctrl.c
+@@ -124,6 +124,8 @@ static int __init parse_spec_ctrl(const char *s)
+ opt_msr_sc_pv = false;
+ opt_msr_sc_hvm = false;
+
++ opt_eager_fpu = 0;
++
+ disable_common:
+ opt_rsb_pv = false;
+ opt_rsb_hvm = false;
+@@ -131,7 +133,7 @@ static int __init parse_spec_ctrl(const char *s)
+ opt_thunk = THUNK_JMP;
+ opt_ibrs = 0;
+ opt_ibpb = false;
+- opt_eager_fpu = 0;
++ opt_ssbd = false;
+ }
+ else if ( val > 0 )
+ rc = -EINVAL;
+--
+2.18.0
+
Added: head/emulators/xen-kernel411/files/0005-mm-page_alloc-correct-first_dirty-calculations-durin.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0005-mm-page_alloc-correct-first_dirty-calculations-durin.patch Thu Aug 16 09:02:02 2018 (r477316)
@@ -0,0 +1,66 @@
+From ac35e050b64a565fe234dd42e8dac163e946e58d Mon Sep 17 00:00:00 2001
+From: Sergey Dyasli <sergey.dyasli at citrix.com>
+Date: Mon, 30 Jul 2018 11:21:28 +0200
+Subject: [PATCH 05/42] mm/page_alloc: correct first_dirty calculations during
+ block merging
+
+Currently it's possible to hit an assertion in alloc_heap_pages():
+
+Assertion 'first_dirty != INVALID_DIRTY_IDX || !(pg[i].count_info & PGC_need_scrub)' failed at page_alloc.c:988
+
+This can happen because a piece of logic to calculate first_dirty
+during block merging in free_heap_pages() is missing for the following
+scenario:
+
+1. Current block's first_dirty equals INVALID_DIRTY_IDX
+2. Successor block is free but its first_dirty != INVALID_DIRTY_IDX
+3. The successor is merged into the current block
+4. Current block's first_dirty still equals INVALID_DIRTY_IDX
+
+This will trigger the assertion during allocation of such block in
+alloc_heap_pages() because there will be pages with PGC_need_scrub
+bit set despite the claim of first_dirty that the block is scrubbed.
+
+Add the missing piece of logic and slightly update the comment for
+the predecessor case to better capture the code's intent.
+
+Fixes 1a37f33ea613 ("mm: Place unscrubbed pages at the end of pagelist")
+
+Signed-off-by: Sergey Dyasli <sergey.dyasli at citrix.com>
+Reviewed-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+master commit: 1e2df9608857b5355f2ec3b1a34b87a2007dcd16
+master date: 2018-07-12 10:45:11 +0200
+---
+ xen/common/page_alloc.c | 8 +++++++-
+ 1 file changed, 7 insertions(+), 1 deletion(-)
+
+diff --git a/xen/common/page_alloc.c b/xen/common/page_alloc.c
+index 20ee1e4897..02aeed7c47 100644
+--- a/xen/common/page_alloc.c
++++ b/xen/common/page_alloc.c
+@@ -1426,7 +1426,7 @@ static void free_heap_pages(
+
+ page_list_del(predecessor, &heap(node, zone, order));
+
+- /* Keep predecessor's first_dirty if it is already set. */
++ /* Update predecessor's first_dirty if necessary. */
+ if ( predecessor->u.free.first_dirty == INVALID_DIRTY_IDX &&
+ pg->u.free.first_dirty != INVALID_DIRTY_IDX )
+ predecessor->u.free.first_dirty = (1U << order) +
+@@ -1447,6 +1447,12 @@ static void free_heap_pages(
+
+ check_and_stop_scrub(successor);
+
++ /* Update pg's first_dirty if necessary. */
++ if ( pg->u.free.first_dirty == INVALID_DIRTY_IDX &&
++ successor->u.free.first_dirty != INVALID_DIRTY_IDX )
++ pg->u.free.first_dirty = (1U << order) +
++ successor->u.free.first_dirty;
++
+ page_list_del(successor, &heap(node, zone, order));
+ }
+
+--
+2.18.0
+
Added: head/emulators/xen-kernel411/files/0006-allow-cpu_down-to-be-called-earlier.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0006-allow-cpu_down-to-be-called-earlier.patch Thu Aug 16 09:02:02 2018 (r477316)
@@ -0,0 +1,58 @@
+From a44cf0c8728e08858638170a057675ca5479fdc7 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich at suse.com>
+Date: Mon, 30 Jul 2018 11:22:06 +0200
+Subject: [PATCH 06/42] allow cpu_down() to be called earlier
+
+The function's use of the stop-machine logic has so far prevented its
+use ahead of the processing of the "ordinary" initcalls. Since at this
+early time we're in a controlled environment anyway, there's no need for
+such a heavy tool. Additionally this ought to have less of a performance
+impact especially on large systems, compared to the alternative of
+making stop-machine functionality available earlier.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Wei Liu <wei.liu2 at citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3 at citrix.com>
+master commit: 5894c0a2da66243a89088d309c7e1ea212ab28d6
+master date: 2018-07-16 15:15:12 +0200
+---
+ xen/common/cpu.c | 11 +++++++++--
+ 1 file changed, 9 insertions(+), 2 deletions(-)
+
+diff --git a/xen/common/cpu.c b/xen/common/cpu.c
+index 6350f150bd..653a56b840 100644
+--- a/xen/common/cpu.c
++++ b/xen/common/cpu.c
+@@ -67,12 +67,17 @@ void __init register_cpu_notifier(struct notifier_block *nb)
+ spin_unlock(&cpu_add_remove_lock);
+ }
+
+-static int take_cpu_down(void *unused)
++static void _take_cpu_down(void *unused)
+ {
+ void *hcpu = (void *)(long)smp_processor_id();
+ int notifier_rc = notifier_call_chain(&cpu_chain, CPU_DYING, hcpu, NULL);
+ BUG_ON(notifier_rc != NOTIFY_DONE);
+ __cpu_disable();
++}
++
++static int take_cpu_down(void *arg)
++{
++ _take_cpu_down(arg);
+ return 0;
+ }
+
+@@ -98,7 +103,9 @@ int cpu_down(unsigned int cpu)
+ goto fail;
+ }
+
+- if ( (err = stop_machine_run(take_cpu_down, NULL, cpu)) < 0 )
++ if ( unlikely(system_state < SYS_STATE_active) )
++ on_selected_cpus(cpumask_of(cpu), _take_cpu_down, NULL, true);
++ else if ( (err = stop_machine_run(take_cpu_down, NULL, cpu)) < 0 )
+ goto fail;
+
+ __cpu_die(cpu);
+--
+2.18.0
+
Added: head/emulators/xen-kernel411/files/0007-x86-svm-Fixes-and-cleanup-to-svm_inject_event.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0007-x86-svm-Fixes-and-cleanup-to-svm_inject_event.patch Thu Aug 16 09:02:02 2018 (r477316)
@@ -0,0 +1,109 @@
+From b53e0defcea1400c03f83d1d5cc30a3b237c8cfe Mon Sep 17 00:00:00 2001
+From: Andrew Cooper <andrew.cooper3 at citrix.com>
+Date: Mon, 30 Jul 2018 11:22:42 +0200
+Subject: [PATCH 07/42] x86/svm Fixes and cleanup to svm_inject_event()
+
+ * State adjustments (and debug tracing) for #DB/#BP/#PF should not be done
+ for `int $n` instructions. Updates to %cr2 occur even if the exception
+ combines to #DF.
+ * Don't opencode DR_STEP when updating %dr6.
+ * Simplify the logic for calling svm_emul_swint_injection() as in the common
+ case, every condition needs checking.
+ * Fix comments which have become stale as code has moved between components.
+
+Signed-off-by: Andrew Cooper <andrew.cooper3 at citrix.com>
+Reviewed-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Boris Ostrovsky <boris.ostrovsky at oracle.com>
+master commit: 8dab867c81ede455009028a9a88edc4ff3b9da88
+master date: 2018-07-17 10:12:40 +0100
+---
+ xen/arch/x86/hvm/svm/svm.c | 41 ++++++++++++++++----------------------
+ 1 file changed, 17 insertions(+), 24 deletions(-)
+
+diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c
+index 165500e3f2..b964c59dad 100644
+--- a/xen/arch/x86/hvm/svm/svm.c
++++ b/xen/arch/x86/hvm/svm/svm.c
+@@ -1432,24 +1432,18 @@ static void svm_inject_event(const struct x86_event *event)
+ * Xen must emulate enough of the event injection to be sure that a
+ * further fault shouldn't occur during delivery. This covers the fact
+ * that hardware doesn't perform DPL checking on injection.
+- *
+- * Also, it accounts for proper positioning of %rip for an event with trap
+- * semantics (where %rip should point after the instruction) which suffers
+- * a fault during injection (at which point %rip should point at the
+- * instruction).
+ */
+ if ( event->type == X86_EVENTTYPE_PRI_SW_EXCEPTION ||
+- (!cpu_has_svm_nrips && (event->type == X86_EVENTTYPE_SW_INTERRUPT ||
+- event->type == X86_EVENTTYPE_SW_EXCEPTION)) )
++ (!cpu_has_svm_nrips && (event->type >= X86_EVENTTYPE_SW_INTERRUPT)) )
+ svm_emul_swint_injection(&_event);
+
+- switch ( _event.vector )
++ switch ( _event.vector | -(_event.type == X86_EVENTTYPE_SW_INTERRUPT) )
+ {
+ case TRAP_debug:
+ if ( regs->eflags & X86_EFLAGS_TF )
+ {
+ __restore_debug_registers(vmcb, curr);
+- vmcb_set_dr6(vmcb, vmcb_get_dr6(vmcb) | 0x4000);
++ vmcb_set_dr6(vmcb, vmcb_get_dr6(vmcb) | DR_STEP);
+ }
+ /* fall through */
+ case TRAP_int3:
+@@ -1459,6 +1453,13 @@ static void svm_inject_event(const struct x86_event *event)
+ domain_pause_for_debugger();
+ return;
+ }
++ break;
++
++ case TRAP_page_fault:
++ ASSERT(_event.type == X86_EVENTTYPE_HW_EXCEPTION);
++ curr->arch.hvm_vcpu.guest_cr[2] = _event.cr2;
++ vmcb_set_cr2(vmcb, _event.cr2);
++ break;
+ }
+
+ if ( unlikely(eventinj.fields.v) &&
+@@ -1481,13 +1482,9 @@ static void svm_inject_event(const struct x86_event *event)
+ * icebp, software events with trap semantics need emulating, so %rip in
+ * the trap frame points after the instruction.
+ *
+- * The x86 emulator (if requested by the x86_swint_emulate_* choice) will
+- * have performed checks such as presence/dpl/etc and believes that the
+- * event injection will succeed without faulting.
+- *
+- * The x86 emulator will always provide fault semantics for software
+- * events, with _trap.insn_len set appropriately. If the injection
+- * requires emulation, move %rip forwards at this point.
++ * svm_emul_swint_injection() has already confirmed that events with trap
++ * semantics won't fault on injection. Position %rip/NextRIP suitably,
++ * and restrict the event type to what hardware will tolerate.
+ */
+ switch ( _event.type )
+ {
+@@ -1544,16 +1541,12 @@ static void svm_inject_event(const struct x86_event *event)
+ eventinj.fields.errorcode == (uint16_t)eventinj.fields.errorcode);
+ vmcb->eventinj = eventinj;
+
+- if ( _event.vector == TRAP_page_fault )
+- {
+- curr->arch.hvm_vcpu.guest_cr[2] = _event.cr2;
+- vmcb_set_cr2(vmcb, _event.cr2);
+- HVMTRACE_LONG_2D(PF_INJECT, _event.error_code, TRC_PAR_LONG(_event.cr2));
+- }
++ if ( _event.vector == TRAP_page_fault &&
++ _event.type == X86_EVENTTYPE_HW_EXCEPTION )
++ HVMTRACE_LONG_2D(PF_INJECT, _event.error_code,
++ TRC_PAR_LONG(_event.cr2));
+ else
+- {
+ HVMTRACE_2D(INJ_EXC, _event.vector, _event.error_code);
+- }
+ }
+
+ static int svm_event_pending(struct vcpu *v)
+--
+2.18.0
+
Added: head/emulators/xen-kernel411/files/0008-cpupools-fix-state-when-downing-a-CPU-failed.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0008-cpupools-fix-state-when-downing-a-CPU-failed.patch Thu Aug 16 09:02:02 2018 (r477316)
@@ -0,0 +1,55 @@
+From 0a2016ca2fabfe674c311dcfd8e15fec0ba3f7b6 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich at suse.com>
+Date: Mon, 30 Jul 2018 11:23:22 +0200
+Subject: [PATCH 08/42] cpupools: fix state when downing a CPU failed
+
+While I've run into the issue with further patches in place which no
+longer guarantee the per-CPU area to start out as all zeros, the
+CPU_DOWN_FAILED processing looks to have the same issue: By not zapping
+the per-CPU cpupool pointer, cpupool_cpu_add()'s (indirect) invocation
+of schedule_cpu_switch() will trigger the "c != old_pool" assertion
+there.
+
+Clearing the field during CPU_DOWN_PREPARE is too early (afaict this
+should not happen before cpu_disable_scheduler()). Clearing it in
+CPU_DEAD and CPU_DOWN_FAILED would be an option, but would take the same
+piece of code twice. Since the field's value shouldn't matter while the
+CPU is offline, simply clear it (implicitly) for CPU_ONLINE and
+CPU_DOWN_FAILED, but only for other than the suspend/resume case (which
+gets specially handled in cpupool_cpu_remove()).
+
+By adjusting the conditional in cpupool_cpu_add() CPU_DOWN_FAILED
+handling in the suspend case should now also be handled better.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Juergen Gross <jgross at suse.com>
+master commit: cb1ae9a27819cea0c5008773c68a7be6f37eb0e5
+master date: 2018-07-19 09:41:55 +0200
+---
+ xen/common/cpupool.c | 3 ++-
+ 1 file changed, 2 insertions(+), 1 deletion(-)
+
+diff --git a/xen/common/cpupool.c b/xen/common/cpupool.c
+index 999839444e..1e8edcbd57 100644
+--- a/xen/common/cpupool.c
++++ b/xen/common/cpupool.c
+@@ -490,7 +490,7 @@ static int cpupool_cpu_add(unsigned int cpu)
+ cpumask_clear_cpu(cpu, &cpupool_locked_cpus);
+ cpumask_set_cpu(cpu, &cpupool_free_cpus);
+
+- if ( system_state == SYS_STATE_resume )
++ if ( system_state == SYS_STATE_suspend || system_state == SYS_STATE_resume )
+ {
+ struct cpupool **c;
+
+@@ -522,6 +522,7 @@ static int cpupool_cpu_add(unsigned int cpu)
+ * (or unplugging would have failed) and that is the default behavior
+ * anyway.
+ */
++ per_cpu(cpupool, cpu) = NULL;
+ ret = cpupool_assign_cpu_locked(cpupool0, cpu);
+ }
+ out:
+--
+2.18.0
+
Added: head/emulators/xen-kernel411/files/0009-x86-AMD-distinguish-compute-units-from-hyper-threads.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0009-x86-AMD-distinguish-compute-units-from-hyper-threads.patch Thu Aug 16 09:02:02 2018 (r477316)
@@ -0,0 +1,121 @@
+From bd51a6424202a5f1cd13dee6614bcb69ecbd2458 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich at suse.com>
+Date: Mon, 30 Jul 2018 11:24:01 +0200
+Subject: [PATCH 09/42] x86/AMD: distinguish compute units from hyper-threads
+MIME-Version: 1.0
+Content-Type: text/plain; charset=UTF-8
+Content-Transfer-Encoding: 8bit
+
+Fam17 replaces CUs by HTs, which we should reflect accordingly, even if
+the difference is not very big. The most relevant change (requiring some
+code restructuring) is that the topoext feature no longer means there is
+a valid CU ID.
+
+Take the opportunity and convert wrongly plain int variables in
+set_cpu_sibling_map() to unsigned int.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Brian Woods <brian.woods at amd.com>
+Reviewed-by: Roger Pau Monné <roger.pau at citrix.com>
+Acked-by: Andrew Cooper <andrew.cooper3 at citrix.com>
+master commit: 9429b07a0af7f92a5f25e4068e11db881e157495
+master date: 2018-07-19 09:42:42 +0200
+---
+ xen/arch/x86/cpu/amd.c | 16 +++++++++++-----
+ xen/arch/x86/smpboot.c | 32 ++++++++++++++++++++------------
+ 2 files changed, 31 insertions(+), 17 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c
+index 458a3fe60c..76078b55b2 100644
+--- a/xen/arch/x86/cpu/amd.c
++++ b/xen/arch/x86/cpu/amd.c
+@@ -505,17 +505,23 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
+ u32 eax, ebx, ecx, edx;
+
+ cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
+- c->compute_unit_id = ebx & 0xFF;
+ c->x86_num_siblings = ((ebx >> 8) & 0x3) + 1;
++
++ if (c->x86 < 0x17)
++ c->compute_unit_id = ebx & 0xFF;
++ else {
++ c->cpu_core_id = ebx & 0xFF;
++ c->x86_max_cores /= c->x86_num_siblings;
++ }
+ }
+
+ if (opt_cpu_info)
+ printk("CPU %d(%d) -> Processor %d, %s %d\n",
+ cpu, c->x86_max_cores, c->phys_proc_id,
+- cpu_has(c, X86_FEATURE_TOPOEXT) ? "Compute Unit" :
+- "Core",
+- cpu_has(c, X86_FEATURE_TOPOEXT) ? c->compute_unit_id :
+- c->cpu_core_id);
++ c->compute_unit_id != INVALID_CUID ? "Compute Unit"
++ : "Core",
++ c->compute_unit_id != INVALID_CUID ? c->compute_unit_id
++ : c->cpu_core_id);
+ }
+
+ static void early_init_amd(struct cpuinfo_x86 *c)
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index d4478e6132..78ba73578a 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -234,33 +234,41 @@ static void link_thread_siblings(int cpu1, int cpu2)
+ cpumask_set_cpu(cpu2, per_cpu(cpu_core_mask, cpu1));
+ }
+
+-static void set_cpu_sibling_map(int cpu)
++static void set_cpu_sibling_map(unsigned int cpu)
+ {
+- int i;
++ unsigned int i;
+ struct cpuinfo_x86 *c = cpu_data;
+
+ cpumask_set_cpu(cpu, &cpu_sibling_setup_map);
+
+ cpumask_set_cpu(cpu, socket_cpumask[cpu_to_socket(cpu)]);
++ cpumask_set_cpu(cpu, per_cpu(cpu_core_mask, cpu));
++ cpumask_set_cpu(cpu, per_cpu(cpu_sibling_mask, cpu));
+
+ if ( c[cpu].x86_num_siblings > 1 )
+ {
+ for_each_cpu ( i, &cpu_sibling_setup_map )
+ {
+- if ( cpu_has(c, X86_FEATURE_TOPOEXT) ) {
+- if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
+- (c[cpu].compute_unit_id == c[i].compute_unit_id) )
++ if ( cpu == i || c[cpu].phys_proc_id != c[i].phys_proc_id )
++ continue;
++ if ( c[cpu].compute_unit_id != INVALID_CUID &&
++ c[i].compute_unit_id != INVALID_CUID )
++ {
++ if ( c[cpu].compute_unit_id == c[i].compute_unit_id )
++ link_thread_siblings(cpu, i);
++ }
++ else if ( c[cpu].cpu_core_id != XEN_INVALID_CORE_ID &&
++ c[i].cpu_core_id != XEN_INVALID_CORE_ID )
++ {
++ if ( c[cpu].cpu_core_id == c[i].cpu_core_id )
+ link_thread_siblings(cpu, i);
+- } else if ( (c[cpu].phys_proc_id == c[i].phys_proc_id) &&
+- (c[cpu].cpu_core_id == c[i].cpu_core_id) ) {
+- link_thread_siblings(cpu, i);
+ }
++ else
++ printk(XENLOG_WARNING
++ "CPU%u: unclear relationship with CPU%u\n",
++ cpu, i);
+ }
+ }
+- else
+- {
+- cpumask_set_cpu(cpu, per_cpu(cpu_sibling_mask, cpu));
+- }
+
+ if ( c[cpu].x86_max_cores == 1 )
+ {
+--
+2.18.0
+
Added: head/emulators/xen-kernel411/files/0010-x86-distinguish-CPU-offlining-from-CPU-removal.patch
==============================================================================
--- /dev/null 00:00:00 1970 (empty, because file is newly added)
+++ head/emulators/xen-kernel411/files/0010-x86-distinguish-CPU-offlining-from-CPU-removal.patch Thu Aug 16 09:02:02 2018 (r477316)
@@ -0,0 +1,423 @@
+From 5908b4866b682d9189c36eddf7c898fd95b27ec1 Mon Sep 17 00:00:00 2001
+From: Jan Beulich <jbeulich at suse.com>
+Date: Mon, 30 Jul 2018 11:24:53 +0200
+Subject: [PATCH 10/42] x86: distinguish CPU offlining from CPU removal
+
+In order to be able to service #MC on offlined CPUs, the GDT, IDT,
+stack, and per-CPU data (which includes the TSS) need to be kept
+allocated. They should only be freed upon CPU removal (which we
+currently don't support, so some code is becoming effectively dead for
+the moment).
+
+Note that for now park_offline_cpus doesn't get set to true anywhere -
+this is going to be the subject of a subsequent patch.
+
+Signed-off-by: Jan Beulich <jbeulich at suse.com>
+Reviewed-by: Wei Liu <wei.liu2 at citrix.com>
+Reviewed-by: Andrew Cooper <andrew.cooper3 at citrix.com>
+master commit: 2e6c8f182c9c50129b1c7a620242861e6ad6a9fb
+master date: 2018-07-19 13:43:33 +0100
+---
+ xen/arch/x86/cpu/mcheck/mce.c | 15 ++++++--
+ xen/arch/x86/domain.c | 9 +++--
+ xen/arch/x86/genapic/x2apic.c | 9 +++--
+ xen/arch/x86/percpu.c | 9 +++--
+ xen/arch/x86/smpboot.c | 71 ++++++++++++++++++++++-------------
+ xen/include/asm-x86/smp.h | 2 +
+ xen/include/xen/cpu.h | 2 +
+ xen/include/xen/cpumask.h | 23 ++++++++++++
+ xen/include/xen/mm.h | 8 ++++
+ xen/include/xen/xmalloc.h | 6 +++
+ 10 files changed, 115 insertions(+), 39 deletions(-)
+
+diff --git a/xen/arch/x86/cpu/mcheck/mce.c b/xen/arch/x86/cpu/mcheck/mce.c
+index a8c287d124..32273d9208 100644
+--- a/xen/arch/x86/cpu/mcheck/mce.c
++++ b/xen/arch/x86/cpu/mcheck/mce.c
+@@ -692,12 +692,15 @@ static void cpu_bank_free(unsigned int cpu)
+
+ mcabanks_free(poll);
+ mcabanks_free(clr);
++
++ per_cpu(poll_bankmask, cpu) = NULL;
++ per_cpu(mce_clear_banks, cpu) = NULL;
+ }
+
+ static int cpu_bank_alloc(unsigned int cpu)
+ {
+- struct mca_banks *poll = mcabanks_alloc();
+- struct mca_banks *clr = mcabanks_alloc();
++ struct mca_banks *poll = per_cpu(poll_bankmask, cpu) ?: mcabanks_alloc();
++ struct mca_banks *clr = per_cpu(mce_clear_banks, cpu) ?: mcabanks_alloc();
+
+ if ( !poll || !clr )
+ {
+@@ -725,7 +728,13 @@ static int cpu_callback(
+
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+- cpu_bank_free(cpu);
++ if ( !park_offline_cpus )
++ cpu_bank_free(cpu);
++ break;
++
++ case CPU_REMOVE:
++ if ( park_offline_cpus )
++ cpu_bank_free(cpu);
+ break;
+ }
+
+diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c
+index 9850a782ec..c39cf2c6e5 100644
+--- a/xen/arch/x86/domain.c
++++ b/xen/arch/x86/domain.c
+@@ -107,10 +107,11 @@ static void play_dead(void)
+ local_irq_disable();
+
+ /*
+- * NOTE: After cpu_exit_clear, per-cpu variables are no longer accessible,
+- * as they may be freed at any time. In this case, heap corruption or
+- * #PF can occur (when heap debugging is enabled). For example, even
+- * printk() can involve tasklet scheduling, which touches per-cpu vars.
++ * NOTE: After cpu_exit_clear, per-cpu variables may no longer be accessible,
++ * as they may be freed at any time if offline CPUs don't get parked. In
++ * this case, heap corruption or #PF can occur (when heap debugging is
++ * enabled). For example, even printk() can involve tasklet scheduling,
++ * which touches per-cpu vars.
+ *
+ * Consider very carefully when adding code to *dead_idle. Most hypervisor
+ * subsystems are unsafe to call.
+diff --git a/xen/arch/x86/genapic/x2apic.c b/xen/arch/x86/genapic/x2apic.c
+index 4779b0d0d5..d997806272 100644
+--- a/xen/arch/x86/genapic/x2apic.c
++++ b/xen/arch/x86/genapic/x2apic.c
+@@ -201,18 +201,21 @@ static int update_clusterinfo(
+ if ( !cluster_cpus_spare )
+ cluster_cpus_spare = xzalloc(cpumask_t);
+ if ( !cluster_cpus_spare ||
+- !alloc_cpumask_var(&per_cpu(scratch_mask, cpu)) )
++ !cond_alloc_cpumask_var(&per_cpu(scratch_mask, cpu)) )
+ err = -ENOMEM;
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
++ case CPU_REMOVE:
++ if ( park_offline_cpus == (action != CPU_REMOVE) )
++ break;
+ if ( per_cpu(cluster_cpus, cpu) )
+ {
+ cpumask_clear_cpu(cpu, per_cpu(cluster_cpus, cpu));
+ if ( cpumask_empty(per_cpu(cluster_cpus, cpu)) )
+- xfree(per_cpu(cluster_cpus, cpu));
++ XFREE(per_cpu(cluster_cpus, cpu));
+ }
+- free_cpumask_var(per_cpu(scratch_mask, cpu));
++ FREE_CPUMASK_VAR(per_cpu(scratch_mask, cpu));
+ break;
+ }
+
+diff --git a/xen/arch/x86/percpu.c b/xen/arch/x86/percpu.c
+index c9997b7937..8be4ebddf4 100644
+--- a/xen/arch/x86/percpu.c
++++ b/xen/arch/x86/percpu.c
+@@ -28,7 +28,7 @@ static int init_percpu_area(unsigned int cpu)
+ char *p;
+
+ if ( __per_cpu_offset[cpu] != INVALID_PERCPU_AREA )
+- return -EBUSY;
++ return 0;
+
+ if ( (p = alloc_xenheap_pages(PERCPU_ORDER, 0)) == NULL )
+ return -ENOMEM;
+@@ -76,9 +76,12 @@ static int cpu_percpu_callback(
+ break;
+ case CPU_UP_CANCELED:
+ case CPU_DEAD:
+- free_percpu_area(cpu);
++ if ( !park_offline_cpus )
++ free_percpu_area(cpu);
+ break;
+- default:
++ case CPU_REMOVE:
++ if ( park_offline_cpus )
++ free_percpu_area(cpu);
+ break;
+ }
+
+diff --git a/xen/arch/x86/smpboot.c b/xen/arch/x86/smpboot.c
+index 78ba73578a..7e76cc3d68 100644
+--- a/xen/arch/x86/smpboot.c
++++ b/xen/arch/x86/smpboot.c
+@@ -63,6 +63,8 @@ static cpumask_t scratch_cpu0mask;
+ cpumask_t cpu_online_map __read_mostly;
+ EXPORT_SYMBOL(cpu_online_map);
+
++bool __read_mostly park_offline_cpus;
++
+ unsigned int __read_mostly nr_sockets;
+ cpumask_t **__read_mostly socket_cpumask;
+ static cpumask_t *secondary_socket_cpumask;
+@@ -895,7 +897,14 @@ static void cleanup_cpu_root_pgt(unsigned int cpu)
+ }
+ }
+
+-static void cpu_smpboot_free(unsigned int cpu)
++/*
++ * The 'remove' boolean controls whether a CPU is just getting offlined (and
++ * parked), or outright removed / offlined without parking. Parked CPUs need
++ * things like their stack, GDT, IDT, TSS, and per-CPU data still available.
++ * A few other items, in particular CPU masks, are also retained, as it's
++ * difficult to prove that they're entirely unreferenced from parked CPUs.
++ */
++static void cpu_smpboot_free(unsigned int cpu, bool remove)
+ {
+ unsigned int order, socket = cpu_to_socket(cpu);
+ struct cpuinfo_x86 *c = cpu_data;
+@@ -906,15 +915,19 @@ static void cpu_smpboot_free(unsigned int cpu)
+ socket_cpumask[socket] = NULL;
+ }
+
+- c[cpu].phys_proc_id = XEN_INVALID_SOCKET_ID;
+- c[cpu].cpu_core_id = XEN_INVALID_CORE_ID;
+- c[cpu].compute_unit_id = INVALID_CUID;
+ cpumask_clear_cpu(cpu, &cpu_sibling_setup_map);
+
+- free_cpumask_var(per_cpu(cpu_sibling_mask, cpu));
+- free_cpumask_var(per_cpu(cpu_core_mask, cpu));
+- if ( per_cpu(scratch_cpumask, cpu) != &scratch_cpu0mask )
+- free_cpumask_var(per_cpu(scratch_cpumask, cpu));
++ if ( remove )
++ {
++ c[cpu].phys_proc_id = XEN_INVALID_SOCKET_ID;
++ c[cpu].cpu_core_id = XEN_INVALID_CORE_ID;
++ c[cpu].compute_unit_id = INVALID_CUID;
++
*** DIFF OUTPUT TRUNCATED AT 1000 LINES ***
More information about the svn-ports-all
mailing list