diff options
author | Tomáš Mózes <tomas.mozes@gmail.com> | 2024-08-01 15:02:58 +0200 |
---|---|---|
committer | Tomáš Mózes <tomas.mozes@gmail.com> | 2024-08-01 15:02:58 +0200 |
commit | 212febf72900c12405591dcc5902d4cfa11173bf (patch) | |
tree | 7a093fae6f723d02b6c4a573669615024fe65e4d | |
parent | Xen 4.17.4-pre-patchset-1 (diff) | |
download | xen-upstream-patches-212febf72900c12405591dcc5902d4cfa11173bf.tar.gz xen-upstream-patches-212febf72900c12405591dcc5902d4cfa11173bf.tar.bz2 xen-upstream-patches-212febf72900c12405591dcc5902d4cfa11173bf.zip |
Xen 4.18.3-pre-patchset-0 [4.18.3-pre-patchset-0]
Signed-off-by: Tomáš Mózes <tomas.mozes@gmail.com>
123 files changed, 4574 insertions, 7274 deletions
diff --git a/0001-x86-entry-Fix-build-with-older-toolchains.patch b/0001-x86-entry-Fix-build-with-older-toolchains.patch new file mode 100644 index 0000000..ad6e76a --- /dev/null +++ b/0001-x86-entry-Fix-build-with-older-toolchains.patch @@ -0,0 +1,32 @@ +From 2d38302c33b117aa9a417056db241aefc840c2f0 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Tue, 9 Apr 2024 21:39:51 +0100 +Subject: [PATCH 01/56] x86/entry: Fix build with older toolchains + +Binutils older than 2.29 doesn't know INCSSPD. + +Fixes: 8e186f98ce0e ("x86: Use indirect calls in reset-stack infrastructure") +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> +(cherry picked from commit a9fa82500818a8d8ce5f2843f1577bd2c29d088e) +--- + xen/arch/x86/x86_64/entry.S | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index ad7dd3b23b..054fcb225f 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -643,7 +643,9 @@ ENTRY(continue_pv_domain) + * JMPed to. Drop the return address. 
+ */ + add $8, %rsp ++#ifdef CONFIG_XEN_SHSTK + ALTERNATIVE "", "mov $2, %eax; incsspd %eax", X86_FEATURE_XEN_SHSTK ++#endif + + call check_wakeup_from_wait + ret_from_intr: +-- +2.45.2 + diff --git a/0002-altcall-fix-__alt_call_maybe_initdata-so-it-s-safe-f.patch b/0002-altcall-fix-__alt_call_maybe_initdata-so-it-s-safe-f.patch new file mode 100644 index 0000000..05ecd83 --- /dev/null +++ b/0002-altcall-fix-__alt_call_maybe_initdata-so-it-s-safe-f.patch @@ -0,0 +1,49 @@ +From 8bdcb0b98b53140102031ceca0611f22190227fd Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Mon, 29 Apr 2024 09:35:21 +0200 +Subject: [PATCH 02/56] altcall: fix __alt_call_maybe_initdata so it's safe for + livepatch +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Setting alternative call variables as __init is not safe for use with +livepatch, as livepatches can rightfully introduce new alternative calls to +structures marked as __alt_call_maybe_initdata (possibly just indirectly due to +replacing existing functions that use those). Attempting to resolve those +alternative calls then results in page faults as the variable that holds the +function pointer address has been freed. + +When livepatch is supported use the __ro_after_init attribute instead of +__initdata for __alt_call_maybe_initdata. 
+ +Fixes: f26bb285949b ('xen: Implement xen/alternative-call.h for use in common code') +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: af4cd0a6a61cdb03bc1afca9478b05b0c9703599 +master date: 2024-04-11 18:51:36 +0100 +--- + xen/include/xen/alternative-call.h | 7 ++++++- + 1 file changed, 6 insertions(+), 1 deletion(-) + +diff --git a/xen/include/xen/alternative-call.h b/xen/include/xen/alternative-call.h +index 5c6b9a562b..10f7d7637e 100644 +--- a/xen/include/xen/alternative-call.h ++++ b/xen/include/xen/alternative-call.h +@@ -50,7 +50,12 @@ + + #include <asm/alternative.h> + +-#define __alt_call_maybe_initdata __initdata ++#ifdef CONFIG_LIVEPATCH ++/* Must keep for livepatches to resolve alternative calls. */ ++# define __alt_call_maybe_initdata __ro_after_init ++#else ++# define __alt_call_maybe_initdata __initdata ++#endif + + #else + +-- +2.45.2 + diff --git a/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch b/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch deleted file mode 100644 index bafad55..0000000 --- a/0002-pci-fail-device-assignment-if-phantom-functions-cann.patch +++ /dev/null @@ -1,91 +0,0 @@ -From f9e1ed51bdba31017ea17e1819eb2ade6b5c8615 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 30 Jan 2024 14:37:39 +0100 -Subject: [PATCH 02/67] pci: fail device assignment if phantom functions cannot - be assigned -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current behavior is that no error is reported if (some) phantom functions -fail to be assigned during device add or assignment, so the operation succeeds -even if some phantom functions are not correctly setup. - -This can lead to devices possibly being successfully assigned to a domU while -some of the device phantom functions are still assigned to dom0. 
Even when the -device is assigned domIO before being assigned to a domU phantom functions -might fail to be assigned to domIO, and also fail to be assigned to the domU, -leaving them assigned to dom0. - -Since the device can generate requests using the IDs of those phantom -functions, given the scenario above a device in such state would be in control -of a domU, but still capable of generating transactions that use a context ID -targeting dom0 owned memory. - -Modify device assign in order to attempt to deassign the device if phantom -functions failed to be assigned. - -Note that device addition is not modified in the same way, as in that case the -device is assigned to a trusted domain, and hence partial assign can lead to -device malfunction but not a security issue. - -This is XSA-449 / CVE-2023-46839 - -Fixes: 4e9950dc1bd2 ('IOMMU: add phantom function support') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: cb4ecb3cc17b02c2814bc817efd05f3f3ba33d1e -master date: 2024-01-30 14:28:01 +0100 ---- - xen/drivers/passthrough/pci.c | 27 +++++++++++++++++++++------ - 1 file changed, 21 insertions(+), 6 deletions(-) - -diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c -index 07d1986d33..8c62b14d19 100644 ---- a/xen/drivers/passthrough/pci.c -+++ b/xen/drivers/passthrough/pci.c -@@ -1444,11 +1444,10 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) - - pdev->fault.count = 0; - -- if ( (rc = iommu_call(hd->platform_ops, assign_device, d, devfn, -- pci_to_dev(pdev), flag)) ) -- goto done; -+ rc = iommu_call(hd->platform_ops, assign_device, d, devfn, pci_to_dev(pdev), -+ flag); - -- for ( ; pdev->phantom_stride; rc = 0 ) -+ while ( pdev->phantom_stride && !rc ) - { - devfn += pdev->phantom_stride; - if ( PCI_SLOT(devfn) != PCI_SLOT(pdev->devfn) ) -@@ -1459,8 +1458,24 @@ static int assign_device(struct domain *d, u16 seg, u8 bus, u8 devfn, u32 flag) 
- - done: - if ( rc ) -- printk(XENLOG_G_WARNING "%pd: assign (%pp) failed (%d)\n", -- d, &PCI_SBDF(seg, bus, devfn), rc); -+ { -+ printk(XENLOG_G_WARNING "%pd: assign %s(%pp) failed (%d)\n", -+ d, devfn != pdev->devfn ? "phantom function " : "", -+ &PCI_SBDF(seg, bus, devfn), rc); -+ -+ if ( devfn != pdev->devfn && deassign_device(d, seg, bus, pdev->devfn) ) -+ { -+ /* -+ * Device with phantom functions that failed to both assign and -+ * rollback. Mark the device as broken and crash the target domain, -+ * as the state of the functions at this point is unknown and Xen -+ * has no way to assert consistent context assignment among them. -+ */ -+ pdev->broken = true; -+ if ( !is_hardware_domain(d) && d != dom_io ) -+ domain_crash(d); -+ } -+ } - /* The device is assigned to dom_io so mark it as quarantined */ - else if ( d == dom_io ) - pdev->quarantine = true; --- -2.44.0 - diff --git a/0003-VT-d-Fix-else-vs-endif-misplacement.patch b/0003-VT-d-Fix-else-vs-endif-misplacement.patch deleted file mode 100644 index 622fa18..0000000 --- a/0003-VT-d-Fix-else-vs-endif-misplacement.patch +++ /dev/null @@ -1,70 +0,0 @@ -From 6b1864afc14d484cdbc9754ce3172ac3dc189846 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 30 Jan 2024 14:38:38 +0100 -Subject: [PATCH 03/67] VT-d: Fix "else" vs "#endif" misplacement - -In domain_pgd_maddr() the "#endif" is misplaced with respect to "else". This -generates incorrect logic when CONFIG_HVM is compiled out, as the "else" body -is executed unconditionally. - -Rework the logic to use IS_ENABLED() instead of explicit #ifdef-ary, as it's -clearer to follow. This in turn involves adjusting p2m_get_pagetable() to -compile when CONFIG_HVM is disabled. - -This is XSA-450 / CVE-2023-46840. 
- -Fixes: 033ff90aa9c1 ("x86/P2M: p2m_{alloc,free}_ptp() and p2m_alloc_table() are HVM-only") -Reported-by: Teddy Astie <teddy.astie@vates.tech> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: cc6ba68edf6dcd18c3865e7d7c0f1ed822796426 -master date: 2024-01-30 14:29:15 +0100 ---- - xen/arch/x86/include/asm/p2m.h | 9 ++++++++- - xen/drivers/passthrough/vtd/iommu.c | 4 +--- - 2 files changed, 9 insertions(+), 4 deletions(-) - -diff --git a/xen/arch/x86/include/asm/p2m.h b/xen/arch/x86/include/asm/p2m.h -index cd43d8621a..4f691533d5 100644 ---- a/xen/arch/x86/include/asm/p2m.h -+++ b/xen/arch/x86/include/asm/p2m.h -@@ -447,7 +447,14 @@ static inline bool_t p2m_is_altp2m(const struct p2m_domain *p2m) - return p2m->p2m_class == p2m_alternate; - } - --#define p2m_get_pagetable(p2m) ((p2m)->phys_table) -+#ifdef CONFIG_HVM -+static inline pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m) -+{ -+ return p2m->phys_table; -+} -+#else -+pagetable_t p2m_get_pagetable(const struct p2m_domain *p2m); -+#endif - - /* - * Ensure any deferred p2m TLB flush has been completed on all VCPUs. 
-diff --git a/xen/drivers/passthrough/vtd/iommu.c b/xen/drivers/passthrough/vtd/iommu.c -index b4c11a6b48..908b3ba6ee 100644 ---- a/xen/drivers/passthrough/vtd/iommu.c -+++ b/xen/drivers/passthrough/vtd/iommu.c -@@ -441,15 +441,13 @@ static paddr_t domain_pgd_maddr(struct domain *d, paddr_t pgd_maddr, - - if ( pgd_maddr ) - /* nothing */; --#ifdef CONFIG_HVM -- else if ( iommu_use_hap_pt(d) ) -+ else if ( IS_ENABLED(CONFIG_HVM) && iommu_use_hap_pt(d) ) - { - pagetable_t pgt = p2m_get_pagetable(p2m_get_hostp2m(d)); - - pgd_maddr = pagetable_get_paddr(pgt); - } - else --#endif - { - if ( !hd->arch.vtd.pgd_maddr ) - { --- -2.44.0 - diff --git a/0003-x86-rtc-Avoid-UIP-flag-being-set-for-longer-than-exp.patch b/0003-x86-rtc-Avoid-UIP-flag-being-set-for-longer-than-exp.patch new file mode 100644 index 0000000..8307630 --- /dev/null +++ b/0003-x86-rtc-Avoid-UIP-flag-being-set-for-longer-than-exp.patch @@ -0,0 +1,57 @@ +From af0e9ba44a58c87d6d135d8ffbf468b4ceac0a41 Mon Sep 17 00:00:00 2001 +From: Ross Lagerwall <ross.lagerwall@citrix.com> +Date: Mon, 29 Apr 2024 09:36:04 +0200 +Subject: [PATCH 03/56] x86/rtc: Avoid UIP flag being set for longer than + expected + +In a test, OVMF reported an error initializing the RTC without +indicating the precise nature of the error. The only plausible +explanation I can find is as follows: + +As part of the initialization, OVMF reads register C and then reads +register A repeatedly until the UIP flag is not set. If this takes longer +than 100 ms, OVMF fails and reports an error. This may happen with the +following sequence of events: + +At guest time=0s, rtc_init() calls check_update_timer() which schedules +update_timer for t=(1 - 244us). + +At t=1s, the update_timer function happens to have been called >= 244us +late. In the timer callback, it sets the UIP flag and schedules +update_timer2 for t=1s. + +Before update_timer2 runs, the guest reads register C which calls +check_update_timer(). 
check_update_timer() stops the scheduled +update_timer2 and since the guest time is now outside of the update +cycle, it schedules update_timer for t=(2 - 244us). + +The UIP flag will therefore be set for a whole second from t=1 to t=2 +while the guest repeatedly reads register A waiting for the UIP flag to +clear. Fix it by clearing the UIP flag when scheduling update_timer. + +I was able to reproduce this issue with a synthetic test and this +resolves the issue. + +Signed-off-by: Ross Lagerwall <ross.lagerwall@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 43a07069863b419433dee12c9b58c1f7ce70aa97 +master date: 2024-04-23 14:09:18 +0200 +--- + xen/arch/x86/hvm/rtc.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xen/arch/x86/hvm/rtc.c b/xen/arch/x86/hvm/rtc.c +index 206b4296e9..4839374352 100644 +--- a/xen/arch/x86/hvm/rtc.c ++++ b/xen/arch/x86/hvm/rtc.c +@@ -202,6 +202,7 @@ static void check_update_timer(RTCState *s) + } + else + { ++ s->hw.cmos_data[RTC_REG_A] &= ~RTC_UIP; + next_update_time = (USEC_PER_SEC - guest_usec - 244) * NS_PER_USEC; + expire_time = NOW() + next_update_time; + s->next_update_time = expire_time; +-- +2.45.2 + diff --git a/0004-x86-MTRR-correct-inadvertently-inverted-WC-check.patch b/0004-x86-MTRR-correct-inadvertently-inverted-WC-check.patch new file mode 100644 index 0000000..ed7754d --- /dev/null +++ b/0004-x86-MTRR-correct-inadvertently-inverted-WC-check.patch @@ -0,0 +1,36 @@ +From eb7059767c82d833ebecdf8106e96482b04f3c40 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Mon, 29 Apr 2024 09:36:37 +0200 +Subject: [PATCH 04/56] x86/MTRR: correct inadvertently inverted WC check +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The ! clearly got lost by mistake. 
+ +Fixes: e9e0eb30d4d6 ("x86/MTRR: avoid several indirect calls") +Reported-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 77e25f0e30ddd11e043e6fce84bf108ce7de5b6f +master date: 2024-04-23 14:13:48 +0200 +--- + xen/arch/x86/cpu/mtrr/main.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/arch/x86/cpu/mtrr/main.c b/xen/arch/x86/cpu/mtrr/main.c +index 55a4da54a7..90b235f57e 100644 +--- a/xen/arch/x86/cpu/mtrr/main.c ++++ b/xen/arch/x86/cpu/mtrr/main.c +@@ -316,7 +316,7 @@ int mtrr_add_page(unsigned long base, unsigned long size, + } + + /* If the type is WC, check that this processor supports it */ +- if ((type == X86_MT_WC) && mtrr_have_wrcomb()) { ++ if ((type == X86_MT_WC) && !mtrr_have_wrcomb()) { + printk(KERN_WARNING + "mtrr: your processor doesn't support write-combining\n"); + return -EOPNOTSUPP; +-- +2.45.2 + diff --git a/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch b/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch deleted file mode 100644 index fa90a46..0000000 --- a/0004-x86-amd-Extend-CPU-erratum-1474-fix-to-more-affected.patch +++ /dev/null @@ -1,123 +0,0 @@ -From abcc32f0634627fe21117a48bd10e792bfbdd6dc Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Fri, 2 Feb 2024 08:01:09 +0100 -Subject: [PATCH 04/67] x86/amd: Extend CPU erratum #1474 fix to more affected - models -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Erratum #1474 has now been extended to cover models from family 17h ranges -00-2Fh, so the errata now covers all the models released under Family -17h (Zen, Zen+ and Zen2). - -Additionally extend the workaround to Family 18h (Hygon), since it's based on -the Zen architecture and very likely affected. 
- -Rename all the zen2 related symbols to fam17, since the errata doesn't -exclusively affect Zen2 anymore. - -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 23db507a01a4ec5259ec0ab43d296a41b1c326ba -master date: 2023-12-21 12:19:40 +0000 ---- - xen/arch/x86/cpu/amd.c | 27 ++++++++++++++------------- - 1 file changed, 14 insertions(+), 13 deletions(-) - -diff --git a/xen/arch/x86/cpu/amd.c b/xen/arch/x86/cpu/amd.c -index 29ae97e7c0..3d85e9797d 100644 ---- a/xen/arch/x86/cpu/amd.c -+++ b/xen/arch/x86/cpu/amd.c -@@ -54,7 +54,7 @@ bool __read_mostly amd_acpi_c1e_quirk; - bool __ro_after_init amd_legacy_ssbd; - bool __initdata amd_virt_spec_ctrl; - --static bool __read_mostly zen2_c6_disabled; -+static bool __read_mostly fam17_c6_disabled; - - static inline int rdmsr_amd_safe(unsigned int msr, unsigned int *lo, - unsigned int *hi) -@@ -951,24 +951,24 @@ void amd_check_zenbleed(void) - val & chickenbit ? "chickenbit" : "microcode"); - } - --static void cf_check zen2_disable_c6(void *arg) -+static void cf_check fam17_disable_c6(void *arg) - { - /* Disable C6 by clearing the CCR{0,1,2}_CC6EN bits. */ - const uint64_t mask = ~((1ul << 6) | (1ul << 14) | (1ul << 22)); - uint64_t val; - -- if (!zen2_c6_disabled) { -+ if (!fam17_c6_disabled) { - printk(XENLOG_WARNING - "Disabling C6 after 1000 days apparent uptime due to AMD errata 1474\n"); -- zen2_c6_disabled = true; -+ fam17_c6_disabled = true; - /* - * Prevent CPU hotplug so that started CPUs will either see -- * zen2_c6_disabled set, or will be handled by -+ * zen_c6_disabled set, or will be handled by - * smp_call_function(). 
- */ - while (!get_cpu_maps()) - process_pending_softirqs(); -- smp_call_function(zen2_disable_c6, NULL, 0); -+ smp_call_function(fam17_disable_c6, NULL, 0); - put_cpu_maps(); - } - -@@ -1273,8 +1273,8 @@ static void cf_check init_amd(struct cpuinfo_x86 *c) - amd_check_zenbleed(); - amd_check_erratum_1485(); - -- if (zen2_c6_disabled) -- zen2_disable_c6(NULL); -+ if (fam17_c6_disabled) -+ fam17_disable_c6(NULL); - - check_syscfg_dram_mod_en(); - -@@ -1286,7 +1286,7 @@ const struct cpu_dev amd_cpu_dev = { - .c_init = init_amd, - }; - --static int __init cf_check zen2_c6_errata_check(void) -+static int __init cf_check amd_check_erratum_1474(void) - { - /* - * Errata #1474: A Core May Hang After About 1044 Days -@@ -1294,7 +1294,8 @@ static int __init cf_check zen2_c6_errata_check(void) - */ - s_time_t delta; - -- if (cpu_has_hypervisor || boot_cpu_data.x86 != 0x17 || !is_zen2_uarch()) -+ if (cpu_has_hypervisor || -+ (boot_cpu_data.x86 != 0x17 && boot_cpu_data.x86 != 0x18)) - return 0; - - /* -@@ -1309,10 +1310,10 @@ static int __init cf_check zen2_c6_errata_check(void) - if (delta > 0) { - static struct timer errata_c6; - -- init_timer(&errata_c6, zen2_disable_c6, NULL, 0); -+ init_timer(&errata_c6, fam17_disable_c6, NULL, 0); - set_timer(&errata_c6, NOW() + delta); - } else -- zen2_disable_c6(NULL); -+ fam17_disable_c6(NULL); - - return 0; - } -@@ -1320,4 +1321,4 @@ static int __init cf_check zen2_c6_errata_check(void) - * Must be executed after early_time_init() for tsc_ticks2ns() to have been - * calibrated. That prevents us doing the check in init_amd(). 
- */ --presmp_initcall(zen2_c6_errata_check); -+presmp_initcall(amd_check_erratum_1474); --- -2.44.0 - diff --git a/0005-CirrusCI-drop-FreeBSD-12.patch b/0005-CirrusCI-drop-FreeBSD-12.patch deleted file mode 100644 index dac712b..0000000 --- a/0005-CirrusCI-drop-FreeBSD-12.patch +++ /dev/null @@ -1,39 +0,0 @@ -From 0ef1fb43ddd61b3c4c953e833e012ac21ad5ca0f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Fri, 2 Feb 2024 08:01:50 +0100 -Subject: [PATCH 05/67] CirrusCI: drop FreeBSD 12 -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Went EOL by the end of December 2023, and the pkg repos have been shut down. - -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: c2ce3466472e9c9eda79f5dc98eb701bc6fdba20 -master date: 2024-01-15 12:20:11 +0100 ---- - .cirrus.yml | 6 ------ - 1 file changed, 6 deletions(-) - -diff --git a/.cirrus.yml b/.cirrus.yml -index 7e0beb200d..63f3afb104 100644 ---- a/.cirrus.yml -+++ b/.cirrus.yml -@@ -14,12 +14,6 @@ freebsd_template: &FREEBSD_TEMPLATE - - ./configure --with-system-seabios=/usr/local/share/seabios/bios.bin - - gmake -j`sysctl -n hw.ncpu` clang=y - --task: -- name: 'FreeBSD 12' -- freebsd_instance: -- image_family: freebsd-12-4 -- << : *FREEBSD_TEMPLATE -- - task: - name: 'FreeBSD 13' - freebsd_instance: --- -2.44.0 - diff --git a/0005-x86-spec-fix-reporting-of-BHB-clearing-usage-from-gu.patch b/0005-x86-spec-fix-reporting-of-BHB-clearing-usage-from-gu.patch new file mode 100644 index 0000000..bad0428 --- /dev/null +++ b/0005-x86-spec-fix-reporting-of-BHB-clearing-usage-from-gu.patch @@ -0,0 +1,69 @@ +From 0b0c7dca70d64c35c86e5d503f67366ebe2b9138 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Mon, 29 Apr 2024 09:37:04 +0200 +Subject: [PATCH 05/56] x86/spec: 
fix reporting of BHB clearing usage from + guest entry points +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Reporting whether the BHB clearing on entry is done for the different domains +types based on cpu_has_bhb_seq is unhelpful, as that variable signals whether +there's a BHB clearing sequence selected, but that alone doesn't imply that +such sequence is used from the PV and/or HVM entry points. + +Instead use opt_bhb_entry_{pv,hvm} which do signal whether BHB clearing is +performed on entry from PV/HVM. + +Fixes: 689ad48ce9cf ('x86/spec-ctrl: Wire up the Native-BHI software sequences') +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: 049ab0b2c9f1f5edb54b505fef0bc575787dafe9 +master date: 2024-04-25 16:35:56 +0200 +--- + xen/arch/x86/spec_ctrl.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index ba4349a024..8c67d6256a 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -634,7 +634,7 @@ static void __init print_details(enum ind_thunk thunk) + (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || + boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) || +- cpu_has_bhb_seq || amd_virt_spec_ctrl || ++ opt_bhb_entry_hvm || amd_virt_spec_ctrl || + opt_eager_fpu || opt_verw_hvm) ? "" : " None", + boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", + (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || +@@ -643,7 +643,7 @@ static void __init print_details(enum ind_thunk thunk) + opt_eager_fpu ? " EAGER_FPU" : "", + opt_verw_hvm ? " VERW" : "", + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? " IBPB-entry" : "", +- cpu_has_bhb_seq ? " BHB-entry" : ""); ++ opt_bhb_entry_hvm ? 
" BHB-entry" : ""); + + #endif + #ifdef CONFIG_PV +@@ -651,14 +651,14 @@ static void __init print_details(enum ind_thunk thunk) + (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || + boot_cpu_has(X86_FEATURE_SC_RSB_PV) || + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) || +- cpu_has_bhb_seq || ++ opt_bhb_entry_pv || + opt_eager_fpu || opt_verw_pv) ? "" : " None", + boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", + boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", + opt_eager_fpu ? " EAGER_FPU" : "", + opt_verw_pv ? " VERW" : "", + boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : "", +- cpu_has_bhb_seq ? " BHB-entry" : ""); ++ opt_bhb_entry_pv ? " BHB-entry" : ""); + + printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", + opt_xpti_hwdom ? "enabled" : "disabled", +-- +2.45.2 + diff --git a/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch b/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch deleted file mode 100644 index ce07803..0000000 --- a/0006-x86-intel-ensure-Global-Performance-Counter-Control-.patch +++ /dev/null @@ -1,74 +0,0 @@ -From d0ad2cc5eac1b5d3cfd14204d377ce2384f52607 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Fri, 2 Feb 2024 08:02:20 +0100 -Subject: [PATCH 06/67] x86/intel: ensure Global Performance Counter Control is - setup correctly -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When Architectural Performance Monitoring is available, the PERF_GLOBAL_CTRL -MSR contains per-counter enable bits that is ANDed with the enable bit in the -counter EVNTSEL MSR in order for a PMC counter to be enabled. 
- -So far the watchdog code seems to have relied on the PERF_GLOBAL_CTRL enable -bits being set by default, but at least on some Intel Sapphire and Emerald -Rapids this is no longer the case, and Xen reports: - -Testing NMI watchdog on all CPUs: 0 40 stuck - -The first CPU on each package is started with PERF_GLOBAL_CTRL zeroed, so PMC0 -doesn't start counting when the enable bit in EVNTSEL0 is set, due to the -relevant enable bit in PERF_GLOBAL_CTRL not being set. - -Check and adjust PERF_GLOBAL_CTRL during CPU initialization so that all the -general-purpose PMCs are enabled. Doing so brings the state of the package-BSP -PERF_GLOBAL_CTRL in line with the rest of the CPUs on the system. - -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -master commit: 6bdb965178bbb3fc50cd4418d4770a7789956e2c -master date: 2024-01-17 10:40:52 +0100 ---- - xen/arch/x86/cpu/intel.c | 23 ++++++++++++++++++++++- - 1 file changed, 22 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c -index b40ac696e6..96723b5d44 100644 ---- a/xen/arch/x86/cpu/intel.c -+++ b/xen/arch/x86/cpu/intel.c -@@ -528,9 +528,30 @@ static void cf_check init_intel(struct cpuinfo_x86 *c) - init_intel_cacheinfo(c); - if (c->cpuid_level > 9) { - unsigned eax = cpuid_eax(10); -+ unsigned int cnt = (eax >> 8) & 0xff; -+ - /* Check for version and the number of counters */ -- if ((eax & 0xff) && (((eax>>8) & 0xff) > 1)) -+ if ((eax & 0xff) && (cnt > 1) && (cnt <= 32)) { -+ uint64_t global_ctrl; -+ unsigned int cnt_mask = (1UL << cnt) - 1; -+ -+ /* -+ * On (some?) Sapphire/Emerald Rapids platforms each -+ * package-BSP starts with all the enable bits for the -+ * general-purpose PMCs cleared. Adjust so counters -+ * can be enabled from EVNTSEL. 
-+ */ -+ rdmsrl(MSR_CORE_PERF_GLOBAL_CTRL, global_ctrl); -+ if ((global_ctrl & cnt_mask) != cnt_mask) { -+ printk("CPU%u: invalid PERF_GLOBAL_CTRL: %#" -+ PRIx64 " adjusting to %#" PRIx64 "\n", -+ smp_processor_id(), global_ctrl, -+ global_ctrl | cnt_mask); -+ wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, -+ global_ctrl | cnt_mask); -+ } - __set_bit(X86_FEATURE_ARCH_PERFMON, c->x86_capability); -+ } - } - - if ( !cpu_has(c, X86_FEATURE_XTOPOLOGY) ) --- -2.44.0 - diff --git a/0006-x86-spec-adjust-logic-that-elides-lfence.patch b/0006-x86-spec-adjust-logic-that-elides-lfence.patch new file mode 100644 index 0000000..6da96c4 --- /dev/null +++ b/0006-x86-spec-adjust-logic-that-elides-lfence.patch @@ -0,0 +1,75 @@ +From f0ff1d9cb96041a84a24857a6464628240deed4f Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Mon, 29 Apr 2024 09:37:29 +0200 +Subject: [PATCH 06/56] x86/spec: adjust logic that elides lfence +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +It's currently too restrictive by just checking whether there's a BHB clearing +sequence selected. It should instead check whether BHB clearing is used on +entry from PV or HVM specifically. + +Switch to use opt_bhb_entry_{pv,hvm} instead, and then remove cpu_has_bhb_seq +since it no longer has any users. 
+ +Reported-by: Jan Beulich <jbeulich@suse.com> +Fixes: 954c983abcee ('x86/spec-ctrl: Software BHB-clearing sequences') +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: 656ae8f1091bcefec9c46ec3ea3ac2118742d4f6 +master date: 2024-04-25 16:37:01 +0200 +--- + xen/arch/x86/include/asm/cpufeature.h | 3 --- + xen/arch/x86/spec_ctrl.c | 6 +++--- + 2 files changed, 3 insertions(+), 6 deletions(-) + +diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h +index 7a312c485e..3c57f55de0 100644 +--- a/xen/arch/x86/include/asm/cpufeature.h ++++ b/xen/arch/x86/include/asm/cpufeature.h +@@ -228,9 +228,6 @@ static inline bool boot_cpu_has(unsigned int feat) + #define cpu_bug_fpu_ptrs boot_cpu_has(X86_BUG_FPU_PTRS) + #define cpu_bug_null_seg boot_cpu_has(X86_BUG_NULL_SEG) + +-#define cpu_has_bhb_seq (boot_cpu_has(X86_SPEC_BHB_TSX) || \ +- boot_cpu_has(X86_SPEC_BHB_LOOPS)) +- + enum _cache_type { + CACHE_TYPE_NULL = 0, + CACHE_TYPE_DATA = 1, +diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c +index 8c67d6256a..12c19b7eca 100644 +--- a/xen/arch/x86/spec_ctrl.c ++++ b/xen/arch/x86/spec_ctrl.c +@@ -2328,7 +2328,7 @@ void __init init_speculation_mitigations(void) + * unconditional WRMSR. If we do have it, or we're not using any + * prior conditional block, then it's safe to drop the LFENCE. + */ +- if ( !cpu_has_bhb_seq && ++ if ( !opt_bhb_entry_pv && + (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || + !boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV)) ) + setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_PV); +@@ -2344,7 +2344,7 @@ void __init init_speculation_mitigations(void) + * active in the block that is skipped when interrupting guest + * context, then it's safe to drop the LFENCE. 
+ */ +- if ( !cpu_has_bhb_seq && ++ if ( !opt_bhb_entry_pv && + (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || + (!boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) && + !boot_cpu_has(X86_FEATURE_SC_RSB_PV))) ) +@@ -2356,7 +2356,7 @@ void __init init_speculation_mitigations(void) + * A BHB sequence, if used, is the only conditional action, so if we + * don't have it, we don't need the safety LFENCE. + */ +- if ( !cpu_has_bhb_seq ) ++ if ( !opt_bhb_entry_hvm ) + setup_force_cpu_cap(X86_SPEC_NO_LFENCE_ENTRY_VMX); + } + +-- +2.45.2 + diff --git a/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch b/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch deleted file mode 100644 index 2100acc..0000000 --- a/0007-x86-vmx-Fix-IRQ-handling-for-EXIT_REASON_INIT.patch +++ /dev/null @@ -1,65 +0,0 @@ -From eca5416f9b0e179de9553900de8de660ab09199d Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Fri, 2 Feb 2024 08:02:51 +0100 -Subject: [PATCH 07/67] x86/vmx: Fix IRQ handling for EXIT_REASON_INIT - -When receiving an INIT, a prior bugfix tried to ignore the INIT and continue -onwards. - -Unfortunately it's not safe to return at that point in vmx_vmexit_handler(). -Just out of context in the first hunk is a local_irqs_enabled() which is -depended-upon by the return-to-guest path, causing the following checklock -failure in debug builds: - - (XEN) Error: INIT received - ignoring - (XEN) CHECKLOCK FAILURE: prev irqsafe: 0, curr irqsafe 1 - (XEN) Xen BUG at common/spinlock.c:132 - (XEN) ----[ Xen-4.19-unstable x86_64 debug=y Tainted: H ]---- - ... - (XEN) Xen call trace: - (XEN) [<ffff82d040238e10>] R check_lock+0xcd/0xe1 - (XEN) [<ffff82d040238fe3>] F _spin_lock+0x1b/0x60 - (XEN) [<ffff82d0402ed6a8>] F pt_update_irq+0x32/0x3bb - (XEN) [<ffff82d0402b9632>] F vmx_intr_assist+0x3b/0x51d - (XEN) [<ffff82d040206447>] F vmx_asm_vmexit_handler+0xf7/0x210 - -Luckily, this is benign in release builds. 
Accidentally having IRQs disabled -when trying to take an IRQs-on lock isn't a deadlock-vulnerable pattern. - -Drop the problematic early return. In hindsight, it's wrong to skip other -normal VMExit steps. - -Fixes: b1f11273d5a7 ("x86/vmx: Don't spuriously crash the domain when INIT is received") -Reported-by: Reima ISHII <ishiir@g.ecc.u-tokyo.ac.jp> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: d1f8883aebe00f6a9632d77ab0cd5c6d02c9cbe4 -master date: 2024-01-18 20:59:06 +0000 ---- - xen/arch/x86/hvm/vmx/vmx.c | 3 ++- - 1 file changed, 2 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c -index 072288a5ef..31f4a861c6 100644 ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -4037,7 +4037,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs) - - case EXIT_REASON_INIT: - printk(XENLOG_ERR "Error: INIT received - ignoring\n"); -- return; /* Renter the guest without further processing */ -+ break; - } - - /* Now enable interrupts so it's safe to take locks. */ -@@ -4323,6 +4323,7 @@ void vmx_vmexit_handler(struct cpu_user_regs *regs) - break; - } - case EXIT_REASON_EXTERNAL_INTERRUPT: -+ case EXIT_REASON_INIT: - /* Already handled above. */ - break; - case EXIT_REASON_TRIPLE_FAULT: --- -2.44.0 - diff --git a/0007-xen-xsm-Wire-up-get_dom0_console.patch b/0007-xen-xsm-Wire-up-get_dom0_console.patch new file mode 100644 index 0000000..540541c --- /dev/null +++ b/0007-xen-xsm-Wire-up-get_dom0_console.patch @@ -0,0 +1,66 @@ +From 026542c8577ab6af7c1dbc7446547bdc2bc705fd Mon Sep 17 00:00:00 2001 +From: Jason Andryuk <jason.andryuk@amd.com> +Date: Tue, 21 May 2024 10:19:43 +0200 +Subject: [PATCH 07/56] xen/xsm: Wire up get_dom0_console + +An XSM hook for get_dom0_console is currently missing. Using XSM with +a PVH dom0 shows: +(XEN) FLASK: Denying unknown platform_op: 64. + +Wire up the hook, and allow it for dom0. 
+ +Fixes: 4dd160583c ("x86/platform: introduce hypercall to get initial video console settings") +Signed-off-by: Jason Andryuk <jason.andryuk@amd.com> +Acked-by: Daniel P. Smith <dpsmith@apertussolutions.com> +master commit: 647f7e50ebeeb8152974cad6a12affe474c74513 +master date: 2024-04-30 08:33:41 +0200 +--- + tools/flask/policy/modules/dom0.te | 2 +- + xen/xsm/flask/hooks.c | 4 ++++ + xen/xsm/flask/policy/access_vectors | 2 ++ + 3 files changed, 7 insertions(+), 1 deletion(-) + +diff --git a/tools/flask/policy/modules/dom0.te b/tools/flask/policy/modules/dom0.te +index f1dcff48e2..16b8c9646d 100644 +--- a/tools/flask/policy/modules/dom0.te ++++ b/tools/flask/policy/modules/dom0.te +@@ -16,7 +16,7 @@ allow dom0_t xen_t:xen { + allow dom0_t xen_t:xen2 { + resource_op psr_cmt_op psr_alloc pmu_ctrl get_symbol + get_cpu_levelling_caps get_cpu_featureset livepatch_op +- coverage_op ++ coverage_op get_dom0_console + }; + + # Allow dom0 to use all XENVER_ subops that have checks. +diff --git a/xen/xsm/flask/hooks.c b/xen/xsm/flask/hooks.c +index 78225f68c1..5e88c71b8e 100644 +--- a/xen/xsm/flask/hooks.c ++++ b/xen/xsm/flask/hooks.c +@@ -1558,6 +1558,10 @@ static int cf_check flask_platform_op(uint32_t op) + return avc_has_perm(domain_sid(current->domain), SECINITSID_XEN, + SECCLASS_XEN2, XEN2__GET_SYMBOL, NULL); + ++ case XENPF_get_dom0_console: ++ return avc_has_perm(domain_sid(current->domain), SECINITSID_XEN, ++ SECCLASS_XEN2, XEN2__GET_DOM0_CONSOLE, NULL); ++ + default: + return avc_unknown_permission("platform_op", op); + } +diff --git a/xen/xsm/flask/policy/access_vectors b/xen/xsm/flask/policy/access_vectors +index 4e6710a63e..a35e3d4c51 100644 +--- a/xen/xsm/flask/policy/access_vectors ++++ b/xen/xsm/flask/policy/access_vectors +@@ -99,6 +99,8 @@ class xen2 + livepatch_op + # XEN_SYSCTL_coverage_op + coverage_op ++# XENPF_get_dom0_console ++ get_dom0_console + } + + # Classes domain and domain2 consist of operations that a domain performs on +-- +2.45.2 + diff 
--git a/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch b/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch deleted file mode 100644 index 3af45e8..0000000 --- a/0008-x86-vmx-Disallow-the-use-of-inactivity-states.patch +++ /dev/null @@ -1,126 +0,0 @@ -From 7bd612727df792671e44152a8205f0cf821ad984 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Fri, 2 Feb 2024 08:03:26 +0100 -Subject: [PATCH 08/67] x86/vmx: Disallow the use of inactivity states - -Right now, vvmx will blindly copy L12's ACTIVITY_STATE into the L02 VMCS and -enter the vCPU. Luckily for us, nested-virt is explicitly unsupported for -security bugs. - -The inactivity states are HLT, SHUTDOWN and WAIT-FOR-SIPI, and as noted by the -SDM in Vol3 27.7 "Special Features of VM Entry": - - If VM entry ends with the logical processor in an inactive activity state, - the VM entry generates any special bus cycle that is normally generated when - that activity state is entered from the active state. - -Also, - - Some activity states unconditionally block certain events. - -I.e. A VMEntry with ACTIVITY=SHUTDOWN will initiate a platform reset, while a -VMEntry with ACTIVITY=WAIT-FOR-SIPI will really block everything other than -SIPIs. - -Both of these activity states are for the TXT ACM to use, not for regular -hypervisors, and Xen doesn't support dropping the HLT intercept either. - -There are two paths in Xen which operate on ACTIVITY_STATE. - -1) The vmx_{get,set}_nonreg_state() helpers for VM-Fork. - - As regular VMs can't use any inactivity states, this is just duplicating - the 0 from construct_vmcs(). Retain the ability to query activity_state, - but crash the domain on any attempt to set an inactivity state. - -2) Nested virt, because of ACTIVITY_STATE in vmcs_gstate_field[]. - - Explicitly hide the inactivity states in the guest's view of MSR_VMX_MISC, - and remove ACTIVITY_STATE from vmcs_gstate_field[]. 
- - In virtual_vmentry(), we should trigger a VMEntry failure for the use of - any inactivity states, but there's no support for that in the code at all - so leave a TODO for when we finally start working on nested-virt in - earnest. - -Reported-by: Reima Ishii <ishiir@g.ecc.u-tokyo.ac.jp> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Tamas K Lengyel <tamas@tklengyel.com> -master commit: 3643bb53a05b7c8fbac072c63bef1538f2a6d0d2 -master date: 2024-01-18 20:59:06 +0000 ---- - xen/arch/x86/hvm/vmx/vmx.c | 8 +++++++- - xen/arch/x86/hvm/vmx/vvmx.c | 9 +++++++-- - xen/arch/x86/include/asm/hvm/vmx/vmcs.h | 1 + - 3 files changed, 15 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/hvm/vmx/vmx.c b/xen/arch/x86/hvm/vmx/vmx.c -index 31f4a861c6..35d391d8e5 100644 ---- a/xen/arch/x86/hvm/vmx/vmx.c -+++ b/xen/arch/x86/hvm/vmx/vmx.c -@@ -1499,7 +1499,13 @@ static void cf_check vmx_set_nonreg_state(struct vcpu *v, - { - vmx_vmcs_enter(v); - -- __vmwrite(GUEST_ACTIVITY_STATE, nrs->vmx.activity_state); -+ if ( nrs->vmx.activity_state ) -+ { -+ printk("Attempt to set %pv activity_state %#lx\n", -+ v, nrs->vmx.activity_state); -+ domain_crash(v->domain); -+ } -+ - __vmwrite(GUEST_INTERRUPTIBILITY_INFO, nrs->vmx.interruptibility_info); - __vmwrite(GUEST_PENDING_DBG_EXCEPTIONS, nrs->vmx.pending_dbg); - -diff --git a/xen/arch/x86/hvm/vmx/vvmx.c b/xen/arch/x86/hvm/vmx/vvmx.c -index f8fe8d0c14..515cb5ae77 100644 ---- a/xen/arch/x86/hvm/vmx/vvmx.c -+++ b/xen/arch/x86/hvm/vmx/vvmx.c -@@ -910,7 +910,10 @@ static const u16 vmcs_gstate_field[] = { - GUEST_LDTR_AR_BYTES, - GUEST_TR_AR_BYTES, - GUEST_INTERRUPTIBILITY_INFO, -+ /* -+ * ACTIVITY_STATE is handled specially. 
- GUEST_ACTIVITY_STATE, -+ */ - GUEST_SYSENTER_CS, - GUEST_PREEMPTION_TIMER, - /* natural */ -@@ -1211,6 +1214,8 @@ static void virtual_vmentry(struct cpu_user_regs *regs) - nvcpu->nv_vmentry_pending = 0; - nvcpu->nv_vmswitch_in_progress = 1; - -+ /* TODO: Fail VMentry for GUEST_ACTIVITY_STATE != 0 */ -+ - /* - * EFER handling: - * hvm_set_efer won't work if CR0.PG = 1, so we change the value -@@ -2327,8 +2332,8 @@ int nvmx_msr_read_intercept(unsigned int msr, u64 *msr_content) - data = hvm_cr4_guest_valid_bits(d); - break; - case MSR_IA32_VMX_MISC: -- /* Do not support CR3-target feature now */ -- data = host_data & ~VMX_MISC_CR3_TARGET; -+ /* Do not support CR3-targets or activity states. */ -+ data = host_data & ~(VMX_MISC_CR3_TARGET | VMX_MISC_ACTIVITY_MASK); - break; - case MSR_IA32_VMX_EPT_VPID_CAP: - data = nept_get_ept_vpid_cap(); -diff --git a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -index 78404e42b3..0af021d5f5 100644 ---- a/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -+++ b/xen/arch/x86/include/asm/hvm/vmx/vmcs.h -@@ -288,6 +288,7 @@ extern u32 vmx_secondary_exec_control; - #define VMX_VPID_INVVPID_SINGLE_CONTEXT_RETAINING_GLOBAL 0x80000000000ULL - extern u64 vmx_ept_vpid_cap; - -+#define VMX_MISC_ACTIVITY_MASK 0x000001c0 - #define VMX_MISC_PROC_TRACE 0x00004000 - #define VMX_MISC_CR3_TARGET 0x01ff0000 - #define VMX_MISC_VMWRITE_ALL 0x20000000 --- -2.44.0 - diff --git a/0008-xen-x86-Fix-Syntax-warning-in-gen-cpuid.py.patch b/0008-xen-x86-Fix-Syntax-warning-in-gen-cpuid.py.patch new file mode 100644 index 0000000..7c04f23 --- /dev/null +++ b/0008-xen-x86-Fix-Syntax-warning-in-gen-cpuid.py.patch @@ -0,0 +1,41 @@ +From 47cf06c09a2fa1ee92ea3e7718c8f8e0f1450d88 Mon Sep 17 00:00:00 2001 +From: Jason Andryuk <jason.andryuk@amd.com> +Date: Tue, 21 May 2024 10:20:06 +0200 +Subject: [PATCH 08/56] xen/x86: Fix Syntax warning in gen-cpuid.py + +Python 3.12.2 warns: + +xen/tools/gen-cpuid.py:50: SyntaxWarning: invalid escape 
sequence '\s' + "\s+([\s\d]+\*[\s\d]+\+[\s\d]+)\)" +xen/tools/gen-cpuid.py:51: SyntaxWarning: invalid escape sequence '\s' + "\s+/\*([\w!]*) .*$") + +Specify the strings as raw strings so '\s' is read as literal '\' + 's'. +This avoids escaping all the '\'s in the strings. + +Signed-off-by: Jason Andryuk <jason.andryuk@amd.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: 08e79bba73d74a85d3ce6ff0f91c5205f1e05eda +master date: 2024-04-30 08:34:37 +0200 +--- + xen/tools/gen-cpuid.py | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xen/tools/gen-cpuid.py b/xen/tools/gen-cpuid.py +index 02dd45a5ed..415d644db5 100755 +--- a/xen/tools/gen-cpuid.py ++++ b/xen/tools/gen-cpuid.py +@@ -47,8 +47,8 @@ def parse_definitions(state): + """ + feat_regex = re.compile( + r"^XEN_CPUFEATURE\(([A-Z0-9_]+)," +- "\s+([\s\d]+\*[\s\d]+\+[\s\d]+)\)" +- "\s+/\*([\w!]*) .*$") ++ r"\s+([\s\d]+\*[\s\d]+\+[\s\d]+)\)" ++ r"\s+/\*([\w!]*) .*$") + + word_regex = re.compile( + r"^/\* .* word (\d*) \*/$") +-- +2.45.2 + diff --git a/0009-VT-d-correct-ATS-checking-for-root-complex-integrate.patch b/0009-VT-d-correct-ATS-checking-for-root-complex-integrate.patch new file mode 100644 index 0000000..2d2dc91 --- /dev/null +++ b/0009-VT-d-correct-ATS-checking-for-root-complex-integrate.patch @@ -0,0 +1,63 @@ +From a4c5bbb9db07b27e66f7c47676b1c888e1bece20 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 21 May 2024 10:20:58 +0200 +Subject: [PATCH 09/56] VT-d: correct ATS checking for root complex integrated + devices +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Spec version 4.1 says + +"The ATSR structures identifies PCI Express Root-Ports supporting + Address Translation Services (ATS) transactions. Software must enable + ATS on endpoint devices behind a Root Port only if the Root Port is + reported as supporting ATS transactions." 
+ +Clearly root complex integrated devices aren't "behind root ports", +matching my observation on a SapphireRapids system having an ATS- +capable root complex integrated device. Hence for such devices we +shouldn't try to locate a corresponding ATSR. + +Since both pci_find_ext_capability() and pci_find_cap_offset() return +"unsigned int", change "pos" to that type at the same time. + +Fixes: 903b93211f56 ("[VTD] laying the ground work for ATS") +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 04e31583bab97e5042a44a1d00fce2760272635f +master date: 2024-05-06 09:22:45 +0200 +--- + xen/drivers/passthrough/vtd/x86/ats.c | 9 +++++++-- + 1 file changed, 7 insertions(+), 2 deletions(-) + +diff --git a/xen/drivers/passthrough/vtd/x86/ats.c b/xen/drivers/passthrough/vtd/x86/ats.c +index 1f5913bed9..61052ef580 100644 +--- a/xen/drivers/passthrough/vtd/x86/ats.c ++++ b/xen/drivers/passthrough/vtd/x86/ats.c +@@ -44,7 +44,7 @@ struct acpi_drhd_unit *find_ats_dev_drhd(struct vtd_iommu *iommu) + int ats_device(const struct pci_dev *pdev, const struct acpi_drhd_unit *drhd) + { + struct acpi_drhd_unit *ats_drhd; +- int pos; ++ unsigned int pos, expfl = 0; + + if ( !ats_enabled || !iommu_qinval ) + return 0; +@@ -53,7 +53,12 @@ int ats_device(const struct pci_dev *pdev, const struct acpi_drhd_unit *drhd) + !ecap_dev_iotlb(drhd->iommu->ecap) ) + return 0; + +- if ( !acpi_find_matched_atsr_unit(pdev) ) ++ pos = pci_find_cap_offset(pdev->sbdf, PCI_CAP_ID_EXP); ++ if ( pos ) ++ expfl = pci_conf_read16(pdev->sbdf, pos + PCI_EXP_FLAGS); ++ ++ if ( MASK_EXTR(expfl, PCI_EXP_FLAGS_TYPE) != PCI_EXP_TYPE_RC_END && ++ !acpi_find_matched_atsr_unit(pdev) ) + return 0; + + ats_drhd = find_ats_dev_drhd(drhd->iommu); +-- +2.45.2 + diff --git a/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch b/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch deleted file mode 100644 index f33d27d..0000000 --- 
a/0009-lib-fdt-elf-move-lib-fdt-elf-temp.o-and-their-deps-t.patch +++ /dev/null @@ -1,70 +0,0 @@ -From afb85cf1e8f165abf88de9d8a6df625692a753b1 Mon Sep 17 00:00:00 2001 -From: Michal Orzel <michal.orzel@amd.com> -Date: Fri, 2 Feb 2024 08:04:07 +0100 -Subject: [PATCH 09/67] lib{fdt,elf}: move lib{fdt,elf}-temp.o and their deps - to $(targets) - -At the moment, trying to run xencov read/reset (calling SYSCTL_coverage_op -under the hood) results in a crash. This is due to a profiler trying to -access data in the .init.* sections (libfdt for Arm and libelf for x86) -that are stripped after boot. Normally, the build system compiles any -*.init.o file without COV_FLAGS. However, these two libraries are -handled differently as sections will be renamed to init after linking. - -To override COV_FLAGS to empty for these libraries, lib{fdt,elf}.o were -added to nocov-y. This worked until e321576f4047 ("xen/build: start using -if_changed") that added lib{fdt,elf}-temp.o and their deps to extra-y. -This way, even though these objects appear as prerequisites of -lib{fdt,elf}.o and the settings should propagate to them, make can also -build them as a prerequisite of __build, in which case COV_FLAGS would -still have the unwanted flags. Fix it by switching to $(targets) instead. - -Also, for libfdt, append libfdt.o to nocov-y only if CONFIG_OVERLAY_DTB -is not set. Otherwise, there is no section renaming and we should be able -to run the coverage. 
- -Fixes: e321576f4047 ("xen/build: start using if_changed") -Signed-off-by: Michal Orzel <michal.orzel@amd.com> -Reviewed-by: Anthony PERARD <anthony.perard@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -master commit: 79519fcfa0605bbf19d8c02b979af3a2c8afed68 -master date: 2024-01-23 12:02:44 +0100 ---- - xen/common/libelf/Makefile | 2 +- - xen/common/libfdt/Makefile | 4 ++-- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/common/libelf/Makefile b/xen/common/libelf/Makefile -index 8a4522e4e1..917d12b006 100644 ---- a/xen/common/libelf/Makefile -+++ b/xen/common/libelf/Makefile -@@ -13,4 +13,4 @@ $(obj)/libelf.o: $(obj)/libelf-temp.o FORCE - $(obj)/libelf-temp.o: $(addprefix $(obj)/,$(libelf-objs)) FORCE - $(call if_changed,ld) - --extra-y += libelf-temp.o $(libelf-objs) -+targets += libelf-temp.o $(libelf-objs) -diff --git a/xen/common/libfdt/Makefile b/xen/common/libfdt/Makefile -index 75aaefa2e3..4d14fd61ba 100644 ---- a/xen/common/libfdt/Makefile -+++ b/xen/common/libfdt/Makefile -@@ -2,9 +2,9 @@ include $(src)/Makefile.libfdt - - SECTIONS := text data $(SPECIAL_DATA_SECTIONS) - OBJCOPYFLAGS := $(foreach s,$(SECTIONS),--rename-section .$(s)=.init.$(s)) -+nocov-y += libfdt.o - - obj-y += libfdt.o --nocov-y += libfdt.o - - CFLAGS-y += -I$(srctree)/include/xen/libfdt/ - -@@ -14,4 +14,4 @@ $(obj)/libfdt.o: $(obj)/libfdt-temp.o FORCE - $(obj)/libfdt-temp.o: $(addprefix $(obj)/,$(LIBFDT_OBJS)) FORCE - $(call if_changed,ld) - --extra-y += libfdt-temp.o $(LIBFDT_OBJS) -+targets += libfdt-temp.o $(LIBFDT_OBJS) --- -2.44.0 - diff --git a/0010-tools-libxs-Open-dev-xen-xenbus-fds-as-O_CLOEXEC.patch b/0010-tools-libxs-Open-dev-xen-xenbus-fds-as-O_CLOEXEC.patch new file mode 100644 index 0000000..9f9cdd7 --- /dev/null +++ b/0010-tools-libxs-Open-dev-xen-xenbus-fds-as-O_CLOEXEC.patch @@ -0,0 +1,47 @@ +From 2bc52041cacb33a301ebf939d69a021597941186 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Tue, 21 May 2024 
10:21:47 +0200 +Subject: [PATCH 10/56] tools/libxs: Open /dev/xen/xenbus fds as O_CLOEXEC + +The header description for xs_open() goes as far as to suggest that the fd is +O_CLOEXEC, but it isn't actually. + +`xl devd` has been observed leaking /dev/xen/xenbus into children. + +Link: https://github.com/QubesOS/qubes-issues/issues/8292 +Reported-by: Demi Marie Obenour <demi@invisiblethingslab.com> +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Juergen Gross <jgross@suse.com> +master commit: f4f2f3402b2f4985d69ffc0d46f845d05fd0b60f +master date: 2024-05-07 15:18:36 +0100 +--- + tools/libs/store/xs.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c +index 140b9a2839..1498515073 100644 +--- a/tools/libs/store/xs.c ++++ b/tools/libs/store/xs.c +@@ -54,6 +54,10 @@ struct xs_stored_msg { + #include <dlfcn.h> + #endif + ++#ifndef O_CLOEXEC ++#define O_CLOEXEC 0 ++#endif ++ + struct xs_handle { + /* Communications channel to xenstore daemon. 
*/ + int fd; +@@ -227,7 +231,7 @@ error: + static int get_dev(const char *connect_to) + { + /* We cannot open read-only because requests are writes */ +- return open(connect_to, O_RDWR); ++ return open(connect_to, O_RDWR | O_CLOEXEC); + } + + static int all_restrict_cb(Xentoolcore__Active_Handle *ah, domid_t domid) { +-- +2.45.2 + diff --git a/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch b/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch deleted file mode 100644 index 9b3b9a0..0000000 --- a/0010-x86-p2m-pt-fix-off-by-one-in-entry-check-assert.patch +++ /dev/null @@ -1,36 +0,0 @@ -From 091466ba55d1e2e75738f751818ace2e3ed08ccf Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Fri, 2 Feb 2024 08:04:33 +0100 -Subject: [PATCH 10/67] x86/p2m-pt: fix off by one in entry check assert -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The MMIO RO rangeset overlap check is bogus: the rangeset is inclusive so the -passed end mfn should be the last mfn to be mapped (not last + 1). 
- -Fixes: 6fa1755644d0 ('amd/npt/shadow: replace assert that prevents creating 2M/1G MMIO entries') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: George Dunlap <george.dunlap@cloud.com> -master commit: 610775d0dd61c1bd2f4720c755986098e6a5bafd -master date: 2024-01-25 16:09:04 +0100 ---- - xen/arch/x86/mm/p2m-pt.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/x86/mm/p2m-pt.c b/xen/arch/x86/mm/p2m-pt.c -index eaba2b0fb4..f02ebae372 100644 ---- a/xen/arch/x86/mm/p2m-pt.c -+++ b/xen/arch/x86/mm/p2m-pt.c -@@ -564,7 +564,7 @@ static void check_entry(mfn_t mfn, p2m_type_t new, p2m_type_t old, - if ( new == p2m_mmio_direct ) - ASSERT(!mfn_eq(mfn, INVALID_MFN) && - !rangeset_overlaps_range(mmio_ro_ranges, mfn_x(mfn), -- mfn_x(mfn) + (1ul << order))); -+ mfn_x(mfn) + (1UL << order) - 1)); - else if ( p2m_allows_invalid_mfn(new) || new == p2m_invalid || - new == p2m_mmio_dm ) - ASSERT(mfn_valid(mfn) || mfn_eq(mfn, INVALID_MFN)); --- -2.44.0 - diff --git a/0011-tools-xentop-fix-sorting-bug-for-some-columns.patch b/0011-tools-xentop-fix-sorting-bug-for-some-columns.patch deleted file mode 100644 index 6bf11d9..0000000 --- a/0011-tools-xentop-fix-sorting-bug-for-some-columns.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 61da71968ea44964fd1dd2e449b053c77eb83139 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Cyril=20R=C3=A9bert=20=28zithro=29?= <slack@rabbit.lu> -Date: Tue, 27 Feb 2024 14:06:53 +0100 -Subject: [PATCH 11/67] tools/xentop: fix sorting bug for some columns -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Sort doesn't work on columns VBD_OO, VBD_RD, VBD_WR and VBD_RSECT. -Fix by adjusting variables names in compare functions. -Bug fix only. No functional change. 
- -Fixes: 91c3e3dc91d6 ("tools/xentop: Display '-' when stats are not available.") -Signed-off-by: Cyril Rébert (zithro) <slack@rabbit.lu> -Reviewed-by: Anthony PERARD <anthony.perard@citrix.com> -master commit: 29f17d837421f13c0e0010802de1b2d51d2ded4a -master date: 2024-02-05 17:58:23 +0000 ---- - tools/xentop/xentop.c | 10 +++++----- - 1 file changed, 5 insertions(+), 5 deletions(-) - -diff --git a/tools/xentop/xentop.c b/tools/xentop/xentop.c -index 950e8935c4..545bd5e96d 100644 ---- a/tools/xentop/xentop.c -+++ b/tools/xentop/xentop.c -@@ -684,7 +684,7 @@ static int compare_vbd_oo(xenstat_domain *domain1, xenstat_domain *domain2) - unsigned long long dom1_vbd_oo = 0, dom2_vbd_oo = 0; - - tot_vbd_reqs(domain1, FIELD_VBD_OO, &dom1_vbd_oo); -- tot_vbd_reqs(domain1, FIELD_VBD_OO, &dom2_vbd_oo); -+ tot_vbd_reqs(domain2, FIELD_VBD_OO, &dom2_vbd_oo); - - return -compare(dom1_vbd_oo, dom2_vbd_oo); - } -@@ -711,9 +711,9 @@ static int compare_vbd_rd(xenstat_domain *domain1, xenstat_domain *domain2) - unsigned long long dom1_vbd_rd = 0, dom2_vbd_rd = 0; - - tot_vbd_reqs(domain1, FIELD_VBD_RD, &dom1_vbd_rd); -- tot_vbd_reqs(domain1, FIELD_VBD_RD, &dom2_vbd_rd); -+ tot_vbd_reqs(domain2, FIELD_VBD_RD, &dom2_vbd_rd); - -- return -compare(dom1_vbd_rd, dom1_vbd_rd); -+ return -compare(dom1_vbd_rd, dom2_vbd_rd); - } - - /* Prints number of total VBD READ requests statistic */ -@@ -738,7 +738,7 @@ static int compare_vbd_wr(xenstat_domain *domain1, xenstat_domain *domain2) - unsigned long long dom1_vbd_wr = 0, dom2_vbd_wr = 0; - - tot_vbd_reqs(domain1, FIELD_VBD_WR, &dom1_vbd_wr); -- tot_vbd_reqs(domain1, FIELD_VBD_WR, &dom2_vbd_wr); -+ tot_vbd_reqs(domain2, FIELD_VBD_WR, &dom2_vbd_wr); - - return -compare(dom1_vbd_wr, dom2_vbd_wr); - } -@@ -765,7 +765,7 @@ static int compare_vbd_rsect(xenstat_domain *domain1, xenstat_domain *domain2) - unsigned long long dom1_vbd_rsect = 0, dom2_vbd_rsect = 0; - - tot_vbd_reqs(domain1, FIELD_VBD_RSECT, &dom1_vbd_rsect); -- tot_vbd_reqs(domain1, 
FIELD_VBD_RSECT, &dom2_vbd_rsect); -+ tot_vbd_reqs(domain2, FIELD_VBD_RSECT, &dom2_vbd_rsect); - - return -compare(dom1_vbd_rsect, dom2_vbd_rsect); - } --- -2.44.0 - diff --git a/0011-x86-cpu-policy-Fix-migration-from-Ice-Lake-to-Cascad.patch b/0011-x86-cpu-policy-Fix-migration-from-Ice-Lake-to-Cascad.patch new file mode 100644 index 0000000..26eb3ec --- /dev/null +++ b/0011-x86-cpu-policy-Fix-migration-from-Ice-Lake-to-Cascad.patch @@ -0,0 +1,92 @@ +From 0673eae8e53de5007dba35149527579819428323 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Tue, 21 May 2024 10:22:08 +0200 +Subject: [PATCH 11/56] x86/cpu-policy: Fix migration from Ice Lake to Cascade + Lake +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Ever since Xen 4.14, there has been a latent bug with migration. + +While some toolstacks can level the features properly, they don't shrink +feat.max_subleaf when all features have been dropped. This is because +we *still* have not completed the toolstack side work for full CPU Policy +objects. + +As a consequence, even when properly feature levelled, VMs can't migrate +"backwards" across hardware which reduces feat.max_subleaf. One such example +is Ice Lake (max_subleaf=2 for INTEL_PSFD) to Cascade Lake (max_subleaf=0). + +Extend the max policies feat.max_subleaf to the highest number Xen knows +about, but leave the default policies matching the host. This will allow VMs +with a higher feat.max_subleaf than strictly necessary to migrate in. + +Eventually we'll manage to teach the toolstack how to avoid creating such VMs +in the first place, but there's still more work to do there. 
+ +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Acked-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: a2330b51df267e20e66bbba6c5bf08f0570ed58b +master date: 2024-05-07 16:56:46 +0100 +--- + xen/arch/x86/cpu-policy.c | 22 ++++++++++++++++++++++ + 1 file changed, 22 insertions(+) + +diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c +index a822800f52..1aba6ed4ca 100644 +--- a/xen/arch/x86/cpu-policy.c ++++ b/xen/arch/x86/cpu-policy.c +@@ -603,6 +603,13 @@ static void __init calculate_pv_max_policy(void) + unsigned int i; + + *p = host_cpu_policy; ++ ++ /* ++ * Some VMs may have a larger-than-necessary feat max_subleaf. Allow them ++ * to migrate in. ++ */ ++ p->feat.max_subleaf = ARRAY_SIZE(p->feat.raw) - 1; ++ + x86_cpu_policy_to_featureset(p, fs); + + for ( i = 0; i < ARRAY_SIZE(fs); ++i ) +@@ -643,6 +650,10 @@ static void __init calculate_pv_def_policy(void) + unsigned int i; + + *p = pv_max_cpu_policy; ++ ++ /* Default to the same max_subleaf as the host. */ ++ p->feat.max_subleaf = host_cpu_policy.feat.max_subleaf; ++ + x86_cpu_policy_to_featureset(p, fs); + + for ( i = 0; i < ARRAY_SIZE(fs); ++i ) +@@ -679,6 +690,13 @@ static void __init calculate_hvm_max_policy(void) + const uint32_t *mask; + + *p = host_cpu_policy; ++ ++ /* ++ * Some VMs may have a larger-than-necessary feat max_subleaf. Allow them ++ * to migrate in. ++ */ ++ p->feat.max_subleaf = ARRAY_SIZE(p->feat.raw) - 1; ++ + x86_cpu_policy_to_featureset(p, fs); + + mask = hvm_hap_supported() ? +@@ -780,6 +798,10 @@ static void __init calculate_hvm_def_policy(void) + const uint32_t *mask; + + *p = hvm_max_cpu_policy; ++ ++ /* Default to the same max_subleaf as the host. */ ++ p->feat.max_subleaf = host_cpu_policy.feat.max_subleaf; ++ + x86_cpu_policy_to_featureset(p, fs); + + mask = hvm_hap_supported() ? 
+-- +2.45.2 + diff --git a/0012-amd-vi-fix-IVMD-memory-type-checks.patch b/0012-amd-vi-fix-IVMD-memory-type-checks.patch deleted file mode 100644 index f38e39e..0000000 --- a/0012-amd-vi-fix-IVMD-memory-type-checks.patch +++ /dev/null @@ -1,53 +0,0 @@ -From 463aaf3fbf62d24e898ae0c2ba53d85ca0f94d3f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 27 Feb 2024 14:07:12 +0100 -Subject: [PATCH 12/67] amd-vi: fix IVMD memory type checks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current code that parses the IVMD blocks is relaxed with regard to the -restriction that such unity regions should always fall into memory ranges -marked as reserved in the memory map. - -However the type checks for the IVMD addresses are inverted, and as a result -IVMD ranges falling into RAM areas are accepted. Note that having such ranges -in the first place is a firmware bug, as IVMD should always fall into reserved -ranges. - -Fixes: ed6c77ebf0c1 ('AMD/IOMMU: check / convert IVMD ranges for being / to be reserved') -Reported-by: Ox <oxjo@proton.me> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Tested-by: oxjo <oxjo@proton.me> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 83afa313583019d9f159c122cecf867735d27ec5 -master date: 2024-02-06 11:56:13 +0100 ---- - xen/drivers/passthrough/amd/iommu_acpi.c | 11 ++++++++--- - 1 file changed, 8 insertions(+), 3 deletions(-) - -diff --git a/xen/drivers/passthrough/amd/iommu_acpi.c b/xen/drivers/passthrough/amd/iommu_acpi.c -index 3b577c9b39..3a7045c39b 100644 ---- a/xen/drivers/passthrough/amd/iommu_acpi.c -+++ b/xen/drivers/passthrough/amd/iommu_acpi.c -@@ -426,9 +426,14 @@ static int __init parse_ivmd_block(const struct acpi_ivrs_memory *ivmd_block) - return -EIO; - } - -- /* Types which won't be handed out are considered good enough. 
*/ -- if ( !(type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI | -- RAM_TYPE_UNUSABLE)) ) -+ /* -+ * Types which aren't RAM are considered good enough. -+ * Note that a page being partially RESERVED, ACPI or UNUSABLE will -+ * force Xen into assuming the whole page as having that type in -+ * practice. -+ */ -+ if ( type & (RAM_TYPE_RESERVED | RAM_TYPE_ACPI | -+ RAM_TYPE_UNUSABLE) ) - continue; - - AMD_IOMMU_ERROR("IVMD: page at %lx can't be converted\n", addr); --- -2.44.0 - diff --git a/0012-x86-ucode-Distinguish-ucode-already-up-to-date.patch b/0012-x86-ucode-Distinguish-ucode-already-up-to-date.patch new file mode 100644 index 0000000..dd2f91a --- /dev/null +++ b/0012-x86-ucode-Distinguish-ucode-already-up-to-date.patch @@ -0,0 +1,58 @@ +From a42c83b202cc034c43c723cf363dbbabac61b1af Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Tue, 21 May 2024 10:22:52 +0200 +Subject: [PATCH 12/56] x86/ucode: Distinguish "ucode already up to date" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Right now, Xen returns -ENOENT for both "the provided blob isn't correct for +this CPU", and "the blob isn't newer than what's loaded". + +This in turn causes xen-ucode to exit with an error, when "nothing to do" is +more commonly a success condition. + +Handle EEXIST specially and exit cleanly. 
+ +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Acked-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 648db37a155aca6f66d4cf3bb118417a728c3579 +master date: 2024-05-09 18:19:49 +0100 +--- + tools/misc/xen-ucode.c | 5 ++++- + xen/arch/x86/cpu/microcode/core.c | 2 +- + 2 files changed, 5 insertions(+), 2 deletions(-) + +diff --git a/tools/misc/xen-ucode.c b/tools/misc/xen-ucode.c +index c6ae6498d6..390969db3d 100644 +--- a/tools/misc/xen-ucode.c ++++ b/tools/misc/xen-ucode.c +@@ -125,8 +125,11 @@ int main(int argc, char *argv[]) + exit(1); + } + ++ errno = 0; + ret = xc_microcode_update(xch, buf, len); +- if ( ret ) ++ if ( ret == -1 && errno == EEXIST ) ++ printf("Microcode already up to date\n"); ++ else if ( ret ) + { + fprintf(stderr, "Failed to update microcode. (err: %s)\n", + strerror(errno)); +diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c +index 4e011cdc41..d5338ad345 100644 +--- a/xen/arch/x86/cpu/microcode/core.c ++++ b/xen/arch/x86/cpu/microcode/core.c +@@ -640,7 +640,7 @@ static long cf_check microcode_update_helper(void *data) + "microcode: couldn't find any newer%s revision in the provided blob!\n", + opt_ucode_allow_same ? 
" (or the same)" : ""); + microcode_free_patch(patch); +- ret = -ENOENT; ++ ret = -EEXIST; + + goto put; + } +-- +2.45.2 + diff --git a/0013-libxl-fix-population-of-the-online-vCPU-bitmap-for-P.patch b/0013-libxl-fix-population-of-the-online-vCPU-bitmap-for-P.patch new file mode 100644 index 0000000..e5fb285 --- /dev/null +++ b/0013-libxl-fix-population-of-the-online-vCPU-bitmap-for-P.patch @@ -0,0 +1,61 @@ +From 9966e5413133157a630f7462518005fb898e582a Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 21 May 2024 10:23:27 +0200 +Subject: [PATCH 13/56] libxl: fix population of the online vCPU bitmap for PVH +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +libxl passes some information to libacpi to create the ACPI table for a PVH +guest, and among that information it's a bitmap of which vCPUs are online +which can be less than the maximum number of vCPUs assigned to the domain. + +While the population of the bitmap is done correctly for HVM based on the +number of online vCPUs, for PVH the population of the bitmap is done based on +the number of maximum vCPUs allowed. This leads to all local APIC entries in +the MADT being set as enabled, which contradicts the data in xenstore if vCPUs +is different than maximum vCPUs. + +Fix by copying the internal libxl bitmap that's populated based on the vCPUs +parameter. 
+ +Reported-by: Arthur Borsboom <arthurborsboom@gmail.com> +Link: https://gitlab.com/libvirt/libvirt/-/issues/399 +Reported-by: Leigh Brown <leigh@solinno.co.uk> +Fixes: 14c0d328da2b ('libxl/acpi: Build ACPI tables for HVMlite guests') +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Tested-by: Leigh Brown <leigh@solinno.co.uk> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: 5cc7347b04b2d0a3133754c7a9b936f614ec656a +master date: 2024-05-11 00:13:43 +0100 +--- + tools/libs/light/libxl_x86_acpi.c | 6 +++--- + 1 file changed, 3 insertions(+), 3 deletions(-) + +diff --git a/tools/libs/light/libxl_x86_acpi.c b/tools/libs/light/libxl_x86_acpi.c +index 620f3c700c..5cf261bd67 100644 +--- a/tools/libs/light/libxl_x86_acpi.c ++++ b/tools/libs/light/libxl_x86_acpi.c +@@ -89,7 +89,7 @@ static int init_acpi_config(libxl__gc *gc, + uint32_t domid = dom->guest_domid; + xc_domaininfo_t info; + struct hvm_info_table *hvminfo; +- int i, r, rc; ++ int r, rc; + + config->dsdt_anycpu = config->dsdt_15cpu = dsdt_pvh; + config->dsdt_anycpu_len = config->dsdt_15cpu_len = dsdt_pvh_len; +@@ -138,8 +138,8 @@ static int init_acpi_config(libxl__gc *gc, + hvminfo->nr_vcpus = info.max_vcpu_id + 1; + } + +- for (i = 0; i < hvminfo->nr_vcpus; i++) +- hvminfo->vcpu_online[i / 8] |= 1 << (i & 7); ++ memcpy(hvminfo->vcpu_online, b_info->avail_vcpus.map, ++ b_info->avail_vcpus.size); + + config->hvminfo = hvminfo; + +-- +2.45.2 + diff --git a/0013-x86-hvm-Fix-fast-singlestep-state-persistence.patch b/0013-x86-hvm-Fix-fast-singlestep-state-persistence.patch deleted file mode 100644 index 2a14354..0000000 --- a/0013-x86-hvm-Fix-fast-singlestep-state-persistence.patch +++ /dev/null @@ -1,86 +0,0 @@ -From 415f770d23f9fcbc02436560fa6583dcd8e1343f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Petr=20Bene=C5=A1?= <w1benny@gmail.com> -Date: Tue, 27 Feb 2024 14:07:45 +0100 -Subject: [PATCH 13/67] x86/hvm: Fix fast singlestep state persistence -MIME-Version: 1.0 -Content-Type: 
text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This patch addresses an issue where the fast singlestep setting would persist -despite xc_domain_debug_control being called with XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF. -Specifically, if fast singlestep was enabled in a VMI session and that session -stopped before the MTF trap occurred, the fast singlestep setting remained -active even though MTF itself was disabled. This led to a situation where, upon -starting a new VMI session, the first event to trigger an EPT violation would -cause the corresponding EPT event callback to be skipped due to the lingering -fast singlestep setting. - -The fix ensures that the fast singlestep setting is properly reset when -disabling single step debugging operations. - -Signed-off-by: Petr Beneš <w1benny@gmail.com> -Reviewed-by: Tamas K Lengyel <tamas@tklengyel.com> -master commit: 897def94b56175ce569673a05909d2f223e1e749 -master date: 2024-02-12 09:37:58 +0100 ---- - xen/arch/x86/hvm/hvm.c | 34 ++++++++++++++++++++++++---------- - 1 file changed, 24 insertions(+), 10 deletions(-) - -diff --git a/xen/arch/x86/hvm/hvm.c b/xen/arch/x86/hvm/hvm.c -index d6c6ab8897..558dc3eddc 100644 ---- a/xen/arch/x86/hvm/hvm.c -+++ b/xen/arch/x86/hvm/hvm.c -@@ -5153,26 +5153,40 @@ long do_hvm_op(unsigned long op, XEN_GUEST_HANDLE_PARAM(void) arg) - - int hvm_debug_op(struct vcpu *v, int32_t op) - { -- int rc; -+ int rc = 0; - - switch ( op ) - { - case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON: - case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF: -- rc = -EOPNOTSUPP; - if ( !cpu_has_monitor_trap_flag ) -- break; -- rc = 0; -- vcpu_pause(v); -- v->arch.hvm.single_step = -- (op == XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON); -- vcpu_unpause(v); /* guest will latch new state */ -+ return -EOPNOTSUPP; - break; - default: -- rc = -ENOSYS; -- break; -+ return -ENOSYS; -+ } -+ -+ vcpu_pause(v); -+ -+ switch ( op ) -+ { -+ case XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_ON: -+ v->arch.hvm.single_step = true; -+ break; -+ -+ case 
XEN_DOMCTL_DEBUG_OP_SINGLE_STEP_OFF: -+ v->arch.hvm.single_step = false; -+ v->arch.hvm.fast_single_step.enabled = false; -+ v->arch.hvm.fast_single_step.p2midx = 0; -+ break; -+ -+ default: /* Excluded above */ -+ ASSERT_UNREACHABLE(); -+ return -ENOSYS; - } - -+ vcpu_unpause(v); /* guest will latch new state */ -+ - return rc; - } - --- -2.44.0 - diff --git a/0014-libxl-Fix-handling-XenStore-errors-in-device-creatio.patch b/0014-libxl-Fix-handling-XenStore-errors-in-device-creatio.patch new file mode 100644 index 0000000..ac28521 --- /dev/null +++ b/0014-libxl-Fix-handling-XenStore-errors-in-device-creatio.patch @@ -0,0 +1,191 @@ +From 8271f0e8f23b63199caf0edcfe85ebc1c1412d1b Mon Sep 17 00:00:00 2001 +From: Demi Marie Obenour <demi@invisiblethingslab.com> +Date: Tue, 21 May 2024 10:23:52 +0200 +Subject: [PATCH 14/56] libxl: Fix handling XenStore errors in device creation + +If xenstored runs out of memory it is possible for it to fail operations +that should succeed. libxl wasn't robust against this, and could fail +to ensure that the TTY path of a non-initial console was created and +read-only for guests. This doesn't qualify for an XSA because guests +should not be able to run xenstored out of memory, but it still needs to +be fixed. + +Add the missing error checks to ensure that all errors are properly +handled and that at no point can a guest make the TTY path of its +frontend directory writable. 
+ +Signed-off-by: Demi Marie Obenour <demi@invisiblethingslab.com> +Reviewed-by: Juergen Gross <jgross@suse.com> +master commit: 531d3bea5e9357357eaf6d40f5784a1b4c29b910 +master date: 2024-05-11 00:13:43 +0100 +--- + tools/libs/light/libxl_console.c | 11 ++--- + tools/libs/light/libxl_device.c | 72 ++++++++++++++++++++------------ + tools/libs/light/libxl_xshelp.c | 13 ++++-- + 3 files changed, 60 insertions(+), 36 deletions(-) + +diff --git a/tools/libs/light/libxl_console.c b/tools/libs/light/libxl_console.c +index cd7412a327..a563c9d3c7 100644 +--- a/tools/libs/light/libxl_console.c ++++ b/tools/libs/light/libxl_console.c +@@ -351,11 +351,10 @@ int libxl__device_console_add(libxl__gc *gc, uint32_t domid, + flexarray_append(front, "protocol"); + flexarray_append(front, LIBXL_XENCONSOLE_PROTOCOL); + } +- libxl__device_generic_add(gc, XBT_NULL, device, +- libxl__xs_kvs_of_flexarray(gc, back), +- libxl__xs_kvs_of_flexarray(gc, front), +- libxl__xs_kvs_of_flexarray(gc, ro_front)); +- rc = 0; ++ rc = libxl__device_generic_add(gc, XBT_NULL, device, ++ libxl__xs_kvs_of_flexarray(gc, back), ++ libxl__xs_kvs_of_flexarray(gc, front), ++ libxl__xs_kvs_of_flexarray(gc, ro_front)); + out: + return rc; + } +@@ -665,6 +664,8 @@ int libxl_device_channel_getinfo(libxl_ctx *ctx, uint32_t domid, + */ + if (!val) val = "/NO-SUCH-PATH"; + channelinfo->u.pty.path = strdup(val); ++ if (channelinfo->u.pty.path == NULL) ++ abort(); + break; + default: + break; +diff --git a/tools/libs/light/libxl_device.c b/tools/libs/light/libxl_device.c +index 13da6e0573..3035501f2c 100644 +--- a/tools/libs/light/libxl_device.c ++++ b/tools/libs/light/libxl_device.c +@@ -177,8 +177,13 @@ int libxl__device_generic_add(libxl__gc *gc, xs_transaction_t t, + ro_frontend_perms[1].perms = backend_perms[1].perms = XS_PERM_READ; + + retry_transaction: +- if (create_transaction) ++ if (create_transaction) { + t = xs_transaction_start(ctx->xsh); ++ if (t == XBT_NULL) { ++ LOGED(ERROR, device->domid, 
"xs_transaction_start failed"); ++ return ERROR_FAIL; ++ } ++ } + + /* FIXME: read frontend_path and check state before removing stuff */ + +@@ -195,42 +200,55 @@ retry_transaction: + if (rc) goto out; + } + +- /* xxx much of this function lacks error checks! */ +- + if (fents || ro_fents) { +- xs_rm(ctx->xsh, t, frontend_path); +- xs_mkdir(ctx->xsh, t, frontend_path); ++ if (!xs_rm(ctx->xsh, t, frontend_path) && errno != ENOENT) ++ goto out; ++ if (!xs_mkdir(ctx->xsh, t, frontend_path)) ++ goto out; + /* Console 0 is a special case. It doesn't use the regular PV + * state machine but also the frontend directory has + * historically contained other information, such as the + * vnc-port, which we don't want the guest fiddling with. + */ + if ((device->kind == LIBXL__DEVICE_KIND_CONSOLE && device->devid == 0) || +- (device->kind == LIBXL__DEVICE_KIND_VUART)) +- xs_set_permissions(ctx->xsh, t, frontend_path, +- ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms)); +- else +- xs_set_permissions(ctx->xsh, t, frontend_path, +- frontend_perms, ARRAY_SIZE(frontend_perms)); +- xs_write(ctx->xsh, t, GCSPRINTF("%s/backend", frontend_path), +- backend_path, strlen(backend_path)); +- if (fents) +- libxl__xs_writev_perms(gc, t, frontend_path, fents, +- frontend_perms, ARRAY_SIZE(frontend_perms)); +- if (ro_fents) +- libxl__xs_writev_perms(gc, t, frontend_path, ro_fents, +- ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms)); ++ (device->kind == LIBXL__DEVICE_KIND_VUART)) { ++ if (!xs_set_permissions(ctx->xsh, t, frontend_path, ++ ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms))) ++ goto out; ++ } else { ++ if (!xs_set_permissions(ctx->xsh, t, frontend_path, ++ frontend_perms, ARRAY_SIZE(frontend_perms))) ++ goto out; ++ } ++ if (!xs_write(ctx->xsh, t, GCSPRINTF("%s/backend", frontend_path), ++ backend_path, strlen(backend_path))) ++ goto out; ++ if (fents) { ++ rc = libxl__xs_writev_perms(gc, t, frontend_path, fents, ++ frontend_perms, ARRAY_SIZE(frontend_perms)); ++ if (rc) goto 
out; ++ } ++ if (ro_fents) { ++ rc = libxl__xs_writev_perms(gc, t, frontend_path, ro_fents, ++ ro_frontend_perms, ARRAY_SIZE(ro_frontend_perms)); ++ if (rc) goto out; ++ } + } + + if (bents) { + if (!libxl_only) { +- xs_rm(ctx->xsh, t, backend_path); +- xs_mkdir(ctx->xsh, t, backend_path); +- xs_set_permissions(ctx->xsh, t, backend_path, backend_perms, +- ARRAY_SIZE(backend_perms)); +- xs_write(ctx->xsh, t, GCSPRINTF("%s/frontend", backend_path), +- frontend_path, strlen(frontend_path)); +- libxl__xs_writev(gc, t, backend_path, bents); ++ if (!xs_rm(ctx->xsh, t, backend_path) && errno != ENOENT) ++ goto out; ++ if (!xs_mkdir(ctx->xsh, t, backend_path)) ++ goto out; ++ if (!xs_set_permissions(ctx->xsh, t, backend_path, backend_perms, ++ ARRAY_SIZE(backend_perms))) ++ goto out; ++ if (!xs_write(ctx->xsh, t, GCSPRINTF("%s/frontend", backend_path), ++ frontend_path, strlen(frontend_path))) ++ goto out; ++ rc = libxl__xs_writev(gc, t, backend_path, bents); ++ if (rc) goto out; + } + + /* +@@ -276,7 +294,7 @@ retry_transaction: + out: + if (create_transaction && t) + libxl__xs_transaction_abort(gc, &t); +- return rc; ++ return rc != 0 ? 
rc : ERROR_FAIL; + } + + typedef struct { +diff --git a/tools/libs/light/libxl_xshelp.c b/tools/libs/light/libxl_xshelp.c +index 751cd942d9..a6e34ab10f 100644 +--- a/tools/libs/light/libxl_xshelp.c ++++ b/tools/libs/light/libxl_xshelp.c +@@ -60,10 +60,15 @@ int libxl__xs_writev_perms(libxl__gc *gc, xs_transaction_t t, + for (i = 0; kvs[i] != NULL; i += 2) { + path = GCSPRINTF("%s/%s", dir, kvs[i]); + if (path && kvs[i + 1]) { +- int length = strlen(kvs[i + 1]); +- xs_write(ctx->xsh, t, path, kvs[i + 1], length); +- if (perms) +- xs_set_permissions(ctx->xsh, t, path, perms, num_perms); ++ size_t length = strlen(kvs[i + 1]); ++ if (length > UINT_MAX) ++ return ERROR_FAIL; ++ if (!xs_write(ctx->xsh, t, path, kvs[i + 1], length)) ++ return ERROR_FAIL; ++ if (perms) { ++ if (!xs_set_permissions(ctx->xsh, t, path, perms, num_perms)) ++ return ERROR_FAIL; ++ } + } + } + return 0; +-- +2.45.2 + diff --git a/0014-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch b/0014-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch deleted file mode 100644 index 6536674..0000000 --- a/0014-x86-HVM-tidy-state-on-hvmemul_map_linear_addr-s-erro.patch +++ /dev/null @@ -1,63 +0,0 @@ -From b3ae0e6201495216b12157bd8b2382b28fdd7dae Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 27 Feb 2024 14:08:20 +0100 -Subject: [PATCH 14/67] x86/HVM: tidy state on hvmemul_map_linear_addr()'s - error path - -While in the vast majority of cases failure of the function will not -be followed by re-invocation with the same emulation context, a few -very specific insns - involving multiple independent writes, e.g. ENTER -and PUSHA - exist where this can happen. Since failure of the function -only signals to the caller that it ought to try an MMIO write instead, -such failure also cannot be assumed to result in wholesale failure of -emulation of the current insn. 
Instead we have to maintain internal -state such that another invocation of the function with the same -emulation context remains possible. To achieve that we need to reset MFN -slots after putting page references on the error path. - -Note that all of this affects debugging code only, in causing an -assertion to trigger (higher up in the function). There's otherwise no -misbehavior - such a "leftover" slot would simply be overwritten by new -contents in a release build. - -Also extend the related unmap() assertion, to further check for MFN 0. - -Fixes: 8cbd4fb0b7ea ("x86/hvm: implement hvmemul_write() using real mappings") -Reported-by: Manuel Andreas <manuel.andreas@tum.de> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Paul Durrant <paul@xen.org> -master commit: e72f951df407bc3be82faac64d8733a270036ba1 -master date: 2024-02-13 09:36:14 +0100 ---- - xen/arch/x86/hvm/emulate.c | 7 ++++++- - 1 file changed, 6 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/hvm/emulate.c b/xen/arch/x86/hvm/emulate.c -index 275451dd36..27928dc3f3 100644 ---- a/xen/arch/x86/hvm/emulate.c -+++ b/xen/arch/x86/hvm/emulate.c -@@ -697,7 +697,12 @@ static void *hvmemul_map_linear_addr( - out: - /* Drop all held references. */ - while ( mfn-- > hvmemul_ctxt->mfn ) -+ { - put_page(mfn_to_page(*mfn)); -+#ifndef NDEBUG /* Clean slot for a subsequent map()'s error checking. 
*/ -+ *mfn = _mfn(0); -+#endif -+ } - - return err; - } -@@ -719,7 +724,7 @@ static void hvmemul_unmap_linear_addr( - - for ( i = 0; i < nr_frames; i++ ) - { -- ASSERT(mfn_valid(*mfn)); -+ ASSERT(mfn_x(*mfn) && mfn_valid(*mfn)); - paging_mark_dirty(currd, *mfn); - put_page(mfn_to_page(*mfn)); - --- -2.44.0 - diff --git a/0015-build-Replace-which-with-command-v.patch b/0015-build-Replace-which-with-command-v.patch deleted file mode 100644 index 57f21d4..0000000 --- a/0015-build-Replace-which-with-command-v.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 1330a5fe44ca91f98857b53fe8bbe06522d9db27 Mon Sep 17 00:00:00 2001 -From: Anthony PERARD <anthony.perard@citrix.com> -Date: Tue, 27 Feb 2024 14:08:50 +0100 -Subject: [PATCH 15/67] build: Replace `which` with `command -v` -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The `which` command is not standard, may not exist on the build host, -or may not behave as expected by the build system. It is recommended -to use `command -v` to find out if a command exist and have its path, -and it's part of a POSIX shell standard (at least, it seems to be -mandatory since IEEE Std 1003.1-2008, but was optional before). 
- -Fixes: c8a8645f1efe ("xen/build: Automatically locate a suitable python interpreter") -Fixes: 3b47bcdb6d38 ("xen/build: Use a distro version of figlet") -Signed-off-by: Anthony PERARD <anthony.perard@citrix.com> -Tested-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: f93629b18b528a5ab1b1092949c5420069c7226c -master date: 2024-02-19 12:45:48 +0100 ---- - xen/Makefile | 4 ++-- - xen/build.mk | 2 +- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/Makefile b/xen/Makefile -index dd0b004e1c..7ea13a6791 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -25,8 +25,8 @@ export XEN_BUILD_HOST := $(shell hostname) - endif - - # Best effort attempt to find a python interpreter, defaulting to Python 3 if --# available. Fall back to just `python` if `which` is nowhere to be found. --PYTHON_INTERPRETER := $(word 1,$(shell which python3 python python2 2>/dev/null) python) -+# available. Fall back to just `python`. 
-+PYTHON_INTERPRETER := $(word 1,$(shell command -v python3 || command -v python || command -v python2) python) - export PYTHON ?= $(PYTHON_INTERPRETER) - - export CHECKPOLICY ?= checkpolicy -diff --git a/xen/build.mk b/xen/build.mk -index 9ecb104f1e..b489f77b7c 100644 ---- a/xen/build.mk -+++ b/xen/build.mk -@@ -1,6 +1,6 @@ - quiet_cmd_banner = BANNER $@ - define cmd_banner -- if which figlet >/dev/null 2>&1 ; then \ -+ if command -v figlet >/dev/null 2>&1 ; then \ - echo " Xen $(XEN_FULLVERSION)" | figlet -f $< > $@.tmp; \ - else \ - echo " Xen $(XEN_FULLVERSION)" > $@.tmp; \ --- -2.44.0 - diff --git a/0015-xen-sched-set-all-sched_resource-data-inside-locked-.patch b/0015-xen-sched-set-all-sched_resource-data-inside-locked-.patch new file mode 100644 index 0000000..a8090d4 --- /dev/null +++ b/0015-xen-sched-set-all-sched_resource-data-inside-locked-.patch @@ -0,0 +1,84 @@ +From 3999b675cad5b717274d6493899b0eea8896f4d7 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Tue, 21 May 2024 10:24:26 +0200 +Subject: [PATCH 15/56] xen/sched: set all sched_resource data inside locked + region for new cpu + +When adding a cpu to a scheduler, set all data items of struct +sched_resource inside the locked region, as otherwise a race might +happen (e.g. 
when trying to access the cpupool of the cpu): + + (XEN) ----[ Xen-4.19.0-1-d x86_64 debug=y Tainted: H ]---- + (XEN) CPU: 45 + (XEN) RIP: e008:[<ffff82d040244cbf>] common/sched/credit.c#csched_load_balance+0x41/0x877 + (XEN) RFLAGS: 0000000000010092 CONTEXT: hypervisor + (XEN) rax: ffff82d040981618 rbx: ffff82d040981618 rcx: 0000000000000000 + (XEN) rdx: 0000003ff68cd000 rsi: 000000000000002d rdi: ffff83103723d450 + (XEN) rbp: ffff83207caa7d48 rsp: ffff83207caa7b98 r8: 0000000000000000 + (XEN) r9: ffff831037253cf0 r10: ffff83103767c3f0 r11: 0000000000000009 + (XEN) r12: ffff831037237990 r13: ffff831037237990 r14: ffff831037253720 + (XEN) r15: 0000000000000000 cr0: 000000008005003b cr4: 0000000000f526e0 + (XEN) cr3: 000000005bc2f000 cr2: 0000000000000010 + (XEN) fsb: 0000000000000000 gsb: 0000000000000000 gss: 0000000000000000 + (XEN) ds: 0000 es: 0000 fs: 0000 gs: 0000 ss: 0000 cs: e008 + (XEN) Xen code around <ffff82d040244cbf> (common/sched/credit.c#csched_load_balance+0x41/0x877): + (XEN) 48 8b 0c 10 48 8b 49 08 <48> 8b 79 10 48 89 bd b8 fe ff ff 49 8b 4e 28 48 + <snip> + (XEN) Xen call trace: + (XEN) [<ffff82d040244cbf>] R common/sched/credit.c#csched_load_balance+0x41/0x877 + (XEN) [<ffff82d040245a18>] F common/sched/credit.c#csched_schedule+0x36a/0x69f + (XEN) [<ffff82d040252644>] F common/sched/core.c#do_schedule+0xe8/0x433 + (XEN) [<ffff82d0402572dd>] F common/sched/core.c#schedule+0x2e5/0x2f9 + (XEN) [<ffff82d040232f35>] F common/softirq.c#__do_softirq+0x94/0xbe + (XEN) [<ffff82d040232fc8>] F do_softirq+0x13/0x15 + (XEN) [<ffff82d0403075ef>] F arch/x86/domain.c#idle_loop+0x92/0xe6 + (XEN) + (XEN) Pagetable walk from 0000000000000010: + (XEN) L4[0x000] = 000000103ff61063 ffffffffffffffff + (XEN) L3[0x000] = 000000103ff60063 ffffffffffffffff + (XEN) L2[0x000] = 0000001033dff063 ffffffffffffffff + (XEN) L1[0x000] = 0000000000000000 ffffffffffffffff + (XEN) + (XEN) **************************************** + (XEN) Panic on CPU 45: + (XEN) FATAL PAGE FAULT + 
(XEN) [error_code=0000] + (XEN) Faulting linear address: 0000000000000010 + (XEN) **************************************** + +Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> +Fixes: a8c6c623192e ("sched: clarify use cases of schedule_cpu_switch()") +Signed-off-by: Juergen Gross <jgross@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +Tested-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: d104a07524ffc92ae7a70dfe192c291de2a563cc +master date: 2024-05-15 19:59:52 +0100 +--- + xen/common/sched/core.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c +index 34ad39b9ad..3c2403ebcf 100644 +--- a/xen/common/sched/core.c ++++ b/xen/common/sched/core.c +@@ -3179,6 +3179,8 @@ int schedule_cpu_add(unsigned int cpu, struct cpupool *c) + + sr->scheduler = new_ops; + sr->sched_priv = ppriv; ++ sr->granularity = cpupool_get_granularity(c); ++ sr->cpupool = c; + + /* + * Reroute the lock to the per pCPU lock as /last/ thing. In fact, +@@ -3191,8 +3193,6 @@ int schedule_cpu_add(unsigned int cpu, struct cpupool *c) + /* _Not_ pcpu_schedule_unlock(): schedule_lock has changed! 
*/ + spin_unlock_irqrestore(old_lock, flags); + +- sr->granularity = cpupool_get_granularity(c); +- sr->cpupool = c; + /* The cpu is added to a pool, trigger it to go pick up some work */ + cpu_raise_softirq(cpu, SCHEDULE_SOFTIRQ); + +-- +2.45.2 + diff --git a/0016-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch b/0016-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch deleted file mode 100644 index f75e07c..0000000 --- a/0016-libxl-Disable-relocating-memory-for-qemu-xen-in-stub.patch +++ /dev/null @@ -1,50 +0,0 @@ -From b9745280736ee526374873aa3c4142596e2ba10b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Marek=20Marczykowski-G=C3=B3recki?= - <marmarek@invisiblethingslab.com> -Date: Tue, 27 Feb 2024 14:09:19 +0100 -Subject: [PATCH 16/67] libxl: Disable relocating memory for qemu-xen in - stubdomain too -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -According to comments (and experiments) qemu-xen cannot handle memory -relocation done by hvmloader. The code was already disabled when running -qemu-xen in dom0 (see libxl__spawn_local_dm()), but it was missed when -adding qemu-xen support to stubdomain. Adjust libxl__spawn_stub_dm() to -be consistent in this regard. 
- -Reported-by: Neowutran <xen@neowutran.ovh> -Signed-off-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> -Reviewed-by: Jason Andryuk <jandryuk@gmail.com> -Acked-by: Anthony PERARD <anthony.perard@citrix.com> -master commit: 97883aa269f6745a6ded232be3a855abb1297e0d -master date: 2024-02-22 11:48:22 +0100 ---- - tools/libs/light/libxl_dm.c | 10 ++++++++++ - 1 file changed, 10 insertions(+) - -diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c -index 14b593110f..ed620a9d8e 100644 ---- a/tools/libs/light/libxl_dm.c -+++ b/tools/libs/light/libxl_dm.c -@@ -2432,6 +2432,16 @@ void libxl__spawn_stub_dm(libxl__egc *egc, libxl__stub_dm_spawn_state *sdss) - "%s", - libxl_bios_type_to_string(guest_config->b_info.u.hvm.bios)); - } -+ /* Disable relocating memory to make the MMIO hole larger -+ * unless we're running qemu-traditional and vNUMA is not -+ * configured. */ -+ libxl__xs_printf(gc, XBT_NULL, -+ libxl__sprintf(gc, "%s/hvmloader/allow-memory-relocate", -+ libxl__xs_get_dompath(gc, guest_domid)), -+ "%d", -+ guest_config->b_info.device_model_version -+ == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN_TRADITIONAL && -+ !libxl__vnuma_configured(&guest_config->b_info)); - ret = xc_domain_set_target(ctx->xch, dm_domid, guest_domid); - if (ret<0) { - LOGED(ERROR, guest_domid, "setting target domain %d -> %d", --- -2.44.0 - diff --git a/0016-x86-respect-mapcache_domain_init-failing.patch b/0016-x86-respect-mapcache_domain_init-failing.patch new file mode 100644 index 0000000..db7ddfe --- /dev/null +++ b/0016-x86-respect-mapcache_domain_init-failing.patch @@ -0,0 +1,38 @@ +From dfabab2cd9461ef9d21a708461f35d2ae4b55220 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 21 May 2024 10:25:08 +0200 +Subject: [PATCH 16/56] x86: respect mapcache_domain_init() failing +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The function itself properly handles and hands onwards failure from 
+create_perdomain_mapping(). Therefore its caller should respect possible +failure, too. + +Fixes: 4b28bf6ae90b ("x86: re-introduce map_domain_page() et al") +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 7270fdc7a0028d4b7b26fd1b36c6b9e97abcf3da +master date: 2024-05-15 19:59:52 +0100 +--- + xen/arch/x86/domain.c | 3 ++- + 1 file changed, 2 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/domain.c b/xen/arch/x86/domain.c +index 307446273a..5feb0d0679 100644 +--- a/xen/arch/x86/domain.c ++++ b/xen/arch/x86/domain.c +@@ -850,7 +850,8 @@ int arch_domain_create(struct domain *d, + } + else if ( is_pv_domain(d) ) + { +- mapcache_domain_init(d); ++ if ( (rc = mapcache_domain_init(d)) != 0 ) ++ goto fail; + + if ( (rc = pv_domain_initialise(d)) != 0 ) + goto fail; +-- +2.45.2 + diff --git a/0017-build-make-sure-build-fails-when-running-kconfig-fai.patch b/0017-build-make-sure-build-fails-when-running-kconfig-fai.patch deleted file mode 100644 index 1bb3aa8..0000000 --- a/0017-build-make-sure-build-fails-when-running-kconfig-fai.patch +++ /dev/null @@ -1,58 +0,0 @@ -From ea869977271f93945451908be9b6117ffd1fb02d Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 27 Feb 2024 14:09:37 +0100 -Subject: [PATCH 17/67] build: make sure build fails when running kconfig fails -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Because of using "-include", failure to (re)build auto.conf (with -auto.conf.cmd produced as a secondary target) won't stop make from -continuing the build. Arrange for it being possible to drop the - from -Rules.mk, requiring that the include be skipped for tools-only targets. -Note that relying on the inclusion in those cases wouldn't be correct -anyway, as it might be a stale file (yet to be rebuilt) which would be -included, while during initial build, the file would be absent -altogether. 
- -Fixes: 8d4c17a90b0a ("xen/build: silence make warnings about missing auto.conf*") -Reported-by: Roger Pau Monné <roger.pau@citrix.com> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Anthony PERARD <anthony.perard@citrix.com> -master commit: d34e5fa2e8db19f23081f46a3e710bb122130691 -master date: 2024-02-22 11:52:47 +0100 ---- - xen/Makefile | 1 + - xen/Rules.mk | 4 +++- - 2 files changed, 4 insertions(+), 1 deletion(-) - -diff --git a/xen/Makefile b/xen/Makefile -index 7ea13a6791..bac3684a36 100644 ---- a/xen/Makefile -+++ b/xen/Makefile -@@ -374,6 +374,7 @@ $(KCONFIG_CONFIG): tools_fixdep - # This exploits the 'multi-target pattern rule' trick. - # The syncconfig should be executed only once to make all the targets. - include/config/%.conf include/config/%.conf.cmd: $(KCONFIG_CONFIG) -+ $(Q)rm -f include/config/auto.conf - $(Q)$(MAKE) $(build)=tools/kconfig syncconfig - - ifeq ($(CONFIG_DEBUG),y) -diff --git a/xen/Rules.mk b/xen/Rules.mk -index 8af3dd7277..d759cccee3 100644 ---- a/xen/Rules.mk -+++ b/xen/Rules.mk -@@ -15,7 +15,9 @@ srcdir := $(srctree)/$(src) - PHONY := __build - __build: - ---include $(objtree)/include/config/auto.conf -+ifneq ($(firstword $(subst /, ,$(obj))),tools) -+include $(objtree)/include/config/auto.conf -+endif - - include $(XEN_ROOT)/Config.mk - include $(srctree)/scripts/Kbuild.include --- -2.44.0 - diff --git a/0017-tools-xentop-Fix-cpu-sort-order.patch b/0017-tools-xentop-Fix-cpu-sort-order.patch new file mode 100644 index 0000000..de19ddc --- /dev/null +++ b/0017-tools-xentop-Fix-cpu-sort-order.patch @@ -0,0 +1,76 @@ +From f3d20dd31770a70971f4f85521eec1e741d38695 Mon Sep 17 00:00:00 2001 +From: Leigh Brown <leigh@solinno.co.uk> +Date: Tue, 21 May 2024 10:25:30 +0200 +Subject: [PATCH 17/56] tools/xentop: Fix cpu% sort order + +In compare_cpu_pct(), there is a double -> unsigned long long conversion when +calling compare(). 
In C, this discards the fractional part, resulting in an +out-of order sorting such as: + + NAME STATE CPU(sec) CPU(%) + xendd --b--- 4020 5.7 + icecream --b--- 2600 3.8 + Domain-0 -----r 1060 1.5 + neon --b--- 827 1.1 + cheese --b--- 225 0.7 + pizza --b--- 359 0.5 + cassini --b--- 490 0.4 + fusilli --b--- 159 0.2 + bob --b--- 502 0.2 + blender --b--- 121 0.2 + bread --b--- 69 0.1 + chickpea --b--- 67 0.1 + lentil --b--- 67 0.1 + +Introduce compare_dbl() function and update compare_cpu_pct() to call it. + +Fixes: 49839b535b78 ("Add xenstat framework.") +Signed-off-by: Leigh Brown <leigh@solinno.co.uk> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: e27fc7d15eab79e604e8b8728778594accc23cf1 +master date: 2024-05-15 19:59:52 +0100 +--- + tools/xentop/xentop.c | 13 ++++++++++++- + 1 file changed, 12 insertions(+), 1 deletion(-) + +diff --git a/tools/xentop/xentop.c b/tools/xentop/xentop.c +index 545bd5e96d..c2a311befe 100644 +--- a/tools/xentop/xentop.c ++++ b/tools/xentop/xentop.c +@@ -85,6 +85,7 @@ static void set_delay(const char *value); + static void set_prompt(const char *new_prompt, void (*func)(const char *)); + static int handle_key(int); + static int compare(unsigned long long, unsigned long long); ++static int compare_dbl(double, double); + static int compare_domains(xenstat_domain **, xenstat_domain **); + static unsigned long long tot_net_bytes( xenstat_domain *, int); + static bool tot_vbd_reqs(xenstat_domain *, int, unsigned long long *); +@@ -422,6 +423,16 @@ static int compare(unsigned long long i1, unsigned long long i2) + return 0; + } + ++/* Compares two double precision numbers, returning -1,0,1 for <,=,> */ ++static int compare_dbl(double d1, double d2) ++{ ++ if (d1 < d2) ++ return -1; ++ if (d1 > d2) ++ return 1; ++ return 0; ++} ++ + /* Comparison function for use with qsort. Compares two domains using the + * current sort field. 
*/ + static int compare_domains(xenstat_domain **domain1, xenstat_domain **domain2) +@@ -523,7 +534,7 @@ static double get_cpu_pct(xenstat_domain *domain) + + static int compare_cpu_pct(xenstat_domain *domain1, xenstat_domain *domain2) + { +- return -compare(get_cpu_pct(domain1), get_cpu_pct(domain2)); ++ return -compare_dbl(get_cpu_pct(domain1), get_cpu_pct(domain2)); + } + + /* Prints cpu percentage statistic */ +-- +2.45.2 + diff --git a/0018-x86-mtrr-avoid-system-wide-rendezvous-when-setting-A.patch b/0018-x86-mtrr-avoid-system-wide-rendezvous-when-setting-A.patch new file mode 100644 index 0000000..a57775d --- /dev/null +++ b/0018-x86-mtrr-avoid-system-wide-rendezvous-when-setting-A.patch @@ -0,0 +1,60 @@ +From 7cdb1fa2ab0b5e11f66cada0370770404153c824 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Tue, 21 May 2024 10:25:39 +0200 +Subject: [PATCH 18/56] x86/mtrr: avoid system wide rendezvous when setting AP + MTRRs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +There's no point in forcing a system wide update of the MTRRs on all processors +when there are no changes to be propagated. On AP startup it's only the AP +that needs to write the system wide MTRR values in order to match the rest of +the already online CPUs. + +We have occasionally seen the watchdog trigger during `xen-hptool cpu-online` +in one Intel Cascade Lake box with 448 CPUs due to the re-setting of the MTRRs +on all the CPUs in the system. + +While there adjust the comment to clarify why the system-wide resetting of the +MTRR registers is not needed for the purposes of mtrr_ap_init(). 
+ +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Release-acked-by: Oleksii Kurochko <oleksii.kurochko@gmail.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: abd00b037da5ffa4e8c4508a5df0cd6eabb805a4 +master date: 2024-05-15 19:59:52 +0100 +--- + xen/arch/x86/cpu/mtrr/main.c | 15 ++++++++------- + 1 file changed, 8 insertions(+), 7 deletions(-) + +diff --git a/xen/arch/x86/cpu/mtrr/main.c b/xen/arch/x86/cpu/mtrr/main.c +index 90b235f57e..0a44ebbcb0 100644 +--- a/xen/arch/x86/cpu/mtrr/main.c ++++ b/xen/arch/x86/cpu/mtrr/main.c +@@ -573,14 +573,15 @@ void mtrr_ap_init(void) + if (!mtrr_if || hold_mtrr_updates_on_aps) + return; + /* +- * Ideally we should hold mtrr_mutex here to avoid mtrr entries changed, +- * but this routine will be called in cpu boot time, holding the lock +- * breaks it. This routine is called in two cases: 1.very earily time +- * of software resume, when there absolutely isn't mtrr entry changes; +- * 2.cpu hotadd time. We let mtrr_add/del_page hold cpuhotplug lock to +- * prevent mtrr entry changes ++ * hold_mtrr_updates_on_aps takes care of preventing unnecessary MTRR ++ * updates when batch starting the CPUs (see ++ * mtrr_aps_sync_{begin,end}()). ++ * ++ * Otherwise just apply the current system wide MTRR values to this AP. ++ * Note this doesn't require synchronization with the other CPUs, as ++ * there are strictly no modifications of the current MTRR values. 
+ */ +- set_mtrr(~0U, 0, 0, 0); ++ mtrr_set_all(); + } + + /** +-- +2.45.2 + diff --git a/0018-x86emul-add-missing-EVEX.R-checks.patch b/0018-x86emul-add-missing-EVEX.R-checks.patch deleted file mode 100644 index 12e7702..0000000 --- a/0018-x86emul-add-missing-EVEX.R-checks.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 16f2e47eb1207d866f95cf694a60a7ceb8f96a36 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 27 Feb 2024 14:09:55 +0100 -Subject: [PATCH 18/67] x86emul: add missing EVEX.R' checks - -EVEX.R' is not ignored in 64-bit code when encoding a GPR or mask -register. While for mask registers suitable checks are in place (there -also covering EVEX.R), they were missing for the few cases where in -EVEX-encoded instructions ModR/M.reg encodes a GPR. While for VPEXTRW -the bit is replaced before an emulation stub is invoked, for -VCVT{,T}{S,D,H}2{,U}SI this actually would have led to #UD from inside -an emulation stub, in turn raising #UD to the guest, but accompanied by -log messages indicating something's wrong in Xen nevertheless. 
- -Fixes: 001bd91ad864 ("x86emul: support AVX512{F,BW,DQ} extract insns") -Fixes: baf4a376f550 ("x86emul: support AVX512F legacy-equivalent scalar int/FP conversion insns") -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: cb319824bfa8d3c9ea0410cc71daaedc3e11aa2a -master date: 2024-02-22 11:54:07 +0100 ---- - xen/arch/x86/x86_emulate/x86_emulate.c | 5 +++-- - 1 file changed, 3 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/x86_emulate/x86_emulate.c b/xen/arch/x86/x86_emulate/x86_emulate.c -index 0c0336f737..995670cbc8 100644 ---- a/xen/arch/x86/x86_emulate/x86_emulate.c -+++ b/xen/arch/x86/x86_emulate/x86_emulate.c -@@ -6829,7 +6829,8 @@ x86_emulate( - CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x2d): /* vcvts{s,d}2si xmm/mem,reg */ - CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x78): /* vcvtts{s,d}2usi xmm/mem,reg */ - CASE_SIMD_SCALAR_FP(_EVEX, 0x0f, 0x79): /* vcvts{s,d}2usi xmm/mem,reg */ -- generate_exception_if((evex.reg != 0xf || !evex.RX || evex.opmsk || -+ generate_exception_if((evex.reg != 0xf || !evex.RX || !evex.R || -+ evex.opmsk || - (ea.type != OP_REG && evex.brs)), - EXC_UD); - host_and_vcpu_must_have(avx512f); -@@ -10705,7 +10706,7 @@ x86_emulate( - goto pextr; - - case X86EMUL_OPC_EVEX_66(0x0f, 0xc5): /* vpextrw $imm8,xmm,reg */ -- generate_exception_if(ea.type != OP_REG, EXC_UD); -+ generate_exception_if(ea.type != OP_REG || !evex.R, EXC_UD); - /* Convert to alternative encoding: We want to use a memory operand. 
*/ - evex.opcx = ext_0f3a; - b = 0x15; --- -2.44.0 - diff --git a/0001-update-Xen-version-to-4.17.4-pre.patch b/0019-update-Xen-version-to-4.18.3-pre.patch index e1070c9..34f2b33 100644 --- a/0001-update-Xen-version-to-4.17.4-pre.patch +++ b/0019-update-Xen-version-to-4.18.3-pre.patch @@ -1,25 +1,25 @@ -From 4f6e9d4327eb5252f1e8cac97a095d8b8485dadb Mon Sep 17 00:00:00 2001 +From 01f7a3c792241d348a4e454a30afdf6c0d6cd71c Mon Sep 17 00:00:00 2001 From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 30 Jan 2024 14:36:44 +0100 -Subject: [PATCH 01/67] update Xen version to 4.17.4-pre +Date: Tue, 21 May 2024 11:52:11 +0200 +Subject: [PATCH 19/56] update Xen version to 4.18.3-pre --- xen/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xen/Makefile b/xen/Makefile -index a46e6330db..dd0b004e1c 100644 +index 657f6fa4e3..786ab61600 100644 --- a/xen/Makefile +++ b/xen/Makefile @@ -6,7 +6,7 @@ this-makefile := $(call lastword,$(MAKEFILE_LIST)) # All other places this is stored (eg. compile.h) should be autogenerated. 
export XEN_VERSION = 4 - export XEN_SUBVERSION = 17 --export XEN_EXTRAVERSION ?= .3$(XEN_VENDORVERSION) -+export XEN_EXTRAVERSION ?= .4-pre$(XEN_VENDORVERSION) + export XEN_SUBVERSION = 18 +-export XEN_EXTRAVERSION ?= .2$(XEN_VENDORVERSION) ++export XEN_EXTRAVERSION ?= .3-pre$(XEN_VENDORVERSION) export XEN_FULLVERSION = $(XEN_VERSION).$(XEN_SUBVERSION)$(XEN_EXTRAVERSION) -include xen-version -- -2.44.0 +2.45.2 diff --git a/0019-xen-livepatch-fix-norevert-test-hook-setup-typo.patch b/0019-xen-livepatch-fix-norevert-test-hook-setup-typo.patch deleted file mode 100644 index 1676f7a..0000000 --- a/0019-xen-livepatch-fix-norevert-test-hook-setup-typo.patch +++ /dev/null @@ -1,36 +0,0 @@ -From f6b12792542e372f36a71ea4c2563e6dd6e4fa57 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 27 Feb 2024 14:10:24 +0100 -Subject: [PATCH 19/67] xen/livepatch: fix norevert test hook setup typo -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The test code has a typo in using LIVEPATCH_APPLY_HOOK() instead of -LIVEPATCH_REVERT_HOOK(). 
- -Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: f0622dd4fd6ae6ddb523a45d89ed9b8f3a9a8f36 -master date: 2024-02-26 10:13:46 +0100 ---- - xen/test/livepatch/xen_action_hooks_norevert.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c -index 3e21ade6ab..c173855192 100644 ---- a/xen/test/livepatch/xen_action_hooks_norevert.c -+++ b/xen/test/livepatch/xen_action_hooks_norevert.c -@@ -120,7 +120,7 @@ static void post_revert_hook(livepatch_payload_t *payload) - printk(KERN_DEBUG "%s: Hook done.\n", __func__); - } - --LIVEPATCH_APPLY_HOOK(revert_hook); -+LIVEPATCH_REVERT_HOOK(revert_hook); - - LIVEPATCH_PREAPPLY_HOOK(pre_apply_hook); - LIVEPATCH_POSTAPPLY_HOOK(post_apply_hook); --- -2.44.0 - diff --git a/0020-x86-ucode-Further-fixes-to-identify-ucode-already-up.patch b/0020-x86-ucode-Further-fixes-to-identify-ucode-already-up.patch new file mode 100644 index 0000000..c00dce2 --- /dev/null +++ b/0020-x86-ucode-Further-fixes-to-identify-ucode-already-up.patch @@ -0,0 +1,92 @@ +From cd873f00bedca2f1afeaf13a78f70e719c5b1398 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Wed, 26 Jun 2024 13:36:13 +0200 +Subject: [PATCH 20/56] x86/ucode: Further fixes to identify "ucode already up + to date" +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When the revision in hardware is newer than anything Xen has to hand, +'microcode_cache' isn't set up. Then, `xen-ucode` initiates the update +because it doesn't know whether the revisions across the system are symmetric +or not. This involves the patch getting all the way into the +apply_microcode() hooks before being found to be too old. 
+ +This is all a giant mess and needs an overhaul, but in the short term simply +adjust the apply_microcode() to return -EEXIST. + +Also, unconditionally print the preexisting microcode revision on boot. It's +relevant information which is otherwise unavailable if Xen doesn't find new +microcode to use. + +Fixes: 648db37a155a ("x86/ucode: Distinguish "ucode already up to date"") +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 977d98e67c2e929c62aa1f495fc4c6341c45abb5 +master date: 2024-05-16 13:59:11 +0100 +--- + xen/arch/x86/cpu/microcode/amd.c | 7 +++++-- + xen/arch/x86/cpu/microcode/core.c | 2 ++ + xen/arch/x86/cpu/microcode/intel.c | 7 +++++-- + 3 files changed, 12 insertions(+), 4 deletions(-) + +diff --git a/xen/arch/x86/cpu/microcode/amd.c b/xen/arch/x86/cpu/microcode/amd.c +index 75fc84e445..d8f7646e88 100644 +--- a/xen/arch/x86/cpu/microcode/amd.c ++++ b/xen/arch/x86/cpu/microcode/amd.c +@@ -222,12 +222,15 @@ static int cf_check apply_microcode(const struct microcode_patch *patch) + uint32_t rev, old_rev = sig->rev; + enum microcode_match_result result = microcode_fits(patch); + ++ if ( result == MIS_UCODE ) ++ return -EINVAL; ++ + /* + * Allow application of the same revision to pick up SMT-specific changes + * even if the revision of the other SMT thread is already up-to-date. 
+ */ +- if ( result != NEW_UCODE && result != SAME_UCODE ) +- return -EINVAL; ++ if ( result == OLD_UCODE ) ++ return -EEXIST; + + if ( check_final_patch_levels(sig) ) + { +diff --git a/xen/arch/x86/cpu/microcode/core.c b/xen/arch/x86/cpu/microcode/core.c +index d5338ad345..8a47f4471f 100644 +--- a/xen/arch/x86/cpu/microcode/core.c ++++ b/xen/arch/x86/cpu/microcode/core.c +@@ -887,6 +887,8 @@ int __init early_microcode_init(unsigned long *module_map, + + ucode_ops.collect_cpu_info(); + ++ printk(XENLOG_INFO "BSP microcode revision: 0x%08x\n", this_cpu(cpu_sig).rev); ++ + /* + * Some hypervisors deliberately report a microcode revision of -1 to + * mean that they will not accept microcode updates. +diff --git a/xen/arch/x86/cpu/microcode/intel.c b/xen/arch/x86/cpu/microcode/intel.c +index 060c529a6e..a2d88e3ac0 100644 +--- a/xen/arch/x86/cpu/microcode/intel.c ++++ b/xen/arch/x86/cpu/microcode/intel.c +@@ -294,10 +294,13 @@ static int cf_check apply_microcode(const struct microcode_patch *patch) + + result = microcode_update_match(patch); + +- if ( result != NEW_UCODE && +- !(opt_ucode_allow_same && result == SAME_UCODE) ) ++ if ( result == MIS_UCODE ) + return -EINVAL; + ++ if ( result == OLD_UCODE || ++ (result == SAME_UCODE && !opt_ucode_allow_same) ) ++ return -EEXIST; ++ + wbinvd(); + + wrmsrl(MSR_IA32_UCODE_WRITE, (unsigned long)patch->data); +-- +2.45.2 + diff --git a/0020-xen-cmdline-fix-printf-format-specifier-in-no_config.patch b/0020-xen-cmdline-fix-printf-format-specifier-in-no_config.patch deleted file mode 100644 index b47d9ee..0000000 --- a/0020-xen-cmdline-fix-printf-format-specifier-in-no_config.patch +++ /dev/null @@ -1,38 +0,0 @@ -From 229e8a72ee4cde5698aaf42cc59ae57446dce60f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 27 Feb 2024 14:10:39 +0100 -Subject: [PATCH 20/67] xen/cmdline: fix printf format specifier in - no_config_param() -MIME-Version: 1.0 -Content-Type: text/plain; 
charset=UTF-8 -Content-Transfer-Encoding: 8bit - -'*' sets the width field, which is the minimum number of characters to output, -but what we want in no_config_param() is the precision instead, which is '.*' -as it imposes a maximum limit on the output. - -Fixes: 68d757df8dd2 ('x86/pv: Options to disable and/or compile out 32bit PV support') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: ef101f525173cf51dc70f4c77862f6f10a8ddccf -master date: 2024-02-26 10:17:40 +0100 ---- - xen/include/xen/param.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/include/xen/param.h b/xen/include/xen/param.h -index 93c3fe7cb7..e02e49635c 100644 ---- a/xen/include/xen/param.h -+++ b/xen/include/xen/param.h -@@ -191,7 +191,7 @@ static inline void no_config_param(const char *cfg, const char *param, - { - int len = e ? ({ ASSERT(e >= s); e - s; }) : strlen(s); - -- printk(XENLOG_INFO "CONFIG_%s disabled - ignoring '%s=%*s' setting\n", -+ printk(XENLOG_INFO "CONFIG_%s disabled - ignoring '%s=%.*s' setting\n", - cfg, param, len, s); - } - --- -2.44.0 - diff --git a/0021-x86-altcall-use-a-union-as-register-type-for-functio.patch b/0021-x86-altcall-use-a-union-as-register-type-for-functio.patch deleted file mode 100644 index ab050dd..0000000 --- a/0021-x86-altcall-use-a-union-as-register-type-for-functio.patch +++ /dev/null @@ -1,141 +0,0 @@ -From 1aafe054e7d1efbf8e8482a9cdd4be5753b79e2f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 27 Feb 2024 14:11:04 +0100 -Subject: [PATCH 21/67] x86/altcall: use a union as register type for function - parameters on clang -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current code for alternative calls uses the caller parameter types as the -types for the register variables that serve as function parameters: - -uint8_t foo; -[...] 
-alternative_call(myfunc, foo); - -Would expand roughly into: - -register unint8_t a1_ asm("rdi") = foo; -register unsigned long a2_ asm("rsi"); -[...] -asm volatile ("call *%c[addr](%%rip)"...); - -However with -O2 clang will generate incorrect code, given the following -example: - -unsigned int func(uint8_t t) -{ - return t; -} - -static void bar(uint8_t b) -{ - int ret_; - register uint8_t di asm("rdi") = b; - register unsigned long si asm("rsi"); - register unsigned long dx asm("rdx"); - register unsigned long cx asm("rcx"); - register unsigned long r8 asm("r8"); - register unsigned long r9 asm("r9"); - register unsigned long r10 asm("r10"); - register unsigned long r11 asm("r11"); - - asm volatile ( "call %c[addr]" - : "+r" (di), "=r" (si), "=r" (dx), - "=r" (cx), "=r" (r8), "=r" (r9), - "=r" (r10), "=r" (r11), "=a" (ret_) - : [addr] "i" (&(func)), "g" (func) - : "memory" ); -} - -void foo(unsigned int a) -{ - bar(a); -} - -Clang generates the following assembly code: - -func: # @func - movl %edi, %eax - retq -foo: # @foo - callq func - retq - -Note the truncation of the unsigned int parameter 'a' of foo() to uint8_t when -passed into bar() is lost. clang doesn't zero extend the parameters in the -callee when required, as the psABI mandates. - -The above can be worked around by using a union when defining the register -variables, so that `di` becomes: - -register union { - uint8_t e; - unsigned long r; -} di asm("rdi") = { .e = b }; - -Which results in following code generated for `foo()`: - -foo: # @foo - movzbl %dil, %edi - callq func - retq - -So the truncation is not longer lost. Apply such workaround only when built -with clang. 
- -Reported-by: Matthew Grooms <mgrooms@shrew.net> -Link: https://bugs.freebsd.org/bugzilla/show_bug.cgi?id=277200 -Link: https://github.com/llvm/llvm-project/issues/12579 -Link: https://github.com/llvm/llvm-project/issues/82598 -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -master commit: 2ce562b2a413cbdb2e1128989ed1722290a27c4e -master date: 2024-02-26 10:18:01 +0100 ---- - xen/arch/x86/include/asm/alternative.h | 25 +++++++++++++++++++++++++ - 1 file changed, 25 insertions(+) - -diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h -index a7a82c2c03..bcb1dc94f4 100644 ---- a/xen/arch/x86/include/asm/alternative.h -+++ b/xen/arch/x86/include/asm/alternative.h -@@ -167,9 +167,34 @@ extern void alternative_branches(void); - #define ALT_CALL_arg5 "r8" - #define ALT_CALL_arg6 "r9" - -+#ifdef CONFIG_CC_IS_CLANG -+/* -+ * Use a union with an unsigned long in order to prevent clang from -+ * skipping a possible truncation of the value. By using the union any -+ * truncation is carried before the call instruction, in turn covering -+ * for ABI-non-compliance in that the necessary clipping / extension of -+ * the value is supposed to be carried out in the callee. -+ * -+ * Note this behavior is not mandated by the standard, and hence could -+ * stop being a viable workaround, or worse, could cause a different set -+ * of code-generation issues in future clang versions. 
-+ * -+ * This has been reported upstream: -+ * https://github.com/llvm/llvm-project/issues/12579 -+ * https://github.com/llvm/llvm-project/issues/82598 -+ */ -+#define ALT_CALL_ARG(arg, n) \ -+ register union { \ -+ typeof(arg) e; \ -+ unsigned long r; \ -+ } a ## n ## _ asm ( ALT_CALL_arg ## n ) = { \ -+ .e = ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); }) \ -+ } -+#else - #define ALT_CALL_ARG(arg, n) \ - register typeof(arg) a ## n ## _ asm ( ALT_CALL_arg ## n ) = \ - ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); }) -+#endif - #define ALT_CALL_NO_ARG(n) \ - register unsigned long a ## n ## _ asm ( ALT_CALL_arg ## n ) - --- -2.44.0 - diff --git a/0021-x86-msi-prevent-watchdog-triggering-when-dumping-MSI.patch b/0021-x86-msi-prevent-watchdog-triggering-when-dumping-MSI.patch new file mode 100644 index 0000000..8bcc63f --- /dev/null +++ b/0021-x86-msi-prevent-watchdog-triggering-when-dumping-MSI.patch @@ -0,0 +1,44 @@ +From 1ffb29d132600e6a7965c2885505615a6fd6c647 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Wed, 26 Jun 2024 13:36:52 +0200 +Subject: [PATCH 21/56] x86/msi: prevent watchdog triggering when dumping MSI + state +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Use the same check that's used in dump_irqs(). 
+ +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: 594b22ca5be681ec1b42c34f321cc2600d582210 +master date: 2024-05-20 14:29:44 +0100 +--- + xen/arch/x86/msi.c | 4 ++++ + 1 file changed, 4 insertions(+) + +diff --git a/xen/arch/x86/msi.c b/xen/arch/x86/msi.c +index a78367d7cf..3eaeffd1e0 100644 +--- a/xen/arch/x86/msi.c ++++ b/xen/arch/x86/msi.c +@@ -17,6 +17,7 @@ + #include <xen/param.h> + #include <xen/pci.h> + #include <xen/pci_regs.h> ++#include <xen/softirq.h> + #include <xen/iocap.h> + #include <xen/keyhandler.h> + #include <xen/pfn.h> +@@ -1405,6 +1406,9 @@ static void cf_check dump_msi(unsigned char key) + unsigned long flags; + const char *type = "???"; + ++ if ( !(irq & 0x1f) ) ++ process_pending_softirqs(); ++ + if ( !irq_desc_initialized(desc) ) + continue; + +-- +2.45.2 + diff --git a/0022-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when-a.patch b/0022-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when-a.patch new file mode 100644 index 0000000..28fec3e --- /dev/null +++ b/0022-x86-irq-remove-offline-CPUs-from-old-CPU-mask-when-a.patch @@ -0,0 +1,44 @@ +From 52e16bf065cb42b79d14ac74d701d1f9d8506430 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Wed, 26 Jun 2024 13:37:20 +0200 +Subject: [PATCH 22/56] x86/irq: remove offline CPUs from old CPU mask when + adjusting move_cleanup_count +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +When adjusting move_cleanup_count to account for CPUs that are offline also +adjust old_cpu_mask, otherwise further calls to fixup_irqs() could subtract +those again and create an imbalance in move_cleanup_count. 
+ +Fixes: 472e0b74c5c4 ('x86/IRQ: deal with move cleanup count state in fixup_irqs()') +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: e63209d3ba2fd1b2f232babd14c9c679ffa7b09a +master date: 2024-06-10 10:33:22 +0200 +--- + xen/arch/x86/irq.c | 8 ++++++++ + 1 file changed, 8 insertions(+) + +diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c +index e07006391a..db14df93db 100644 +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2576,6 +2576,14 @@ void fixup_irqs(const cpumask_t *mask, bool verbose) + desc->arch.move_cleanup_count -= cpumask_weight(affinity); + if ( !desc->arch.move_cleanup_count ) + release_old_vec(desc); ++ else ++ /* ++ * Adjust old_cpu_mask to account for the offline CPUs, ++ * otherwise further calls to fixup_irqs() could subtract those ++ * again and possibly underflow the counter. ++ */ ++ cpumask_andnot(desc->arch.old_cpu_mask, desc->arch.old_cpu_mask, ++ affinity); + } + + if ( !desc->action || cpumask_subset(desc->affinity, mask) ) +-- +2.45.2 + diff --git a/0022-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch b/0022-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch deleted file mode 100644 index ce01c1a..0000000 --- a/0022-x86-spec-fix-BRANCH_HARDEN-option-to-only-be-set-whe.patch +++ /dev/null @@ -1,57 +0,0 @@ -From 91650010815f3da0834bc9781c4359350d1162a5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 27 Feb 2024 14:11:40 +0100 -Subject: [PATCH 22/67] x86/spec: fix BRANCH_HARDEN option to only be set when - build-enabled -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current logic to handle the BRANCH_HARDEN option will report it as enabled -even when build-time disabled. Fix this by only allowing the option to be set -when support for it is built into Xen. 
- -Fixes: 2d6f36daa086 ('x86/nospec: Introduce CONFIG_SPECULATIVE_HARDEN_BRANCH') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 60e00f77a5cc671d30c5ef3318f5b8e9b74e4aa3 -master date: 2024-02-26 16:06:42 +0100 ---- - xen/arch/x86/spec_ctrl.c | 14 ++++++++++++-- - 1 file changed, 12 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 56e07d7536..661716d695 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -62,7 +62,8 @@ int8_t __initdata opt_psfd = -1; - int8_t __ro_after_init opt_ibpb_ctxt_switch = -1; - int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; --static bool __initdata opt_branch_harden = true; -+static bool __initdata opt_branch_harden = -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH); - - bool __initdata bsp_delay_spec_ctrl; - uint8_t __read_mostly default_xen_spec_ctrl; -@@ -280,7 +281,16 @@ static int __init cf_check parse_spec_ctrl(const char *s) - else if ( (val = parse_boolean("l1d-flush", s, ss)) >= 0 ) - opt_l1d_flush = val; - else if ( (val = parse_boolean("branch-harden", s, ss)) >= 0 ) -- opt_branch_harden = val; -+ { -+ if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) ) -+ opt_branch_harden = val; -+ else -+ { -+ no_config_param("SPECULATIVE_HARDEN_BRANCH", "spec-ctrl", s, -+ ss); -+ rc = -EINVAL; -+ } -+ } - else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 ) - opt_srb_lock = val; - else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 ) --- -2.44.0 - diff --git a/0023-CI-Update-FreeBSD-to-13.3.patch b/0023-CI-Update-FreeBSD-to-13.3.patch new file mode 100644 index 0000000..6a6e7ae --- /dev/null +++ b/0023-CI-Update-FreeBSD-to-13.3.patch @@ -0,0 +1,33 @@ +From 80f2d2c2a515a6b9a4ea1b128267c6e1b5085002 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Wed, 26 Jun 2024 13:37:58 +0200 +Subject: [PATCH 23/56] CI: Update 
FreeBSD to 13.3 +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Acked-by: Roger Pau Monné <roger.pau@citrix.com> +Acked-by: Stefano Stabellini <sstabellini@kernel.org> +master commit: 5ea7f2c9d7a1334b3b2bd5f67fab4d447b60613d +master date: 2024-06-11 17:00:10 +0100 +--- + .cirrus.yml | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/.cirrus.yml b/.cirrus.yml +index 63f3afb104..e961877881 100644 +--- a/.cirrus.yml ++++ b/.cirrus.yml +@@ -17,7 +17,7 @@ freebsd_template: &FREEBSD_TEMPLATE + task: + name: 'FreeBSD 13' + freebsd_instance: +- image_family: freebsd-13-2 ++ image_family: freebsd-13-3 + << : *FREEBSD_TEMPLATE + + task: +-- +2.45.2 + diff --git a/0023-x86-account-for-shadow-stack-in-exception-from-stub-.patch b/0023-x86-account-for-shadow-stack-in-exception-from-stub-.patch deleted file mode 100644 index e23a764..0000000 --- a/0023-x86-account-for-shadow-stack-in-exception-from-stub-.patch +++ /dev/null @@ -1,212 +0,0 @@ -From 49f77602373b58b7bbdb40cea2b49d2f88d4003d Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 27 Feb 2024 14:12:11 +0100 -Subject: [PATCH 23/67] x86: account for shadow stack in exception-from-stub - recovery - -Dealing with exceptions raised from within emulation stubs involves -discarding return address (replaced by exception related information). -Such discarding of course also requires removing the corresponding entry -from the shadow stack. - -Also amend the comment in fixup_exception_return(), to further clarify -why use of ptr[1] can't be an out-of-bounds access. - -This is CVE-2023-46841 / XSA-451. 
- -Fixes: 209fb9919b50 ("x86/extable: Adjust extable handling to be shadow stack compatible") -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 91f5f7a9154919a765c3933521760acffeddbf28 -master date: 2024-02-27 13:49:22 +0100 ---- - xen/arch/x86/extable.c | 20 ++++++---- - xen/arch/x86/include/asm/uaccess.h | 3 +- - xen/arch/x86/traps.c | 63 +++++++++++++++++++++++++++--- - 3 files changed, 71 insertions(+), 15 deletions(-) - -diff --git a/xen/arch/x86/extable.c b/xen/arch/x86/extable.c -index 6758ba1dca..dd9583f2a5 100644 ---- a/xen/arch/x86/extable.c -+++ b/xen/arch/x86/extable.c -@@ -86,26 +86,29 @@ search_one_extable(const struct exception_table_entry *first, - } - - unsigned long --search_exception_table(const struct cpu_user_regs *regs) -+search_exception_table(const struct cpu_user_regs *regs, unsigned long *stub_ra) - { - const struct virtual_region *region = find_text_region(regs->rip); - unsigned long stub = this_cpu(stubs.addr); - - if ( region && region->ex ) -+ { -+ *stub_ra = 0; - return search_one_extable(region->ex, region->ex_end, regs->rip); -+ } - - if ( regs->rip >= stub + STUB_BUF_SIZE / 2 && - regs->rip < stub + STUB_BUF_SIZE && - regs->rsp > (unsigned long)regs && - regs->rsp < (unsigned long)get_cpu_info() ) - { -- unsigned long retptr = *(unsigned long *)regs->rsp; -+ unsigned long retaddr = *(unsigned long *)regs->rsp, fixup; - -- region = find_text_region(retptr); -- retptr = region && region->ex -- ? search_one_extable(region->ex, region->ex_end, retptr) -- : 0; -- if ( retptr ) -+ region = find_text_region(retaddr); -+ fixup = region && region->ex -+ ? 
search_one_extable(region->ex, region->ex_end, retaddr) -+ : 0; -+ if ( fixup ) - { - /* - * Put trap number and error code on the stack (in place of the -@@ -117,7 +120,8 @@ search_exception_table(const struct cpu_user_regs *regs) - }; - - *(unsigned long *)regs->rsp = token.raw; -- return retptr; -+ *stub_ra = retaddr; -+ return fixup; - } - } - -diff --git a/xen/arch/x86/include/asm/uaccess.h b/xen/arch/x86/include/asm/uaccess.h -index 684fccd95c..74bb222c03 100644 ---- a/xen/arch/x86/include/asm/uaccess.h -+++ b/xen/arch/x86/include/asm/uaccess.h -@@ -421,7 +421,8 @@ union stub_exception_token { - unsigned long raw; - }; - --extern unsigned long search_exception_table(const struct cpu_user_regs *regs); -+extern unsigned long search_exception_table(const struct cpu_user_regs *regs, -+ unsigned long *stub_ra); - extern void sort_exception_tables(void); - extern void sort_exception_table(struct exception_table_entry *start, - const struct exception_table_entry *stop); -diff --git a/xen/arch/x86/traps.c b/xen/arch/x86/traps.c -index 06c4f3868b..7599bee361 100644 ---- a/xen/arch/x86/traps.c -+++ b/xen/arch/x86/traps.c -@@ -856,7 +856,7 @@ void do_unhandled_trap(struct cpu_user_regs *regs) - } - - static void fixup_exception_return(struct cpu_user_regs *regs, -- unsigned long fixup) -+ unsigned long fixup, unsigned long stub_ra) - { - if ( IS_ENABLED(CONFIG_XEN_SHSTK) ) - { -@@ -873,7 +873,8 @@ static void fixup_exception_return(struct cpu_user_regs *regs, - /* - * Search for %rip. The shstk currently looks like this: - * -- * ... [Likely pointed to by SSP] -+ * tok [Supervisor token, == &tok | BUSY, only with FRED inactive] -+ * ... 
[Pointed to by SSP for most exceptions, empty in IST cases] - * %cs [== regs->cs] - * %rip [== regs->rip] - * SSP [Likely points to 3 slots higher, above %cs] -@@ -891,7 +892,56 @@ static void fixup_exception_return(struct cpu_user_regs *regs, - */ - if ( ptr[0] == regs->rip && ptr[1] == regs->cs ) - { -+ unsigned long primary_shstk = -+ (ssp & ~(STACK_SIZE - 1)) + -+ (PRIMARY_SHSTK_SLOT + 1) * PAGE_SIZE - 8; -+ - wrss(fixup, ptr); -+ -+ if ( !stub_ra ) -+ goto shstk_done; -+ -+ /* -+ * Stub recovery ought to happen only when the outer context -+ * was on the main shadow stack. We need to also "pop" the -+ * stub's return address from the interrupted context's shadow -+ * stack. That is, -+ * - if we're still on the main stack, we need to move the -+ * entire stack (up to and including the exception frame) -+ * up by one slot, incrementing the original SSP in the -+ * exception frame, -+ * - if we're on an IST stack, we need to increment the -+ * original SSP. -+ */ -+ BUG_ON((ptr[-1] ^ primary_shstk) >> PAGE_SHIFT); -+ -+ if ( (ssp ^ primary_shstk) >> PAGE_SHIFT ) -+ { -+ /* -+ * We're on an IST stack. First make sure the two return -+ * addresses actually match. Then increment the interrupted -+ * context's SSP. -+ */ -+ BUG_ON(stub_ra != *(unsigned long*)ptr[-1]); -+ wrss(ptr[-1] + 8, &ptr[-1]); -+ goto shstk_done; -+ } -+ -+ /* Make sure the two return addresses actually match. */ -+ BUG_ON(stub_ra != ptr[2]); -+ -+ /* Move exception frame, updating SSP there. */ -+ wrss(ptr[1], &ptr[2]); /* %cs */ -+ wrss(ptr[0], &ptr[1]); /* %rip */ -+ wrss(ptr[-1] + 8, &ptr[0]); /* SSP */ -+ -+ /* Move all newer entries. */ -+ while ( --ptr != _p(ssp) ) -+ wrss(ptr[-1], &ptr[0]); -+ -+ /* Finally account for our own stack having shifted up. 
*/ -+ asm volatile ( "incsspd %0" :: "r" (2) ); -+ - goto shstk_done; - } - } -@@ -912,7 +962,8 @@ static void fixup_exception_return(struct cpu_user_regs *regs, - - static bool extable_fixup(struct cpu_user_regs *regs, bool print) - { -- unsigned long fixup = search_exception_table(regs); -+ unsigned long stub_ra = 0; -+ unsigned long fixup = search_exception_table(regs, &stub_ra); - - if ( unlikely(fixup == 0) ) - return false; -@@ -926,7 +977,7 @@ static bool extable_fixup(struct cpu_user_regs *regs, bool print) - vector_name(regs->entry_vector), regs->error_code, - _p(regs->rip), _p(regs->rip), _p(fixup)); - -- fixup_exception_return(regs, fixup); -+ fixup_exception_return(regs, fixup, stub_ra); - this_cpu(last_extable_addr) = regs->rip; - - return true; -@@ -1214,7 +1265,7 @@ void do_invalid_op(struct cpu_user_regs *regs) - void (*fn)(struct cpu_user_regs *) = bug_ptr(bug); - - fn(regs); -- fixup_exception_return(regs, (unsigned long)eip); -+ fixup_exception_return(regs, (unsigned long)eip, 0); - return; - } - -@@ -1235,7 +1286,7 @@ void do_invalid_op(struct cpu_user_regs *regs) - case BUGFRAME_warn: - printk("Xen WARN at %s%s:%d\n", prefix, filename, lineno); - show_execution_state(regs); -- fixup_exception_return(regs, (unsigned long)eip); -+ fixup_exception_return(regs, (unsigned long)eip, 0); - return; - - case BUGFRAME_bug: --- -2.44.0 - diff --git a/0024-x86-smp-do-not-use-shorthand-IPI-destinations-in-CPU.patch b/0024-x86-smp-do-not-use-shorthand-IPI-destinations-in-CPU.patch new file mode 100644 index 0000000..b69c88c --- /dev/null +++ b/0024-x86-smp-do-not-use-shorthand-IPI-destinations-in-CPU.patch @@ -0,0 +1,98 @@ +From 98238d49ecb149a5ac07cb8032817904c404ac2b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Wed, 26 Jun 2024 13:38:36 +0200 +Subject: [PATCH 24/56] x86/smp: do not use shorthand IPI destinations in CPU + hot{,un}plug contexts +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 
+Content-Transfer-Encoding: 8bit + +Due to the current rwlock logic, if the CPU calling get_cpu_maps() does +so from a cpu_hotplug_{begin,done}() region the function will still +return success, because a CPU taking the rwlock in read mode after +having taken it in write mode is allowed. Such corner case makes using +get_cpu_maps() alone not enough to prevent using the shorthand in CPU +hotplug regions. + +Introduce a new helper to detect whether the current caller is between a +cpu_hotplug_{begin,done}() region and use it in send_IPI_mask() to restrict +shorthand usage. + +Fixes: 5500d265a2a8 ('x86/smp: use APIC ALLBUT destination shorthand when possible') +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 171c52fba5d94e050d704770480dcb983490d0ad +master date: 2024-06-12 14:29:31 +0200 +--- + xen/arch/x86/smp.c | 2 +- + xen/common/cpu.c | 5 +++++ + xen/include/xen/cpu.h | 10 ++++++++++ + xen/include/xen/rwlock.h | 2 ++ + 4 files changed, 18 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/smp.c b/xen/arch/x86/smp.c +index 3a331cbdbc..340fcafb46 100644 +--- a/xen/arch/x86/smp.c ++++ b/xen/arch/x86/smp.c +@@ -88,7 +88,7 @@ void send_IPI_mask(const cpumask_t *mask, int vector) + * the system have been accounted for. + */ + if ( system_state > SYS_STATE_smp_boot && +- !unaccounted_cpus && !disabled_cpus && ++ !unaccounted_cpus && !disabled_cpus && !cpu_in_hotplug_context() && + /* NB: get_cpu_maps lock requires enabled interrupts. 
*/ + local_irq_is_enabled() && (cpus_locked = get_cpu_maps()) && + (park_offline_cpus || +diff --git a/xen/common/cpu.c b/xen/common/cpu.c +index 8709db4d29..6e35b114c0 100644 +--- a/xen/common/cpu.c ++++ b/xen/common/cpu.c +@@ -68,6 +68,11 @@ void cpu_hotplug_done(void) + write_unlock(&cpu_add_remove_lock); + } + ++bool cpu_in_hotplug_context(void) ++{ ++ return rw_is_write_locked_by_me(&cpu_add_remove_lock); ++} ++ + static NOTIFIER_HEAD(cpu_chain); + + void __init register_cpu_notifier(struct notifier_block *nb) +diff --git a/xen/include/xen/cpu.h b/xen/include/xen/cpu.h +index e1d4eb5967..6bf5786750 100644 +--- a/xen/include/xen/cpu.h ++++ b/xen/include/xen/cpu.h +@@ -13,6 +13,16 @@ void put_cpu_maps(void); + void cpu_hotplug_begin(void); + void cpu_hotplug_done(void); + ++/* ++ * Returns true when the caller CPU is between a cpu_hotplug_{begin,done}() ++ * region. ++ * ++ * This is required to safely identify hotplug contexts, as get_cpu_maps() ++ * would otherwise succeed because a caller holding the lock in write mode is ++ * allowed to acquire the same lock in read mode. ++ */ ++bool cpu_in_hotplug_context(void); ++ + /* Receive notification of CPU hotplug events. 
*/ + void register_cpu_notifier(struct notifier_block *nb); + +diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h +index 9e35ee2edf..dc74d1c057 100644 +--- a/xen/include/xen/rwlock.h ++++ b/xen/include/xen/rwlock.h +@@ -309,6 +309,8 @@ static always_inline void write_lock_irq(rwlock_t *l) + + #define rw_is_locked(l) _rw_is_locked(l) + #define rw_is_write_locked(l) _rw_is_write_locked(l) ++#define rw_is_write_locked_by_me(l) \ ++ lock_evaluate_nospec(_is_write_locked_by_me(atomic_read(&(l)->cnts))) + + + typedef struct percpu_rwlock percpu_rwlock_t; +-- +2.45.2 + diff --git a/0024-xen-arm-Fix-UBSAN-failure-in-start_xen.patch b/0024-xen-arm-Fix-UBSAN-failure-in-start_xen.patch deleted file mode 100644 index 7bdd651..0000000 --- a/0024-xen-arm-Fix-UBSAN-failure-in-start_xen.patch +++ /dev/null @@ -1,52 +0,0 @@ -From 6cbccc4071ef49a8c591ecaddfdcb1cc26d28411 Mon Sep 17 00:00:00 2001 -From: Michal Orzel <michal.orzel@amd.com> -Date: Thu, 8 Feb 2024 11:43:39 +0100 -Subject: [PATCH 24/67] xen/arm: Fix UBSAN failure in start_xen() - -When running Xen on arm32, in scenario where Xen is loaded at an address -such as boot_phys_offset >= 2GB, UBSAN reports the following: - -(XEN) UBSAN: Undefined behaviour in arch/arm/setup.c:739:58 -(XEN) pointer operation underflowed 00200000 to 86800000 -(XEN) Xen WARN at common/ubsan/ubsan.c:172 -(XEN) ----[ Xen-4.19-unstable arm32 debug=y ubsan=y Not tainted ]---- -... -(XEN) Xen call trace: -(XEN) [<0031b4c0>] ubsan.c#ubsan_epilogue+0x18/0xf0 (PC) -(XEN) [<0031d134>] __ubsan_handle_pointer_overflow+0xb8/0xd4 (LR) -(XEN) [<0031d134>] __ubsan_handle_pointer_overflow+0xb8/0xd4 -(XEN) [<004d15a8>] start_xen+0xe0/0xbe0 -(XEN) [<0020007c>] head.o#primary_switched+0x4/0x30 - -The failure is reported for the following line: -(paddr_t)(uintptr_t)(_start + boot_phys_offset) - -This occurs because the compiler treats (ptr + size) with size bigger than -PTRDIFF_MAX as undefined behavior. 
To address this, switch to macro -virt_to_maddr(), given the future plans to eliminate boot_phys_offset. - -Signed-off-by: Michal Orzel <michal.orzel@amd.com> -Reviewed-by: Luca Fancellu <luca.fancellu@arm.com> -Tested-by: Luca Fancellu <luca.fancellu@arm.com> -Acked-by: Julien Grall <jgrall@amazon.com> -(cherry picked from commit e11f5766503c0ff074b4e0f888bbfc931518a169) ---- - xen/arch/arm/setup.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/arm/setup.c b/xen/arch/arm/setup.c -index 4395640019..9ee19c2bc1 100644 ---- a/xen/arch/arm/setup.c -+++ b/xen/arch/arm/setup.c -@@ -1025,7 +1025,7 @@ void __init start_xen(unsigned long boot_phys_offset, - - /* Register Xen's load address as a boot module. */ - xen_bootmodule = add_boot_module(BOOTMOD_XEN, -- (paddr_t)(uintptr_t)(_start + boot_phys_offset), -+ virt_to_maddr(_start), - (paddr_t)(uintptr_t)(_end - _start), false); - BUG_ON(!xen_bootmodule); - --- -2.44.0 - diff --git a/0025-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch b/0025-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch deleted file mode 100644 index 28e489b..0000000 --- a/0025-x86-HVM-hide-SVM-VMX-when-their-enabling-is-prohibit.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 9c0d518eb8dc69430e6a8d767bd101dad19b846a Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 5 Mar 2024 11:56:31 +0100 -Subject: [PATCH 25/67] x86/HVM: hide SVM/VMX when their enabling is prohibited - by firmware -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -... or we fail to enable the functionality on the BSP for other reasons. -The only place where hardware announcing the feature is recorded is the -raw CPU policy/featureset. - -Inspired by https://lore.kernel.org/all/20230921114940.957141-1-pbonzini@redhat.com/. 
- -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 0b5f149338e35a795bf609ce584640b0977f9e6c -master date: 2024-01-09 14:06:34 +0100 ---- - xen/arch/x86/hvm/svm/svm.c | 1 + - xen/arch/x86/hvm/vmx/vmcs.c | 17 +++++++++++++++++ - 2 files changed, 18 insertions(+) - -diff --git a/xen/arch/x86/hvm/svm/svm.c b/xen/arch/x86/hvm/svm/svm.c -index fd32600ae3..3c17464550 100644 ---- a/xen/arch/x86/hvm/svm/svm.c -+++ b/xen/arch/x86/hvm/svm/svm.c -@@ -1669,6 +1669,7 @@ const struct hvm_function_table * __init start_svm(void) - - if ( _svm_cpu_up(true) ) - { -+ setup_clear_cpu_cap(X86_FEATURE_SVM); - printk("SVM: failed to initialise.\n"); - return NULL; - } -diff --git a/xen/arch/x86/hvm/vmx/vmcs.c b/xen/arch/x86/hvm/vmx/vmcs.c -index bcbecc6945..b5ecc51b43 100644 ---- a/xen/arch/x86/hvm/vmx/vmcs.c -+++ b/xen/arch/x86/hvm/vmx/vmcs.c -@@ -2163,6 +2163,23 @@ int __init vmx_vmcs_init(void) - - if ( !ret ) - register_keyhandler('v', vmcs_dump, "dump VT-x VMCSs", 1); -+ else -+ { -+ setup_clear_cpu_cap(X86_FEATURE_VMX); -+ -+ /* -+ * _vmx_vcpu_up() may have made it past feature identification. -+ * Make sure all dependent features are off as well. 
-+ */ -+ vmx_basic_msr = 0; -+ vmx_pin_based_exec_control = 0; -+ vmx_cpu_based_exec_control = 0; -+ vmx_secondary_exec_control = 0; -+ vmx_vmexit_control = 0; -+ vmx_vmentry_control = 0; -+ vmx_ept_vpid_cap = 0; -+ vmx_vmfunc = 0; -+ } - - return ret; - } --- -2.44.0 - diff --git a/0025-x86-irq-limit-interrupt-movement-done-by-fixup_irqs.patch b/0025-x86-irq-limit-interrupt-movement-done-by-fixup_irqs.patch new file mode 100644 index 0000000..7c40bba --- /dev/null +++ b/0025-x86-irq-limit-interrupt-movement-done-by-fixup_irqs.patch @@ -0,0 +1,104 @@ +From ce0a0cb0a74a909abf988f242aa228acdd2917fe Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Wed, 26 Jun 2024 13:39:11 +0200 +Subject: [PATCH 25/56] x86/irq: limit interrupt movement done by fixup_irqs() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +The current check used in fixup_irqs() to decide whether to move around +interrupts is based on the affinity mask, but such mask can have all bits set, +and hence is unlikely to be a subset of the input mask. For example if an +interrupt has an affinity mask of all 1s, any input to fixup_irqs() that's not +an all set CPU mask would cause that interrupt to be shuffled around +unconditionally. + +What fixup_irqs() cares about is evacuating interrupts from CPUs not set on the +input CPU mask, and for that purpose it should check whether the interrupt is +assigned to a CPU not present in the input mask. Assume that ->arch.cpu_mask +is a subset of the ->affinity mask, and keep the current logic that resets the +->affinity mask if the interrupt has to be shuffled around. + +Doing the affinity movement based on ->arch.cpu_mask requires removing the +special handling to ->arch.cpu_mask done for high priority vectors, otherwise +the adjustment done to cpu_mask makes them always skip the CPU interrupt +movement.
+ +While there also adjust the comment as to the purpose of fixup_irqs(). + +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: c7564d7366d865cc407e3d64bca816d07edee174 +master date: 2024-06-12 14:30:40 +0200 +--- + xen/arch/x86/include/asm/irq.h | 2 +- + xen/arch/x86/irq.c | 21 +++++++++++---------- + 2 files changed, 12 insertions(+), 11 deletions(-) + +diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h +index d7fb8ec7e8..71d4a8fc56 100644 +--- a/xen/arch/x86/include/asm/irq.h ++++ b/xen/arch/x86/include/asm/irq.h +@@ -132,7 +132,7 @@ void free_domain_pirqs(struct domain *d); + int map_domain_emuirq_pirq(struct domain *d, int pirq, int emuirq); + int unmap_domain_pirq_emuirq(struct domain *d, int pirq); + +-/* Reset irq affinities to match the given CPU mask. */ ++/* Evacuate interrupts assigned to CPUs not present in the input CPU mask. */ + void fixup_irqs(const cpumask_t *mask, bool verbose); + void fixup_eoi(void); + +diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c +index db14df93db..566331bec1 100644 +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2529,7 +2529,7 @@ static int __init cf_check setup_dump_irqs(void) + } + __initcall(setup_dump_irqs); + +-/* Reset irq affinities to match the given CPU mask. */ ++/* Evacuate interrupts assigned to CPUs not present in the input CPU mask. */ + void fixup_irqs(const cpumask_t *mask, bool verbose) + { + unsigned int irq; +@@ -2553,19 +2553,15 @@ void fixup_irqs(const cpumask_t *mask, bool verbose) + + vector = irq_to_vector(irq); + if ( vector >= FIRST_HIPRIORITY_VECTOR && +- vector <= LAST_HIPRIORITY_VECTOR ) ++ vector <= LAST_HIPRIORITY_VECTOR && ++ desc->handler == &no_irq_type ) + { +- cpumask_and(desc->arch.cpu_mask, desc->arch.cpu_mask, mask); +- + /* + * This can in particular happen when parking secondary threads + * during boot and when the serial console wants to use a PCI IRQ. 
+ */ +- if ( desc->handler == &no_irq_type ) +- { +- spin_unlock(&desc->lock); +- continue; +- } ++ spin_unlock(&desc->lock); ++ continue; + } + + if ( desc->arch.move_cleanup_count ) +@@ -2586,7 +2582,12 @@ void fixup_irqs(const cpumask_t *mask, bool verbose) + affinity); + } + +- if ( !desc->action || cpumask_subset(desc->affinity, mask) ) ++ /* ++ * Avoid shuffling the interrupt around as long as current target CPUs ++ * are a subset of the input mask. What fixup_irqs() cares about is ++ * evacuating interrupts from CPUs not in the input mask. ++ */ ++ if ( !desc->action || cpumask_subset(desc->arch.cpu_mask, mask) ) + { + spin_unlock(&desc->lock); + continue; +-- +2.45.2 + diff --git a/0026-x86-EPT-correct-special-page-checking-in-epte_get_en.patch b/0026-x86-EPT-correct-special-page-checking-in-epte_get_en.patch new file mode 100644 index 0000000..c94728a --- /dev/null +++ b/0026-x86-EPT-correct-special-page-checking-in-epte_get_en.patch @@ -0,0 +1,46 @@ +From 6e647efaf2b02ce92bcf80bec47c18cca5084f8a Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Wed, 26 Jun 2024 13:39:44 +0200 +Subject: [PATCH 26/56] x86/EPT: correct special page checking in + epte_get_entry_emt() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +mfn_valid() granularity is (currently) 256Mb. Therefore the start of a +1Gb page passing the test doesn't necessarily mean all parts of such a +range would also pass. Yet using the result of mfn_to_page() on an MFN +which doesn't pass mfn_valid() checking is liable to result in a crash +(the invocation of mfn_to_page() alone is presumably "just" UB in such a +case). 
+ +Fixes: ca24b2ffdbd9 ("x86/hvm: set 'ipat' in EPT for special pages") +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 5540b94e8191059eb9cbbe98ac316232a42208f6 +master date: 2024-06-13 16:53:34 +0200 +--- + xen/arch/x86/mm/p2m-ept.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c +index 85c4e8e54f..1aa6bbc771 100644 +--- a/xen/arch/x86/mm/p2m-ept.c ++++ b/xen/arch/x86/mm/p2m-ept.c +@@ -518,8 +518,12 @@ int epte_get_entry_emt(struct domain *d, gfn_t gfn, mfn_t mfn, + } + + for ( special_pgs = i = 0; i < (1ul << order); i++ ) +- if ( is_special_page(mfn_to_page(mfn_add(mfn, i))) ) ++ { ++ mfn_t cur = mfn_add(mfn, i); ++ ++ if ( mfn_valid(cur) && is_special_page(mfn_to_page(cur)) ) + special_pgs++; ++ } + + if ( special_pgs ) + { +-- +2.45.2 + diff --git a/0026-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch b/0026-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch deleted file mode 100644 index 4b051ea..0000000 --- a/0026-xen-sched-Fix-UB-shift-in-compat_set_timer_op.patch +++ /dev/null @@ -1,86 +0,0 @@ -From b75bee183210318150e678e14b35224d7c73edb6 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 5 Mar 2024 11:57:02 +0100 -Subject: [PATCH 26/67] xen/sched: Fix UB shift in compat_set_timer_op() - -Tamas reported this UBSAN failure from fuzzing: - - (XEN) ================================================================================ - (XEN) UBSAN: Undefined behaviour in common/sched/compat.c:48:37 - (XEN) left shift of negative value -2147425536 - (XEN) ----[ Xen-4.19-unstable x86_64 debug=y ubsan=y Not tainted ]---- - ... 
- (XEN) Xen call trace: - (XEN) [<ffff82d040307c1c>] R ubsan.c#ubsan_epilogue+0xa/0xd9 - (XEN) [<ffff82d040308afb>] F __ubsan_handle_shift_out_of_bounds+0x11a/0x1c5 - (XEN) [<ffff82d040307758>] F compat_set_timer_op+0x41/0x43 - (XEN) [<ffff82d04040e4cc>] F hvm_do_multicall_call+0x77f/0xa75 - (XEN) [<ffff82d040519462>] F arch_do_multicall_call+0xec/0xf1 - (XEN) [<ffff82d040261567>] F do_multicall+0x1dc/0xde3 - (XEN) [<ffff82d04040d2b3>] F hvm_hypercall+0xa00/0x149a - (XEN) [<ffff82d0403cd072>] F vmx_vmexit_handler+0x1596/0x279c - (XEN) [<ffff82d0403d909b>] F vmx_asm_vmexit_handler+0xdb/0x200 - -Left-shifting any negative value is strictly undefined behaviour in C, and -the two parameters here come straight from the guest. - -The fuzzer happened to choose lo 0xf, hi 0x8000e300. - -Switch everything to be unsigned values, making the shift well defined. - -As GCC documents: - - As an extension to the C language, GCC does not use the latitude given in - C99 and C11 only to treat certain aspects of signed '<<' as undefined. - However, -fsanitize=shift (and -fsanitize=undefined) will diagnose such - cases. - -this was deemed not to need an XSA. - -Note: The unsigned -> signed conversion for do_set_timer_op()'s s_time_t -parameter is also well defined. C makes it implementation defined, and GCC -defines it as reduction modulo 2^N to be within range of the new type. 
- -Fixes: 2942f45e09fb ("Enable compatibility mode operation for HYPERVISOR_sched_op and HYPERVISOR_set_timer_op.") -Reported-by: Tamas K Lengyel <tamas@tklengyel.com> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: ae6d4fd876765e6d623eec67d14f5d0464be09cb -master date: 2024-02-01 19:52:44 +0000 ---- - xen/common/sched/compat.c | 4 ++-- - xen/include/hypercall-defs.c | 2 +- - 2 files changed, 3 insertions(+), 3 deletions(-) - -diff --git a/xen/common/sched/compat.c b/xen/common/sched/compat.c -index 040b4caca2..b827fdecb8 100644 ---- a/xen/common/sched/compat.c -+++ b/xen/common/sched/compat.c -@@ -39,9 +39,9 @@ static int compat_poll(struct compat_sched_poll *compat) - - #include "core.c" - --int compat_set_timer_op(u32 lo, s32 hi) -+int compat_set_timer_op(uint32_t lo, uint32_t hi) - { -- return do_set_timer_op(((s64)hi << 32) | lo); -+ return do_set_timer_op(((uint64_t)hi << 32) | lo); - } - - /* -diff --git a/xen/include/hypercall-defs.c b/xen/include/hypercall-defs.c -index 1896121074..c442dee284 100644 ---- a/xen/include/hypercall-defs.c -+++ b/xen/include/hypercall-defs.c -@@ -127,7 +127,7 @@ xenoprof_op(int op, void *arg) - - #ifdef CONFIG_COMPAT - prefix: compat --set_timer_op(uint32_t lo, int32_t hi) -+set_timer_op(uint32_t lo, uint32_t hi) - multicall(multicall_entry_compat_t *call_list, uint32_t nr_calls) - memory_op(unsigned int cmd, void *arg) - #ifdef CONFIG_IOREQ_SERVER --- -2.44.0 - diff --git a/0027-x86-EPT-avoid-marking-non-present-entries-for-re-con.patch b/0027-x86-EPT-avoid-marking-non-present-entries-for-re-con.patch new file mode 100644 index 0000000..23e8946 --- /dev/null +++ b/0027-x86-EPT-avoid-marking-non-present-entries-for-re-con.patch @@ -0,0 +1,85 @@ +From d31385be5c8e8bc5efb6f8848057bd0c69e8274a Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Wed, 26 Jun 2024 13:40:11 +0200 +Subject: [PATCH 27/56] x86/EPT: avoid marking non-present entries 
for + re-configuring +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +For non-present entries EMT, like most other fields, is meaningless to +hardware. Make the logic in ept_set_entry() setting the field (and iPAT) +conditional upon dealing with a present entry, leaving the value at 0 +otherwise. This has two effects for epte_get_entry_emt() which we'll +want to leverage subsequently: +1) The call moved here now won't be issued with INVALID_MFN anymore (a + respective BUG_ON() is being added). +2) Neither of the other two calls could now be issued with a truncated + form of INVALID_MFN anymore (as long as there's no bug anywhere + marking an entry present when that was populated using INVALID_MFN). + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 777c71d31325bc55ba1cc3f317d4155fe519ab0b +master date: 2024-06-13 16:54:17 +0200 +--- + xen/arch/x86/mm/p2m-ept.c | 29 ++++++++++++++++++----------- + 1 file changed, 18 insertions(+), 11 deletions(-) + +diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c +index 1aa6bbc771..641d61b350 100644 +--- a/xen/arch/x86/mm/p2m-ept.c ++++ b/xen/arch/x86/mm/p2m-ept.c +@@ -649,6 +649,8 @@ static int cf_check resolve_misconfig(struct p2m_domain *p2m, unsigned long gfn) + if ( e.emt != MTRR_NUM_TYPES ) + break; + ++ ASSERT(is_epte_present(&e)); ++ + if ( level == 0 ) + { + for ( gfn -= i, i = 0; i < EPT_PAGETABLE_ENTRIES; ++i ) +@@ -914,17 +916,6 @@ ept_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn, + + if ( mfn_valid(mfn) || p2m_allows_invalid_mfn(p2mt) ) + { +- bool ipat; +- int emt = epte_get_entry_emt(p2m->domain, _gfn(gfn), mfn, +- i * EPT_TABLE_ORDER, &ipat, +- p2mt); +- +- if ( emt >= 0 ) +- new_entry.emt = emt; +- else /* ept_handle_misconfig() will need to take care of this. 
*/ +- new_entry.emt = MTRR_NUM_TYPES; +- +- new_entry.ipat = ipat; + new_entry.sp = !!i; + new_entry.sa_p2mt = p2mt; + new_entry.access = p2ma; +@@ -940,6 +931,22 @@ ept_set_entry(struct p2m_domain *p2m, gfn_t gfn_, mfn_t mfn, + need_modify_vtd_table = 0; + + ept_p2m_type_to_flags(p2m, &new_entry); ++ ++ if ( is_epte_present(&new_entry) ) ++ { ++ bool ipat; ++ int emt = epte_get_entry_emt(p2m->domain, _gfn(gfn), mfn, ++ i * EPT_TABLE_ORDER, &ipat, ++ p2mt); ++ ++ BUG_ON(mfn_eq(mfn, INVALID_MFN)); ++ ++ if ( emt >= 0 ) ++ new_entry.emt = emt; ++ else /* ept_handle_misconfig() will need to take care of this. */ ++ new_entry.emt = MTRR_NUM_TYPES; ++ new_entry.ipat = ipat; ++ } + } + + if ( sve != -1 ) +-- +2.45.2 + diff --git a/0027-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch b/0027-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch deleted file mode 100644 index 845247a..0000000 --- a/0027-x86-spec-print-the-built-in-SPECULATIVE_HARDEN_-opti.patch +++ /dev/null @@ -1,54 +0,0 @@ -From 76ea2aab3652cc34e474de0905f0a9cd4df7d087 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:57:41 +0100 -Subject: [PATCH 27/67] x86/spec: print the built-in SPECULATIVE_HARDEN_* - options -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Just like it's done for INDIRECT_THUNK and SHADOW_PAGING. 
- -Reported-by: Jan Beulich <jbeulich@suse.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 6e9507f7d51fe49df8bc70f83e49ce06c92e4e54 -master date: 2024-02-27 14:57:52 +0100 ---- - xen/arch/x86/spec_ctrl.c | 14 +++++++++++++- - 1 file changed, 13 insertions(+), 1 deletion(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 661716d695..93f1cf3bb5 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -488,13 +488,25 @@ static void __init print_details(enum ind_thunk thunk) - (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : ""); - - /* Compiled-in support which pertains to mitigations. */ -- if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) ) -+ if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) ) - printk(" Compiled-in support:" - #ifdef CONFIG_INDIRECT_THUNK - " INDIRECT_THUNK" - #endif - #ifdef CONFIG_SHADOW_PAGING - " SHADOW_PAGING" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_ARRAY -+ " HARDEN_ARRAY" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_BRANCH -+ " HARDEN_BRANCH" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS -+ " HARDEN_GUEST_ACCESS" - #endif - "\n"); - --- -2.44.0 - diff --git a/0028-x86-EPT-drop-questionable-mfn_valid-from-epte_get_en.patch b/0028-x86-EPT-drop-questionable-mfn_valid-from-epte_get_en.patch new file mode 100644 index 0000000..ee495d4 --- /dev/null +++ b/0028-x86-EPT-drop-questionable-mfn_valid-from-epte_get_en.patch @@ -0,0 +1,47 @@ +From 3b777c2ce4ea8cf67b79a5496e51201145606798 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Wed, 26 Jun 2024 13:40:35 +0200 +Subject: [PATCH 28/56] x86/EPT: drop questionable mfn_valid() from + epte_get_entry_emt() +MIME-Version: 1.0 +Content-Type: 
text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +mfn_valid() is RAM-focused; it will often return false for MMIO. Yet +access to actual MMIO space should not generally be restricted to UC +only; especially video frame buffer accesses are unduly affected by such +a restriction. + +Since, as of 777c71d31325 ("x86/EPT: avoid marking non-present entries +for re-configuring"), the function won't be called with INVALID_MFN or, +worse, truncated forms thereof anymore, we can fully drop that check. + +Fixes: 81fd0d3ca4b2 ("x86/hvm: simplify 'mmio_direct' check in epte_get_entry_emt()") +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 4fdd8d75566fdad06667a79ec0ce6f43cc466c54 +master date: 2024-06-13 16:55:22 +0200 +--- + xen/arch/x86/mm/p2m-ept.c | 6 ------ + 1 file changed, 6 deletions(-) + +diff --git a/xen/arch/x86/mm/p2m-ept.c b/xen/arch/x86/mm/p2m-ept.c +index 641d61b350..d325424e97 100644 +--- a/xen/arch/x86/mm/p2m-ept.c ++++ b/xen/arch/x86/mm/p2m-ept.c +@@ -500,12 +500,6 @@ int epte_get_entry_emt(struct domain *d, gfn_t gfn, mfn_t mfn, + return -1; + } + +- if ( !mfn_valid(mfn) ) +- { +- *ipat = true; +- return X86_MT_UC; +- } +- + /* + * Conditional must be kept in sync with the code in + * {iomem,ioports}_{permit,deny}_access().
+-- +2.45.2 + diff --git a/0028-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch b/0028-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch deleted file mode 100644 index dfbf516..0000000 --- a/0028-x86-spec-fix-INDIRECT_THUNK-option-to-only-be-set-wh.patch +++ /dev/null @@ -1,67 +0,0 @@ -From 693455c3c370e535eb6cd065800ff91e147815fa Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:58:04 +0100 -Subject: [PATCH 28/67] x86/spec: fix INDIRECT_THUNK option to only be set when - build-enabled -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Attempt to provide a more helpful error message when the user attempts to set -spec-ctrl=bti-thunk option but the support is build-time disabled. - -While there also adjust the command line documentation to mention -CONFIG_INDIRECT_THUNK instead of INDIRECT_THUNK. - -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 8441fa806a3b778867867cd0159fa1722e90397e -master date: 2024-02-27 14:58:20 +0100 ---- - docs/misc/xen-command-line.pandoc | 10 +++++----- - xen/arch/x86/spec_ctrl.c | 7 ++++++- - 2 files changed, 11 insertions(+), 6 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 05f613c71c..2006697226 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2378,11 +2378,11 @@ guests to use. - performance reasons dom0 is unprotected by default. If it is necessary to - protect dom0 too, boot with `spec-ctrl=ibpb-entry`. - --If Xen was compiled with INDIRECT_THUNK support, `bti-thunk=` can be used to --select which of the thunks gets patched into the `__x86_indirect_thunk_%reg` --locations. 
The default thunk is `retpoline` (generally preferred), with the --alternatives being `jmp` (a `jmp *%reg` gadget, minimal overhead), and --`lfence` (an `lfence; jmp *%reg` gadget). -+If Xen was compiled with `CONFIG_INDIRECT_THUNK` support, `bti-thunk=` can be -+used to select which of the thunks gets patched into the -+`__x86_indirect_thunk_%reg` locations. The default thunk is `retpoline` -+(generally preferred), with the alternatives being `jmp` (a `jmp *%reg` gadget, -+minimal overhead), and `lfence` (an `lfence; jmp *%reg` gadget). - - On hardware supporting IBRS (Indirect Branch Restricted Speculation), the - `ibrs=` option can be used to force or prevent Xen using the feature itself. -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 93f1cf3bb5..098fa3184d 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -253,7 +253,12 @@ static int __init cf_check parse_spec_ctrl(const char *s) - { - s += 10; - -- if ( !cmdline_strcmp(s, "retpoline") ) -+ if ( !IS_ENABLED(CONFIG_INDIRECT_THUNK) ) -+ { -+ no_config_param("INDIRECT_THUNK", "spec-ctrl", s - 10, ss); -+ rc = -EINVAL; -+ } -+ else if ( !cmdline_strcmp(s, "retpoline") ) - opt_thunk = THUNK_RETPOLINE; - else if ( !cmdline_strcmp(s, "lfence") ) - opt_thunk = THUNK_LFENCE; --- -2.44.0 - diff --git a/0029-x86-Intel-unlock-CPUID-earlier-for-the-BSP.patch b/0029-x86-Intel-unlock-CPUID-earlier-for-the-BSP.patch new file mode 100644 index 0000000..6722508 --- /dev/null +++ b/0029-x86-Intel-unlock-CPUID-earlier-for-the-BSP.patch @@ -0,0 +1,105 @@ +From c4b284912695a5802433512b913e968eda01544f Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Wed, 26 Jun 2024 13:41:05 +0200 +Subject: [PATCH 29/56] x86/Intel: unlock CPUID earlier for the BSP +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Intel CPUs have a MSR bit to limit CPUID enumeration to leaf two. 
If +this bit is set by the BIOS then CPUID evaluation does not work when +data from any leaf greater than two is needed; early_cpu_init() in +particular wants to collect leaf 7 data. + +Cure this by unlocking CPUID right before evaluating anything which +depends on the maximum CPUID leaf being greater than two. + +Inspired by (and description cloned from) Linux commit 0c2f6d04619e +("x86/topology/intel: Unlock CPUID before evaluating anything"). + +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: fa4d026737a47cd1d66ffb797a29150b4453aa9f +master date: 2024-06-18 15:12:44 +0200 +--- + xen/arch/x86/cpu/common.c | 3 ++- + xen/arch/x86/cpu/cpu.h | 2 ++ + xen/arch/x86/cpu/intel.c | 29 +++++++++++++++++------------ + 3 files changed, 21 insertions(+), 13 deletions(-) + +diff --git a/xen/arch/x86/cpu/common.c b/xen/arch/x86/cpu/common.c +index 26eed2ade1..edec0a2546 100644 +--- a/xen/arch/x86/cpu/common.c ++++ b/xen/arch/x86/cpu/common.c +@@ -336,7 +336,8 @@ void __init early_cpu_init(bool verbose) + + c->x86_vendor = x86_cpuid_lookup_vendor(ebx, ecx, edx); + switch (c->x86_vendor) { +- case X86_VENDOR_INTEL: actual_cpu = intel_cpu_dev; break; ++ case X86_VENDOR_INTEL: intel_unlock_cpuid_leaves(c); ++ actual_cpu = intel_cpu_dev; break; + case X86_VENDOR_AMD: actual_cpu = amd_cpu_dev; break; + case X86_VENDOR_CENTAUR: actual_cpu = centaur_cpu_dev; break; + case X86_VENDOR_SHANGHAI: actual_cpu = shanghai_cpu_dev; break; +diff --git a/xen/arch/x86/cpu/cpu.h b/xen/arch/x86/cpu/cpu.h +index e3d06278b3..8be65e975a 100644 +--- a/xen/arch/x86/cpu/cpu.h ++++ b/xen/arch/x86/cpu/cpu.h +@@ -24,3 +24,5 @@ void amd_init_lfence(struct cpuinfo_x86 *c); + void amd_init_ssbd(const struct cpuinfo_x86 *c); + void amd_init_spectral_chicken(void); + void detect_zen2_null_seg_behaviour(void); ++ ++void intel_unlock_cpuid_leaves(struct cpuinfo_x86 *c); +diff --git a/xen/arch/x86/cpu/intel.c b/xen/arch/x86/cpu/intel.c +index 
deb7b70464..0dc7c27601 100644 +--- a/xen/arch/x86/cpu/intel.c ++++ b/xen/arch/x86/cpu/intel.c +@@ -303,10 +303,24 @@ static void __init noinline intel_init_levelling(void) + ctxt_switch_masking = intel_ctxt_switch_masking; + } + +-static void cf_check early_init_intel(struct cpuinfo_x86 *c) ++/* Unmask CPUID levels if masked. */ ++void intel_unlock_cpuid_leaves(struct cpuinfo_x86 *c) + { +- u64 misc_enable, disable; ++ uint64_t misc_enable, disable; ++ ++ rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); ++ ++ disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; ++ if (disable) { ++ wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); ++ bootsym(trampoline_misc_enable_off) |= disable; ++ c->cpuid_level = cpuid_eax(0); ++ printk(KERN_INFO "revised cpuid level: %u\n", c->cpuid_level); ++ } ++} + ++static void cf_check early_init_intel(struct cpuinfo_x86 *c) ++{ + /* Netburst reports 64 bytes clflush size, but does IO in 128 bytes */ + if (c->x86 == 15 && c->x86_cache_alignment == 64) + c->x86_cache_alignment = 128; +@@ -315,16 +329,7 @@ static void cf_check early_init_intel(struct cpuinfo_x86 *c) + bootsym(trampoline_misc_enable_off) & MSR_IA32_MISC_ENABLE_XD_DISABLE) + printk(KERN_INFO "re-enabled NX (Execute Disable) protection\n"); + +- /* Unmask CPUID levels and NX if masked: */ +- rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); +- +- disable = misc_enable & MSR_IA32_MISC_ENABLE_LIMIT_CPUID; +- if (disable) { +- wrmsrl(MSR_IA32_MISC_ENABLE, misc_enable & ~disable); +- bootsym(trampoline_misc_enable_off) |= disable; +- printk(KERN_INFO "revised cpuid level: %d\n", +- cpuid_eax(0)); +- } ++ intel_unlock_cpuid_leaves(c); + + /* CPUID workaround for Intel 0F33/0F34 CPU */ + if (boot_cpu_data.x86 == 0xF && boot_cpu_data.x86_model == 3 && +-- +2.45.2 + diff --git a/0029-x86-spec-do-not-print-thunk-option-selection-if-not-.patch b/0029-x86-spec-do-not-print-thunk-option-selection-if-not-.patch deleted file mode 100644 index 71e6633..0000000 --- 
a/0029-x86-spec-do-not-print-thunk-option-selection-if-not-.patch +++ /dev/null @@ -1,50 +0,0 @@ -From 0ce25b46ab2fb53a1b58f7682ca14971453f4f2c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:58:36 +0100 -Subject: [PATCH 29/67] x86/spec: do not print thunk option selection if not - built-in -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Since the thunk built-in enable is printed as part of the "Compiled-in -support:" line, avoid printing anything in "Xen settings:" if the thunk is -disabled at build time. - -Note the BTI-Thunk option printing is also adjusted to print a colon in the -same way the other options on the line do. - -Requested-by: Jan Beulich <jbeulich@suse.com> -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 576528a2a742069af203e90c613c5c93e23c9755 -master date: 2024-02-27 14:58:40 +0100 ---- - xen/arch/x86/spec_ctrl.c | 11 ++++++----- - 1 file changed, 6 insertions(+), 5 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 098fa3184d..25a18ac598 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -516,11 +516,12 @@ static void __init print_details(enum ind_thunk thunk) - "\n"); - - /* Settings for Xen's protection, irrespective of guests. */ -- printk(" Xen settings: BTI-Thunk %s, SPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", -- thunk == THUNK_NONE ? "N/A" : -- thunk == THUNK_RETPOLINE ? "RETPOLINE" : -- thunk == THUNK_LFENCE ? "LFENCE" : -- thunk == THUNK_JMP ? "JMP" : "?", -+ printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", -+ thunk != THUNK_NONE ? "BTI-Thunk: " : "", -+ thunk == THUNK_NONE ? "" : -+ thunk == THUNK_RETPOLINE ? "RETPOLINE, " : -+ thunk == THUNK_LFENCE ? "LFENCE, " : -+ thunk == THUNK_JMP ? 
"JMP, " : "?, ", - (!boot_cpu_has(X86_FEATURE_IBRSB) && - !boot_cpu_has(X86_FEATURE_IBRS)) ? "No" : - (default_xen_spec_ctrl & SPEC_CTRL_IBRS) ? "IBRS+" : "IBRS-", --- -2.44.0 - diff --git a/0030-x86-irq-deal-with-old_cpu_mask-for-interrupts-in-mov.patch b/0030-x86-irq-deal-with-old_cpu_mask-for-interrupts-in-mov.patch new file mode 100644 index 0000000..785df10 --- /dev/null +++ b/0030-x86-irq-deal-with-old_cpu_mask-for-interrupts-in-mov.patch @@ -0,0 +1,84 @@ +From 39a6170c15bf369a2b26c855ea7621387ed4070b Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Wed, 26 Jun 2024 13:41:35 +0200 +Subject: [PATCH 30/56] x86/irq: deal with old_cpu_mask for interrupts in + movement in fixup_irqs() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Given the current logic it's possible for ->arch.old_cpu_mask to get out of +sync: if a CPU set in old_cpu_mask is offlined and then onlined +again without old_cpu_mask having been updated the data in the mask will no +longer be accurate, as when brought back online the CPU will no longer have +old_vector configured to handle the old interrupt source. + +If there's an interrupt movement in progress, and the to be offlined CPU (which +is the call context) is in the old_cpu_mask, clear it and update the mask, so +it doesn't contain stale data. + +Note that when the system is going down fixup_irqs() will be called by +smp_send_stop() from CPU 0 with a mask with only CPU 0 on it, effectively +asking to move all interrupts to the current caller (CPU 0) which is the only +CPU to remain online. In that case we don't care to migrate interrupts that +are in the process of being moved, as it's likely we won't be able to move all +interrupts to CPU 0 due to vector shortage anyway. 
+ +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 817d1cd627be668c358d038f0fadbf7d24d417d3 +master date: 2024-06-18 15:14:49 +0200 +--- + xen/arch/x86/irq.c | 29 ++++++++++++++++++++++++++++- + 1 file changed, 28 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c +index 566331bec1..f877327975 100644 +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2539,7 +2539,7 @@ void fixup_irqs(const cpumask_t *mask, bool verbose) + for ( irq = 0; irq < nr_irqs; irq++ ) + { + bool break_affinity = false, set_affinity = true; +- unsigned int vector; ++ unsigned int vector, cpu = smp_processor_id(); + cpumask_t *affinity = this_cpu(scratch_cpumask); + + if ( irq == 2 ) +@@ -2582,6 +2582,33 @@ void fixup_irqs(const cpumask_t *mask, bool verbose) + affinity); + } + ++ if ( desc->arch.move_in_progress && ++ /* ++ * Only attempt to adjust the mask if the current CPU is going ++ * offline, otherwise the whole system is going down and leaving ++ * stale data in the masks is fine. ++ */ ++ !cpu_online(cpu) && ++ cpumask_test_cpu(cpu, desc->arch.old_cpu_mask) ) ++ { ++ /* ++ * This CPU is going offline, remove it from ->arch.old_cpu_mask ++ * and possibly release the old vector if the old mask becomes ++ * empty. ++ * ++ * Note cleaning ->arch.old_cpu_mask is required if the CPU is ++ * brought offline and then online again, as when re-onlined the ++ * per-cpu vector table will no longer have ->arch.old_vector ++ * setup, and hence ->arch.old_cpu_mask would be stale. ++ */ ++ cpumask_clear_cpu(cpu, desc->arch.old_cpu_mask); ++ if ( cpumask_empty(desc->arch.old_cpu_mask) ) ++ { ++ desc->arch.move_in_progress = 0; ++ release_old_vec(desc); ++ } ++ } ++ + /* + * Avoid shuffling the interrupt around as long as current target CPUs + * are a subset of the input mask. 
What fixup_irqs() cares about is +-- +2.45.2 + diff --git a/0030-xen-livepatch-register-livepatch-regions-when-loaded.patch b/0030-xen-livepatch-register-livepatch-regions-when-loaded.patch deleted file mode 100644 index f521ecc..0000000 --- a/0030-xen-livepatch-register-livepatch-regions-when-loaded.patch +++ /dev/null @@ -1,159 +0,0 @@ -From b11917de0cd261a878beaf50c18a689bde0b2f50 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:59:26 +0100 -Subject: [PATCH 30/67] xen/livepatch: register livepatch regions when loaded -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Currently livepatch regions are registered as virtual regions only after the -livepatch has been applied. - -This can lead to issues when using the pre-apply or post-revert hooks, as at -that point the livepatch is not in the virtual regions list. If a livepatch -pre-apply hook contains a WARN() it would trigger an hypervisor crash, as the -code to handle the bug frame won't be able to find the instruction pointer that -triggered the #UD in any of the registered virtual regions, and hence crash. - -Fix this by adding the livepatch payloads as virtual regions as soon as loaded, -and only remove them once the payload is unloaded. This requires some changes -to the virtual regions code, as the removal of the virtual regions is no longer -done in stop machine context, and hence an RCU barrier is added in order to -make sure there are no users of the virtual region after it's been removed from -the list. 
- -Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: a57b4074ab39bee78b6c116277f0a9963bd8e687 -master date: 2024-02-28 16:57:25 +0000 ---- - xen/common/livepatch.c | 4 ++-- - xen/common/virtual_region.c | 44 ++++++++++++++----------------------- - 2 files changed, 19 insertions(+), 29 deletions(-) - -diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c -index c2ae84d18b..537e9f33e4 100644 ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -1015,6 +1015,7 @@ static int build_symbol_table(struct payload *payload, - static void free_payload(struct payload *data) - { - ASSERT(spin_is_locked(&payload_lock)); -+ unregister_virtual_region(&data->region); - list_del(&data->list); - payload_cnt--; - payload_version++; -@@ -1114,6 +1115,7 @@ static int livepatch_upload(struct xen_sysctl_livepatch_upload *upload) - INIT_LIST_HEAD(&data->list); - INIT_LIST_HEAD(&data->applied_list); - -+ register_virtual_region(&data->region); - list_add_tail(&data->list, &payload_list); - payload_cnt++; - payload_version++; -@@ -1330,7 +1332,6 @@ static inline void apply_payload_tail(struct payload *data) - * The applied_list is iterated by the trap code. - */ - list_add_tail_rcu(&data->applied_list, &applied_list); -- register_virtual_region(&data->region); - - data->state = LIVEPATCH_STATE_APPLIED; - } -@@ -1376,7 +1377,6 @@ static inline void revert_payload_tail(struct payload *data) - * The applied_list is iterated by the trap code. 
- */ - list_del_rcu(&data->applied_list); -- unregister_virtual_region(&data->region); - - data->reverted = true; - data->state = LIVEPATCH_STATE_CHECKED; -diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c -index 5f89703f51..9f12c30efe 100644 ---- a/xen/common/virtual_region.c -+++ b/xen/common/virtual_region.c -@@ -23,14 +23,8 @@ static struct virtual_region core_init __initdata = { - }; - - /* -- * RCU locking. Additions are done either at startup (when there is only -- * one CPU) or when all CPUs are running without IRQs. -- * -- * Deletions are bit tricky. We do it when Live Patch (all CPUs running -- * without IRQs) or during bootup (when clearing the init). -- * -- * Hence we use list_del_rcu (which sports an memory fence) and a spinlock -- * on deletion. -+ * RCU locking. Modifications to the list must be done in exclusive mode, and -+ * hence need to hold the spinlock. - * - * All readers of virtual_region_list MUST use list_for_each_entry_rcu. - */ -@@ -58,41 +52,36 @@ const struct virtual_region *find_text_region(unsigned long addr) - - void register_virtual_region(struct virtual_region *r) - { -- ASSERT(!local_irq_is_enabled()); -+ unsigned long flags; - -+ spin_lock_irqsave(&virtual_region_lock, flags); - list_add_tail_rcu(&r->list, &virtual_region_list); -+ spin_unlock_irqrestore(&virtual_region_lock, flags); - } - --static void remove_virtual_region(struct virtual_region *r) -+/* -+ * Suggest inline so when !CONFIG_LIVEPATCH the function is not left -+ * unreachable after init code is removed. -+ */ -+static void inline remove_virtual_region(struct virtual_region *r) - { - unsigned long flags; - - spin_lock_irqsave(&virtual_region_lock, flags); - list_del_rcu(&r->list); - spin_unlock_irqrestore(&virtual_region_lock, flags); -- /* -- * We do not need to invoke call_rcu. 
-- * -- * This is due to the fact that on the deletion we have made sure -- * to use spinlocks (to guard against somebody else calling -- * unregister_virtual_region) and list_deletion spiced with -- * memory barrier. -- * -- * That protects us from corrupting the list as the readers all -- * use list_for_each_entry_rcu which is safe against concurrent -- * deletions. -- */ - } - -+#ifdef CONFIG_LIVEPATCH - void unregister_virtual_region(struct virtual_region *r) - { -- /* Expected to be called from Live Patch - which has IRQs disabled. */ -- ASSERT(!local_irq_is_enabled()); -- - remove_virtual_region(r); -+ -+ /* Assert that no CPU might be using the removed region. */ -+ rcu_barrier(); - } - --#if defined(CONFIG_LIVEPATCH) && defined(CONFIG_X86) -+#ifdef CONFIG_X86 - void relax_virtual_region_perms(void) - { - const struct virtual_region *region; -@@ -116,7 +105,8 @@ void tighten_virtual_region_perms(void) - PAGE_HYPERVISOR_RX); - rcu_read_unlock(&rcu_virtual_region_lock); - } --#endif -+#endif /* CONFIG_X86 */ -+#endif /* CONFIG_LIVEPATCH */ - - void __init unregister_init_virtual_region(void) - { --- -2.44.0 - diff --git a/0031-x86-irq-handle-moving-interrupts-in-_assign_irq_vect.patch b/0031-x86-irq-handle-moving-interrupts-in-_assign_irq_vect.patch new file mode 100644 index 0000000..96e87cd --- /dev/null +++ b/0031-x86-irq-handle-moving-interrupts-in-_assign_irq_vect.patch @@ -0,0 +1,172 @@ +From 3a8f4ec75d8ed8da6370deac95c341cbada96802 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Wed, 26 Jun 2024 13:42:05 +0200 +Subject: [PATCH 31/56] x86/irq: handle moving interrupts in + _assign_irq_vector() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Currently there's logic in fixup_irqs() that attempts to prevent +_assign_irq_vector() from failing, as fixup_irqs() is required to evacuate all +interrupts from the CPUs not present in the input mask. 
The current logic in +fixup_irqs() is incomplete, as it doesn't deal with interrupts that have +move_cleanup_count > 0 and a non-empty ->arch.old_cpu_mask field. + +Instead of attempting to fixup the interrupt descriptor in fixup_irqs() so that +_assign_irq_vector() cannot fail, introduce logic in _assign_irq_vector() +to deal with interrupts that have either move_{in_progress,cleanup_count} set +and no remaining online CPUs in ->arch.cpu_mask. + +If _assign_irq_vector() is requested to move an interrupt in the state +described above, first attempt to see if ->arch.old_cpu_mask contains any valid +CPUs that could be used as fallback, and if that's the case do move the +interrupt back to the previous destination. Note this is easier because the +vector hasn't been released yet, so there's no need to allocate and setup a new +vector on the destination. + +Due to the logic in fixup_irqs() that clears offline CPUs from +->arch.old_cpu_mask (and releases the old vector if the mask becomes empty) it +shouldn't be possible to get into _assign_irq_vector() with +->arch.move_{in_progress,cleanup_count} set but no online CPUs in +->arch.old_cpu_mask. + +However if ->arch.move_{in_progress,cleanup_count} is set and the interrupt has +also changed affinity, it's possible the members of ->arch.old_cpu_mask are no +longer part of the affinity set, move the interrupt to a different CPU part of +the provided mask and keep the current ->arch.old_{cpu_mask,vector} for the +pending interrupt movement to be completed. 
+ +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 369558924a642bbb0cb731e9a3375958867cb17b +master date: 2024-06-18 15:15:10 +0200 +--- + xen/arch/x86/irq.c | 97 ++++++++++++++++++++++++++++++++-------------- + 1 file changed, 68 insertions(+), 29 deletions(-) + +diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c +index f877327975..13ef61a5b7 100644 +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -553,7 +553,58 @@ static int _assign_irq_vector(struct irq_desc *desc, const cpumask_t *mask) + } + + if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count ) +- return -EAGAIN; ++ { ++ /* ++ * If the current destination is online refuse to shuffle. Retry after ++ * the in-progress movement has finished. ++ */ ++ if ( cpumask_intersects(desc->arch.cpu_mask, &cpu_online_map) ) ++ return -EAGAIN; ++ ++ /* ++ * Due to the logic in fixup_irqs() that clears offlined CPUs from ++ * ->arch.old_cpu_mask it shouldn't be possible to get here with ++ * ->arch.move_{in_progress,cleanup_count} set and no online CPUs in ++ * ->arch.old_cpu_mask. ++ */ ++ ASSERT(valid_irq_vector(desc->arch.old_vector)); ++ ASSERT(cpumask_intersects(desc->arch.old_cpu_mask, &cpu_online_map)); ++ ++ if ( cpumask_intersects(desc->arch.old_cpu_mask, mask) ) ++ { ++ /* ++ * Fallback to the old destination if moving is in progress and the ++ * current destination is to be offlined. This is only possible if ++ * the CPUs in old_cpu_mask intersect with the affinity mask passed ++ * in the 'mask' parameter. ++ */ ++ desc->arch.vector = desc->arch.old_vector; ++ cpumask_and(desc->arch.cpu_mask, desc->arch.old_cpu_mask, mask); ++ ++ /* Undo any possibly done cleanup. */ ++ for_each_cpu(cpu, desc->arch.cpu_mask) ++ per_cpu(vector_irq, cpu)[desc->arch.vector] = irq; ++ ++ /* Cancel the pending move and release the current vector. 
*/ ++ desc->arch.old_vector = IRQ_VECTOR_UNASSIGNED; ++ cpumask_clear(desc->arch.old_cpu_mask); ++ desc->arch.move_in_progress = 0; ++ desc->arch.move_cleanup_count = 0; ++ if ( desc->arch.used_vectors ) ++ { ++ ASSERT(test_bit(old_vector, desc->arch.used_vectors)); ++ clear_bit(old_vector, desc->arch.used_vectors); ++ } ++ ++ return 0; ++ } ++ ++ /* ++ * There's an interrupt movement in progress but the destination(s) in ++ * ->arch.old_cpu_mask are not suitable given the 'mask' parameter, go ++ * through the full logic to find a new vector in a suitable CPU. ++ */ ++ } + + err = -ENOSPC; + +@@ -609,7 +660,22 @@ next: + current_vector = vector; + current_offset = offset; + +- if ( valid_irq_vector(old_vector) ) ++ if ( desc->arch.move_in_progress || desc->arch.move_cleanup_count ) ++ { ++ ASSERT(!cpumask_intersects(desc->arch.cpu_mask, &cpu_online_map)); ++ /* ++ * Special case when evacuating an interrupt from a CPU to be ++ * offlined and the interrupt was already in the process of being ++ * moved. Leave ->arch.old_{vector,cpu_mask} as-is and just ++ * replace ->arch.{cpu_mask,vector} with the new destination. ++ * Cleanup will be done normally for the old fields, just release ++ * the current vector here. ++ */ ++ if ( desc->arch.used_vectors && ++ !test_and_clear_bit(old_vector, desc->arch.used_vectors) ) ++ ASSERT_UNREACHABLE(); ++ } ++ else if ( valid_irq_vector(old_vector) ) + { + cpumask_and(desc->arch.old_cpu_mask, desc->arch.cpu_mask, + &cpu_online_map); +@@ -2620,33 +2686,6 @@ void fixup_irqs(const cpumask_t *mask, bool verbose) + continue; + } + +- /* +- * In order for the affinity adjustment below to be successful, we +- * need _assign_irq_vector() to succeed. This in particular means +- * clearing desc->arch.move_in_progress if this would otherwise +- * prevent the function from succeeding. 
Since there's no way for the +- * flag to get cleared anymore when there's no possible destination +- * left (the only possibility then would be the IRQs enabled window +- * after this loop), there's then also no race with us doing it here. +- * +- * Therefore the logic here and there need to remain in sync. +- */ +- if ( desc->arch.move_in_progress && +- !cpumask_intersects(mask, desc->arch.cpu_mask) ) +- { +- unsigned int cpu; +- +- cpumask_and(affinity, desc->arch.old_cpu_mask, &cpu_online_map); +- +- spin_lock(&vector_lock); +- for_each_cpu(cpu, affinity) +- per_cpu(vector_irq, cpu)[desc->arch.old_vector] = ~irq; +- spin_unlock(&vector_lock); +- +- release_old_vec(desc); +- desc->arch.move_in_progress = 0; +- } +- + if ( !cpumask_intersects(mask, desc->affinity) ) + { + break_affinity = true; +-- +2.45.2 + diff --git a/0031-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch b/0031-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch deleted file mode 100644 index c778639..0000000 --- a/0031-xen-livepatch-search-for-symbols-in-all-loaded-paylo.patch +++ /dev/null @@ -1,149 +0,0 @@ -From c54cf903b06fb1933fad053cc547580c92c856ea Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:59:35 +0100 -Subject: [PATCH 31/67] xen/livepatch: search for symbols in all loaded - payloads -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -When checking if an address belongs to a patch, or when resolving a symbol, -take into account all loaded livepatch payloads, even if not applied. - -This is required in order for the pre-apply and post-revert hooks to work -properly, or else Xen won't detect the instruction pointer belonging to those -hooks as being part of the currently active text. - -Move the RCU handling to be used for payload_list instead of applied_list, as -now the calls from trap code will iterate over the payload_list. 
- -Fixes: 8313c864fa95 ('livepatch: Implement pre-|post- apply|revert hooks') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: d2daa40fb3ddb8f83e238e57854bd878924cde90 -master date: 2024-02-28 16:57:25 +0000 ---- - xen/common/livepatch.c | 49 +++++++++++++++--------------------------- - 1 file changed, 17 insertions(+), 32 deletions(-) - -diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c -index 537e9f33e4..a129ab9973 100644 ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -36,13 +36,14 @@ - * caller in schedule_work. - */ - static DEFINE_SPINLOCK(payload_lock); --static LIST_HEAD(payload_list); -- - /* -- * Patches which have been applied. Need RCU in case we crash (and then -- * traps code would iterate via applied_list) when adding entries on the list. -+ * Need RCU in case we crash (and then traps code would iterate via -+ * payload_list) when adding entries on the list. - */ --static DEFINE_RCU_READ_LOCK(rcu_applied_lock); -+static DEFINE_RCU_READ_LOCK(rcu_payload_lock); -+static LIST_HEAD(payload_list); -+ -+/* Patches which have been applied. Only modified from stop machine context. */ - static LIST_HEAD(applied_list); - - static unsigned int payload_cnt; -@@ -111,12 +112,8 @@ bool_t is_patch(const void *ptr) - const struct payload *data; - bool_t r = 0; - -- /* -- * Only RCU locking since this list is only ever changed during apply -- * or revert context. And in case it dies there we need an safe list. 
-- */ -- rcu_read_lock(&rcu_applied_lock); -- list_for_each_entry_rcu ( data, &applied_list, applied_list ) -+ rcu_read_lock(&rcu_payload_lock); -+ list_for_each_entry_rcu ( data, &payload_list, list ) - { - if ( (ptr >= data->rw_addr && - ptr < (data->rw_addr + data->rw_size)) || -@@ -130,7 +127,7 @@ bool_t is_patch(const void *ptr) - } - - } -- rcu_read_unlock(&rcu_applied_lock); -+ rcu_read_unlock(&rcu_payload_lock); - - return r; - } -@@ -166,12 +163,8 @@ static const char *cf_check livepatch_symbols_lookup( - const void *va = (const void *)addr; - const char *n = NULL; - -- /* -- * Only RCU locking since this list is only ever changed during apply -- * or revert context. And in case it dies there we need an safe list. -- */ -- rcu_read_lock(&rcu_applied_lock); -- list_for_each_entry_rcu ( data, &applied_list, applied_list ) -+ rcu_read_lock(&rcu_payload_lock); -+ list_for_each_entry_rcu ( data, &payload_list, list ) - { - if ( va < data->text_addr || - va >= (data->text_addr + data->text_size) ) -@@ -200,7 +193,7 @@ static const char *cf_check livepatch_symbols_lookup( - n = data->symtab[best].name; - break; - } -- rcu_read_unlock(&rcu_applied_lock); -+ rcu_read_unlock(&rcu_payload_lock); - - return n; - } -@@ -1016,7 +1009,8 @@ static void free_payload(struct payload *data) - { - ASSERT(spin_is_locked(&payload_lock)); - unregister_virtual_region(&data->region); -- list_del(&data->list); -+ list_del_rcu(&data->list); -+ rcu_barrier(); - payload_cnt--; - payload_version++; - free_payload_data(data); -@@ -1116,7 +1110,7 @@ static int livepatch_upload(struct xen_sysctl_livepatch_upload *upload) - INIT_LIST_HEAD(&data->applied_list); - - register_virtual_region(&data->region); -- list_add_tail(&data->list, &payload_list); -+ list_add_tail_rcu(&data->list, &payload_list); - payload_cnt++; - payload_version++; - } -@@ -1327,11 +1321,7 @@ static int apply_payload(struct payload *data) - - static inline void apply_payload_tail(struct payload *data) - { -- /* -- * We 
need RCU variant (which has barriers) in case we crash here. -- * The applied_list is iterated by the trap code. -- */ -- list_add_tail_rcu(&data->applied_list, &applied_list); -+ list_add_tail(&data->applied_list, &applied_list); - - data->state = LIVEPATCH_STATE_APPLIED; - } -@@ -1371,12 +1361,7 @@ static int revert_payload(struct payload *data) - - static inline void revert_payload_tail(struct payload *data) - { -- -- /* -- * We need RCU variant (which has barriers) in case we crash here. -- * The applied_list is iterated by the trap code. -- */ -- list_del_rcu(&data->applied_list); -+ list_del(&data->applied_list); - - data->reverted = true; - data->state = LIVEPATCH_STATE_CHECKED; --- -2.44.0 - diff --git a/0032-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch b/0032-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch deleted file mode 100644 index 76af9ef..0000000 --- a/0032-xen-livepatch-fix-norevert-test-attempt-to-open-code.patch +++ /dev/null @@ -1,186 +0,0 @@ -From 5564323f643715f9d364df88e0eb9c7d6fd2c22b Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:59:43 +0100 -Subject: [PATCH 32/67] xen/livepatch: fix norevert test attempt to open-code - revert -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The purpose of the norevert test is to install a dummy handler that replaces -the internal Xen revert code, and then perform the revert in the post-revert -hook. For that purpose the usage of the previous common_livepatch_revert() is -not enough, as that just reverts specific functions, but not the whole state of -the payload. - -Remove both common_livepatch_{apply,revert}() and instead expose -revert_payload{,_tail}() in order to perform the patch revert from the -post-revert hook. 
- -Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: cdae267ce10d04d71d1687b5701ff2911a96b6dc -master date: 2024-02-28 16:57:25 +0000 ---- - xen/common/livepatch.c | 41 +++++++++++++++++-- - xen/include/xen/livepatch.h | 32 ++------------- - .../livepatch/xen_action_hooks_norevert.c | 22 +++------- - 3 files changed, 46 insertions(+), 49 deletions(-) - -diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c -index a129ab9973..a5068a2217 100644 ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -1310,7 +1310,22 @@ static int apply_payload(struct payload *data) - ASSERT(!local_irq_is_enabled()); - - for ( i = 0; i < data->nfuncs; i++ ) -- common_livepatch_apply(&data->funcs[i], &data->fstate[i]); -+ { -+ const struct livepatch_func *func = &data->funcs[i]; -+ struct livepatch_fstate *state = &data->fstate[i]; -+ -+ /* If the action has been already executed on this function, do nothing. 
*/ -+ if ( state->applied == LIVEPATCH_FUNC_APPLIED ) -+ { -+ printk(XENLOG_WARNING LIVEPATCH -+ "%s: %s has been already applied before\n", -+ __func__, func->name); -+ continue; -+ } -+ -+ arch_livepatch_apply(func, state); -+ state->applied = LIVEPATCH_FUNC_APPLIED; -+ } - - arch_livepatch_revive(); - -@@ -1326,7 +1341,7 @@ static inline void apply_payload_tail(struct payload *data) - data->state = LIVEPATCH_STATE_APPLIED; - } - --static int revert_payload(struct payload *data) -+int revert_payload(struct payload *data) - { - unsigned int i; - int rc; -@@ -1341,7 +1356,25 @@ static int revert_payload(struct payload *data) - } - - for ( i = 0; i < data->nfuncs; i++ ) -- common_livepatch_revert(&data->funcs[i], &data->fstate[i]); -+ { -+ const struct livepatch_func *func = &data->funcs[i]; -+ struct livepatch_fstate *state = &data->fstate[i]; -+ -+ /* -+ * If the apply action hasn't been executed on this function, do -+ * nothing. -+ */ -+ if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED ) -+ { -+ printk(XENLOG_WARNING LIVEPATCH -+ "%s: %s has not been applied before\n", -+ __func__, func->name); -+ continue; -+ } -+ -+ arch_livepatch_revert(func, state); -+ state->applied = LIVEPATCH_FUNC_NOT_APPLIED; -+ } - - /* - * Since we are running with IRQs disabled and the hooks may call common -@@ -1359,7 +1392,7 @@ static int revert_payload(struct payload *data) - return 0; - } - --static inline void revert_payload_tail(struct payload *data) -+void revert_payload_tail(struct payload *data) - { - list_del(&data->applied_list); - -diff --git a/xen/include/xen/livepatch.h b/xen/include/xen/livepatch.h -index 537d3d58b6..c9ee58fd37 100644 ---- a/xen/include/xen/livepatch.h -+++ b/xen/include/xen/livepatch.h -@@ -136,35 +136,11 @@ void arch_livepatch_post_action(void); - void arch_livepatch_mask(void); - void arch_livepatch_unmask(void); - --static inline void common_livepatch_apply(const struct livepatch_func *func, -- struct livepatch_fstate *state) --{ 
-- /* If the action has been already executed on this function, do nothing. */ -- if ( state->applied == LIVEPATCH_FUNC_APPLIED ) -- { -- printk(XENLOG_WARNING LIVEPATCH "%s: %s has been already applied before\n", -- __func__, func->name); -- return; -- } -- -- arch_livepatch_apply(func, state); -- state->applied = LIVEPATCH_FUNC_APPLIED; --} -+/* Only for testing purposes. */ -+struct payload; -+int revert_payload(struct payload *data); -+void revert_payload_tail(struct payload *data); - --static inline void common_livepatch_revert(const struct livepatch_func *func, -- struct livepatch_fstate *state) --{ -- /* If the apply action hasn't been executed on this function, do nothing. */ -- if ( !func->old_addr || state->applied == LIVEPATCH_FUNC_NOT_APPLIED ) -- { -- printk(XENLOG_WARNING LIVEPATCH "%s: %s has not been applied before\n", -- __func__, func->name); -- return; -- } -- -- arch_livepatch_revert(func, state); -- state->applied = LIVEPATCH_FUNC_NOT_APPLIED; --} - #else - - /* -diff --git a/xen/test/livepatch/xen_action_hooks_norevert.c b/xen/test/livepatch/xen_action_hooks_norevert.c -index c173855192..c5fbab1746 100644 ---- a/xen/test/livepatch/xen_action_hooks_norevert.c -+++ b/xen/test/livepatch/xen_action_hooks_norevert.c -@@ -96,26 +96,14 @@ static int revert_hook(livepatch_payload_t *payload) - - static void post_revert_hook(livepatch_payload_t *payload) - { -- int i; -+ unsigned long flags; - - printk(KERN_DEBUG "%s: Hook starting.\n", __func__); - -- for (i = 0; i < payload->nfuncs; i++) -- { -- const struct livepatch_func *func = &payload->funcs[i]; -- struct livepatch_fstate *fstate = &payload->fstate[i]; -- -- BUG_ON(revert_cnt != 1); -- BUG_ON(fstate->applied != LIVEPATCH_FUNC_APPLIED); -- -- /* Outside of quiesce zone: MAY TRIGGER HOST CRASH/UNDEFINED BEHAVIOR */ -- arch_livepatch_quiesce(); -- common_livepatch_revert(payload); -- arch_livepatch_revive(); -- BUG_ON(fstate->applied == LIVEPATCH_FUNC_APPLIED); -- -- printk(KERN_DEBUG "%s: post 
reverted: %s\n", __func__, func->name); -- } -+ local_irq_save(flags); -+ BUG_ON(revert_payload(payload)); -+ revert_payload_tail(payload); -+ local_irq_restore(flags); - - printk(KERN_DEBUG "%s: Hook done.\n", __func__); - } --- -2.44.0 - diff --git a/0032-xen-ubsan-Fix-UB-in-type_descriptor-declaration.patch b/0032-xen-ubsan-Fix-UB-in-type_descriptor-declaration.patch new file mode 100644 index 0000000..c7c0968 --- /dev/null +++ b/0032-xen-ubsan-Fix-UB-in-type_descriptor-declaration.patch @@ -0,0 +1,39 @@ +From 5397ab9995f7354e7f8122a8a91c810256afa3d1 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Wed, 26 Jun 2024 13:42:30 +0200 +Subject: [PATCH 32/56] xen/ubsan: Fix UB in type_descriptor declaration + +struct type_descriptor is arranged with a NUL terminated string following the +kind/info fields. + +The only reason this doesn't trip UBSAN detection itself (on more modern +compilers at least) is because struct type_descriptor is only referenced in +suppressed regions. + +Switch the declaration to be a real flexible member. No functional change. 
+ +Fixes: 00fcf4dd8eb4 ("xen/ubsan: Import ubsan implementation from Linux 4.13") +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: bd59af99700f075d06a6d47a16f777c9519928e0 +master date: 2024-06-18 14:55:04 +0100 +--- + xen/common/ubsan/ubsan.h | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/xen/common/ubsan/ubsan.h b/xen/common/ubsan/ubsan.h +index a3159040fe..3db42e75b1 100644 +--- a/xen/common/ubsan/ubsan.h ++++ b/xen/common/ubsan/ubsan.h +@@ -10,7 +10,7 @@ enum { + struct type_descriptor { + u16 type_kind; + u16 type_info; +- char type_name[1]; ++ char type_name[]; + }; + + struct source_location { +-- +2.45.2 + diff --git a/0033-x86-xstate-Fix-initialisation-of-XSS-cache.patch b/0033-x86-xstate-Fix-initialisation-of-XSS-cache.patch new file mode 100644 index 0000000..1a8c724 --- /dev/null +++ b/0033-x86-xstate-Fix-initialisation-of-XSS-cache.patch @@ -0,0 +1,74 @@ +From 4ee1df89d9c92609e5fff3c9b261ce4b1bb88e42 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Wed, 26 Jun 2024 13:43:19 +0200 +Subject: [PATCH 33/56] x86/xstate: Fix initialisation of XSS cache + +The clobbering of this_cpu(xcr0) and this_cpu(xss) to architecturally invalid +values is to force the subsequent set_xcr0() and set_msr_xss() to reload the +hardware register. + +While XCR0 is reloaded in xstate_init(), MSR_XSS isn't. This causes +get_msr_xss() to return the invalid value, and logic of the form: + + old = get_msr_xss(); + set_msr_xss(new); + ... + set_msr_xss(old); + +to try and restore said invalid value. + +The architecturally invalid value must be purged from the cache, meaning the +hardware register must be written at least once. This in turn highlights that +the invalid value must only be used in the case that the hardware register is +available. 
+ +Fixes: f7f4a523927f ("x86/xstate: reset cached register values on resume") +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 9e6dbbe8bf400aacb99009ddffa91d2a0c312b39 +master date: 2024-06-19 13:00:06 +0100 +--- + xen/arch/x86/xstate.c | 18 +++++++++++------- + 1 file changed, 11 insertions(+), 7 deletions(-) + +diff --git a/xen/arch/x86/xstate.c b/xen/arch/x86/xstate.c +index f442610fc5..ca76f98fe2 100644 +--- a/xen/arch/x86/xstate.c ++++ b/xen/arch/x86/xstate.c +@@ -641,13 +641,6 @@ void xstate_init(struct cpuinfo_x86 *c) + return; + } + +- /* +- * Zap the cached values to make set_xcr0() and set_msr_xss() really +- * write it. +- */ +- this_cpu(xcr0) = 0; +- this_cpu(xss) = ~0; +- + cpuid_count(XSTATE_CPUID, 0, &eax, &ebx, &ecx, &edx); + feature_mask = (((u64)edx << 32) | eax) & XCNTXT_MASK; + BUG_ON(!valid_xcr0(feature_mask)); +@@ -657,8 +650,19 @@ void xstate_init(struct cpuinfo_x86 *c) + * Set CR4_OSXSAVE and run "cpuid" to get xsave_cntxt_size. + */ + set_in_cr4(X86_CR4_OSXSAVE); ++ ++ /* ++ * Zap the cached values to make set_xcr0() and set_msr_xss() really write ++ * the hardware register. 
++ */ ++ this_cpu(xcr0) = 0; + if ( !set_xcr0(feature_mask) ) + BUG(); ++ if ( cpu_has_xsaves ) ++ { ++ this_cpu(xss) = ~0; ++ set_msr_xss(0); ++ } + + if ( bsp ) + { +-- +2.45.2 + diff --git a/0033-xen-livepatch-properly-build-the-noapply-and-norever.patch b/0033-xen-livepatch-properly-build-the-noapply-and-norever.patch deleted file mode 100644 index 76803c6..0000000 --- a/0033-xen-livepatch-properly-build-the-noapply-and-norever.patch +++ /dev/null @@ -1,43 +0,0 @@ -From a59106b27609b6ae2873bd6755949b1258290872 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 11:59:51 +0100 -Subject: [PATCH 33/67] xen/livepatch: properly build the noapply and norevert - tests -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -It seems the build variables for those tests where copy-pasted from -xen_action_hooks_marker-objs and not adjusted to use the correct source files. - -Fixes: 6047104c3ccc ('livepatch: Add per-function applied/reverted state tracking marker') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: e579677095782c7dec792597ba8b037b7d716b32 -master date: 2024-02-28 16:57:25 +0000 ---- - xen/test/livepatch/Makefile | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/xen/test/livepatch/Makefile b/xen/test/livepatch/Makefile -index c258ab0b59..d987a8367f 100644 ---- a/xen/test/livepatch/Makefile -+++ b/xen/test/livepatch/Makefile -@@ -118,12 +118,12 @@ xen_action_hooks_marker-objs := xen_action_hooks_marker.o xen_hello_world_func.o - $(obj)/xen_action_hooks_noapply.o: $(obj)/config.h - - extra-y += xen_action_hooks_noapply.livepatch --xen_action_hooks_noapply-objs := xen_action_hooks_marker.o xen_hello_world_func.o note.o xen_note.o -+xen_action_hooks_noapply-objs := xen_action_hooks_noapply.o xen_hello_world_func.o note.o xen_note.o - - 
$(obj)/xen_action_hooks_norevert.o: $(obj)/config.h - - extra-y += xen_action_hooks_norevert.livepatch --xen_action_hooks_norevert-objs := xen_action_hooks_marker.o xen_hello_world_func.o note.o xen_note.o -+xen_action_hooks_norevert-objs := xen_action_hooks_norevert.o xen_hello_world_func.o note.o xen_note.o - - EXPECT_BYTES_COUNT := 8 - CODE_GET_EXPECT=$(shell $(OBJDUMP) -d --insn-width=1 $(1) | sed -n -e '/<'$(2)'>:$$/,/^$$/ p' | tail -n +2 | head -n $(EXPECT_BYTES_COUNT) | awk '{$$0=$$2; printf "%s", substr($$0,length-1)}' | sed 's/.\{2\}/0x&,/g' | sed 's/^/{/;s/,$$/}/g') --- -2.44.0 - diff --git a/0034-libxl-Fix-segfault-in-device_model_spawn_outcome.patch b/0034-libxl-Fix-segfault-in-device_model_spawn_outcome.patch deleted file mode 100644 index 7f23a73..0000000 --- a/0034-libxl-Fix-segfault-in-device_model_spawn_outcome.patch +++ /dev/null @@ -1,39 +0,0 @@ -From c4ee68eda9937743527fff41f4ede0f6a3228080 Mon Sep 17 00:00:00 2001 -From: Jason Andryuk <jandryuk@gmail.com> -Date: Tue, 5 Mar 2024 12:00:30 +0100 -Subject: [PATCH 34/67] libxl: Fix segfault in device_model_spawn_outcome - -libxl__spawn_qdisk_backend() explicitly sets guest_config to NULL when -starting QEMU (the usual launch through libxl__spawn_local_dm() has a -guest_config though). - -Bail early on a NULL guest_config/d_config. This skips the QMP queries -for chardevs and VNC, but this xenpv QEMU instance isn't expected to -provide those - only qdisk (or 9pfs backends after an upcoming change). 
- -Signed-off-by: Jason Andryuk <jandryuk@gmail.com> -Acked-by: Anthony PERARD <anthony.perard@citrix.com> -master commit: d4f3d35f043f6ef29393166b0dd131c8102cf255 -master date: 2024-02-29 08:18:38 +0100 ---- - tools/libs/light/libxl_dm.c | 4 ++-- - 1 file changed, 2 insertions(+), 2 deletions(-) - -diff --git a/tools/libs/light/libxl_dm.c b/tools/libs/light/libxl_dm.c -index ed620a9d8e..29b43ed20a 100644 ---- a/tools/libs/light/libxl_dm.c -+++ b/tools/libs/light/libxl_dm.c -@@ -3172,8 +3172,8 @@ static void device_model_spawn_outcome(libxl__egc *egc, - - /* Check if spawn failed */ - if (rc) goto out; -- -- if (d_config->b_info.device_model_version -+ /* d_config is NULL for xl devd/libxl__spawn_qemu_xenpv_backend(). */ -+ if (d_config && d_config->b_info.device_model_version - == LIBXL_DEVICE_MODEL_VERSION_QEMU_XEN) { - rc = libxl__ev_time_register_rel(ao, &dmss->timeout, - devise_model_postconfig_timeout, --- -2.44.0 - diff --git a/0034-x86-cpuid-Fix-handling-of-XSAVE-dynamic-leaves.patch b/0034-x86-cpuid-Fix-handling-of-XSAVE-dynamic-leaves.patch new file mode 100644 index 0000000..1905728 --- /dev/null +++ b/0034-x86-cpuid-Fix-handling-of-XSAVE-dynamic-leaves.patch @@ -0,0 +1,72 @@ +From 9b43092d54b5f9e9d39d9f20393671e303b19e81 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Wed, 26 Jun 2024 13:43:44 +0200 +Subject: [PATCH 34/56] x86/cpuid: Fix handling of XSAVE dynamic leaves + +[ This is a minimal backport of commit 71cacfb035f4 ("x86/cpuid: Fix handling + of XSAVE dynamic leaves") to fix the bugs without depending on the large + rework of XSTATE handling in Xen 4.19 ] + +First, if XSAVE is available in hardware but not visible to the guest, the +dynamic leaves shouldn't be filled in. + +Second, the comment concerning XSS state is wrong. VT-x doesn't manage +host/guest state automatically, but there is provision for "host only" bits to +be set, so the implications are still accurate. 
+ +In Xen 4.18, no XSS states are supported, so it's safe to keep deferring to +real hardware. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 71cacfb035f4a78ee10970dc38a3baa04d387451 +master date: 2024-06-19 13:00:06 +0100 +--- + xen/arch/x86/cpuid.c | 30 +++++++++++++----------------- + 1 file changed, 13 insertions(+), 17 deletions(-) + +diff --git a/xen/arch/x86/cpuid.c b/xen/arch/x86/cpuid.c +index 455a09b2dd..f6fd6cc6b3 100644 +--- a/xen/arch/x86/cpuid.c ++++ b/xen/arch/x86/cpuid.c +@@ -330,24 +330,20 @@ void guest_cpuid(const struct vcpu *v, uint32_t leaf, + case XSTATE_CPUID: + switch ( subleaf ) + { +- case 1: +- if ( p->xstate.xsavec || p->xstate.xsaves ) +- { +- /* +- * TODO: Figure out what to do for XSS state. VT-x manages +- * host vs guest MSR_XSS automatically, so as soon as we start +- * supporting any XSS states, the wrong XSS will be in +- * context. +- */ +- BUILD_BUG_ON(XSTATE_XSAVES_ONLY != 0); +- +- /* +- * Read CPUID[0xD,0/1].EBX from hardware. They vary with +- * enabled XSTATE, and appropraite XCR0|XSS are in context. +- */ ++ /* ++ * Read CPUID[0xd,0/1].EBX from hardware. They vary with enabled ++ * XSTATE, and the appropriate XCR0 is in context. ++ */ + case 0: +- res->b = cpuid_count_ebx(leaf, subleaf); +- } ++ if ( p->basic.xsave ) ++ res->b = cpuid_count_ebx(0xd, 0); ++ break; ++ ++ case 1: ++ /* This only works because Xen doesn't support XSS states yet. 
*/ ++ BUILD_BUG_ON(XSTATE_XSAVES_ONLY != 0); ++ if ( p->xstate.xsavec ) ++ res->b = cpuid_count_ebx(0xd, 1); + break; + } + break; +-- +2.45.2 + diff --git a/0035-x86-altcall-always-use-a-temporary-parameter-stashin.patch b/0035-x86-altcall-always-use-a-temporary-parameter-stashin.patch deleted file mode 100644 index 177c73b..0000000 --- a/0035-x86-altcall-always-use-a-temporary-parameter-stashin.patch +++ /dev/null @@ -1,197 +0,0 @@ -From 2f49d9f89c14519d4cb1e06ab8370cf4ba50fab7 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 5 Mar 2024 12:00:47 +0100 -Subject: [PATCH 35/67] x86/altcall: always use a temporary parameter stashing - variable -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The usage in ALT_CALL_ARG() on clang of: - -register union { - typeof(arg) e; - const unsigned long r; -} ... - -When `arg` is the first argument to alternative_{,v}call() and -const_vlapic_vcpu() is used results in clang 3.5.0 complaining with: - -arch/x86/hvm/vlapic.c:141:47: error: non-const static data member must be initialized out of line - alternative_call(hvm_funcs.test_pir, const_vlapic_vcpu(vlapic), vec) ) - -Workaround this by pulling `arg1` into a local variable, like it's done for -further arguments (arg2, arg3...) - -Originally arg1 wasn't pulled into a variable because for the a1_ register -local variable the possible clobbering as a result of operators on other -variables don't matter: - -https://gcc.gnu.org/onlinedocs/gcc/Local-Register-Variables.html#Local-Register-Variables - -Note clang version 3.8.1 seems to already be fixed and don't require the -workaround, but since it's harmless do it uniformly everywhere. 
- -Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> -Fixes: 2ce562b2a413 ('x86/altcall: use a union as register type for function parameters on clang') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -master commit: c20850540ad6a32f4fc17bde9b01c92b0df18bf0 -master date: 2024-02-29 08:21:49 +0100 ---- - xen/arch/x86/include/asm/alternative.h | 36 +++++++++++++++++--------- - 1 file changed, 24 insertions(+), 12 deletions(-) - -diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h -index bcb1dc94f4..fa04481316 100644 ---- a/xen/arch/x86/include/asm/alternative.h -+++ b/xen/arch/x86/include/asm/alternative.h -@@ -253,21 +253,24 @@ extern void alternative_branches(void); - }) - - #define alternative_vcall1(func, arg) ({ \ -- ALT_CALL_ARG(arg, 1); \ -+ typeof(arg) v1_ = (arg); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_NO_ARG2; \ - (void)sizeof(func(arg)); \ - (void)alternative_callN(1, int, func); \ - }) - - #define alternative_call1(func, arg) ({ \ -- ALT_CALL_ARG(arg, 1); \ -+ typeof(arg) v1_ = (arg); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_NO_ARG2; \ - alternative_callN(1, typeof(func(arg)), func); \ - }) - - #define alternative_vcall2(func, arg1, arg2) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_NO_ARG3; \ - (void)sizeof(func(arg1, arg2)); \ -@@ -275,17 +278,19 @@ extern void alternative_branches(void); - }) - - #define alternative_call2(func, arg1, arg2) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_NO_ARG3; \ - alternative_callN(2, typeof(func(arg1, arg2)), func); \ - }) - - #define alternative_vcall3(func, arg1, arg2, arg3) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ -- ALT_CALL_ARG(arg1, 1); 
\ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_NO_ARG4; \ -@@ -294,9 +299,10 @@ extern void alternative_branches(void); - }) - - #define alternative_call3(func, arg1, arg2, arg3) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_NO_ARG4; \ -@@ -305,10 +311,11 @@ extern void alternative_branches(void); - }) - - #define alternative_vcall4(func, arg1, arg2, arg3, arg4) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ -@@ -318,10 +325,11 @@ extern void alternative_branches(void); - }) - - #define alternative_call4(func, arg1, arg2, arg3, arg4) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ -@@ -332,11 +340,12 @@ extern void alternative_branches(void); - }) - - #define alternative_vcall5(func, arg1, arg2, arg3, arg4, arg5) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ - typeof(arg5) v5_ = (arg5); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ -@@ -347,11 +356,12 @@ extern void alternative_branches(void); - }) - - #define alternative_call5(func, arg1, arg2, arg3, arg4, arg5) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ - typeof(arg5) v5_ = (arg5); \ -- ALT_CALL_ARG(arg1, 1); \ -+ 
ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ -@@ -363,12 +373,13 @@ extern void alternative_branches(void); - }) - - #define alternative_vcall6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ - typeof(arg5) v5_ = (arg5); \ - typeof(arg6) v6_ = (arg6); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ -@@ -379,12 +390,13 @@ extern void alternative_branches(void); - }) - - #define alternative_call6(func, arg1, arg2, arg3, arg4, arg5, arg6) ({ \ -+ typeof(arg1) v1_ = (arg1); \ - typeof(arg2) v2_ = (arg2); \ - typeof(arg3) v3_ = (arg3); \ - typeof(arg4) v4_ = (arg4); \ - typeof(arg5) v5_ = (arg5); \ - typeof(arg6) v6_ = (arg6); \ -- ALT_CALL_ARG(arg1, 1); \ -+ ALT_CALL_ARG(v1_, 1); \ - ALT_CALL_ARG(v2_, 2); \ - ALT_CALL_ARG(v3_, 3); \ - ALT_CALL_ARG(v4_, 4); \ --- -2.44.0 - diff --git a/0035-x86-irq-forward-pending-interrupts-to-new-destinatio.patch b/0035-x86-irq-forward-pending-interrupts-to-new-destinatio.patch new file mode 100644 index 0000000..f05b09e --- /dev/null +++ b/0035-x86-irq-forward-pending-interrupts-to-new-destinatio.patch @@ -0,0 +1,143 @@ +From e95d30f9e5eed0c5d9dbf72d4cc3ae373152ab10 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Wed, 26 Jun 2024 13:44:08 +0200 +Subject: [PATCH 35/56] x86/irq: forward pending interrupts to new destination + in fixup_irqs() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +fixup_irqs() is used to evacuate interrupts from to be offlined CPUs. Given +the CPU is to become offline, the normal migration logic used by Xen where the +vector in the previous target(s) is left configured until the interrupt is +received on the new destination is not suitable. 
+ +Instead attempt to do as much as possible in order to prevent loosing +interrupts. If fixup_irqs() is called from the CPU to be offlined (as is +currently the case for CPU hot unplug) attempt to forward pending vectors when +interrupts that target the current CPU are migrated to a different destination. + +Additionally, for interrupts that have already been moved from the current CPU +prior to the call to fixup_irqs() but that haven't been delivered to the new +destination (iow: interrupts with move_in_progress set and the current CPU set +in ->arch.old_cpu_mask) also check whether the previous vector is pending and +forward it to the new destination. + +This allows us to remove the window with interrupts enabled at the bottom of +fixup_irqs(). Such window wasn't safe anyway: references to the CPU to become +offline are removed from interrupts masks, but the per-CPU vector_irq[] array +is not updated to reflect those changes (as the CPU is going offline anyway). + +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: e2bb28d621584fce15c907002ddc7c6772644b64 +master date: 2024-06-20 12:09:32 +0200 +--- + xen/arch/x86/include/asm/apic.h | 5 ++++ + xen/arch/x86/irq.c | 46 ++++++++++++++++++++++++++++----- + 2 files changed, 45 insertions(+), 6 deletions(-) + +diff --git a/xen/arch/x86/include/asm/apic.h b/xen/arch/x86/include/asm/apic.h +index 7625c0ecd6..ad8d7cc054 100644 +--- a/xen/arch/x86/include/asm/apic.h ++++ b/xen/arch/x86/include/asm/apic.h +@@ -145,6 +145,11 @@ static __inline bool_t apic_isr_read(u8 vector) + (vector & 0x1f)) & 1; + } + ++static inline bool apic_irr_read(unsigned int vector) ++{ ++ return apic_read(APIC_IRR + (vector / 32 * 0x10)) & (1U << (vector % 32)); ++} ++ + static __inline u32 get_apic_id(void) /* Get the physical APIC id */ + { + u32 id = apic_read(APIC_ID); +diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c +index 13ef61a5b7..290f8d26e7 100644 +--- 
a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2604,7 +2604,7 @@ void fixup_irqs(const cpumask_t *mask, bool verbose) + + for ( irq = 0; irq < nr_irqs; irq++ ) + { +- bool break_affinity = false, set_affinity = true; ++ bool break_affinity = false, set_affinity = true, check_irr = false; + unsigned int vector, cpu = smp_processor_id(); + cpumask_t *affinity = this_cpu(scratch_cpumask); + +@@ -2657,6 +2657,25 @@ void fixup_irqs(const cpumask_t *mask, bool verbose) + !cpu_online(cpu) && + cpumask_test_cpu(cpu, desc->arch.old_cpu_mask) ) + { ++ /* ++ * This to be offlined CPU was the target of an interrupt that's ++ * been moved, and the new destination target hasn't yet ++ * acknowledged any interrupt from it. ++ * ++ * We know the interrupt is configured to target the new CPU at ++ * this point, so we can check IRR for any pending vectors and ++ * forward them to the new destination. ++ * ++ * Note that for the other case of an interrupt movement being in ++ * progress (move_cleanup_count being non-zero) we know the new ++ * destination has already acked at least one interrupt from this ++ * source, and hence there's no need to forward any stale ++ * interrupts. ++ */ ++ if ( apic_irr_read(desc->arch.old_vector) ) ++ send_IPI_mask(cpumask_of(cpumask_any(desc->arch.cpu_mask)), ++ desc->arch.vector); ++ + /* + * This CPU is going offline, remove it from ->arch.old_cpu_mask + * and possibly release the old vector if the old mask becomes +@@ -2697,6 +2716,14 @@ void fixup_irqs(const cpumask_t *mask, bool verbose) + if ( desc->handler->disable ) + desc->handler->disable(desc); + ++ /* ++ * If the current CPU is going offline and is (one of) the target(s) of ++ * the interrupt, signal to check whether there are any pending vectors ++ * to be handled in the local APIC after the interrupt has been moved. 
++ */ ++ if ( !cpu_online(cpu) && cpumask_test_cpu(cpu, desc->arch.cpu_mask) ) ++ check_irr = true; ++ + if ( desc->handler->set_affinity ) + desc->handler->set_affinity(desc, affinity); + else if ( !(warned++) ) +@@ -2707,6 +2734,18 @@ void fixup_irqs(const cpumask_t *mask, bool verbose) + + cpumask_copy(affinity, desc->affinity); + ++ if ( check_irr && apic_irr_read(vector) ) ++ /* ++ * Forward pending interrupt to the new destination, this CPU is ++ * going offline and otherwise the interrupt would be lost. ++ * ++ * Do the IRR check as late as possible before releasing the irq ++ * desc in order for any in-flight interrupts to be delivered to ++ * the lapic. ++ */ ++ send_IPI_mask(cpumask_of(cpumask_any(desc->arch.cpu_mask)), ++ desc->arch.vector); ++ + spin_unlock(&desc->lock); + + if ( !verbose ) +@@ -2718,11 +2757,6 @@ void fixup_irqs(const cpumask_t *mask, bool verbose) + printk("Broke affinity for IRQ%u, new: %*pb\n", + irq, CPUMASK_PR(affinity)); + } +- +- /* That doesn't seem sufficient. Give it 1ms. */ +- local_irq_enable(); +- mdelay(1); +- local_irq_disable(); + } + + void fixup_eoi(void) +-- +2.45.2 + diff --git a/0036-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch b/0036-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch deleted file mode 100644 index b91ff52..0000000 --- a/0036-x86-cpu-policy-Allow-for-levelling-of-VERW-side-effe.patch +++ /dev/null @@ -1,102 +0,0 @@ -From 54dacb5c02cba4676879ed077765734326b78e39 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 5 Mar 2024 12:01:22 +0100 -Subject: [PATCH 36/67] x86/cpu-policy: Allow for levelling of VERW side - effects -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -MD_CLEAR and FB_CLEAR need OR-ing across a migrate pool. Allow this, by -having them unconditinally set in max, with the host values reflected in -default. Annotate the bits as having special properies. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: de17162cafd27f2865a3102a2ec0f386a02ed03d -master date: 2024-03-01 20:14:19 +0000 ---- - xen/arch/x86/cpu-policy.c | 24 +++++++++++++++++++++ - xen/arch/x86/include/asm/cpufeature.h | 1 + - xen/include/public/arch-x86/cpufeatureset.h | 4 ++-- - 3 files changed, 27 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index f0f2c8a1c0..7b875a7221 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -435,6 +435,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) - __set_bit(X86_FEATURE_RSBA, fs); - __set_bit(X86_FEATURE_RRSBA, fs); - -+ /* -+ * These bits indicate that the VERW instruction may have gained -+ * scrubbing side effects. With pooling, they mean "you might migrate -+ * somewhere where scrubbing is necessary", and may need exposing on -+ * unaffected hardware. This is fine, because the VERW instruction -+ * has been around since the 286. -+ */ -+ __set_bit(X86_FEATURE_MD_CLEAR, fs); -+ __set_bit(X86_FEATURE_FB_CLEAR, fs); -+ - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an - * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. -@@ -469,6 +479,20 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) - cpu_has_rdrand && !is_forced_cpu_cap(X86_FEATURE_RDRAND) ) - __clear_bit(X86_FEATURE_RDRAND, fs); - -+ /* -+ * These bits indicate that the VERW instruction may have gained -+ * scrubbing side effects. The max policy has them set for migration -+ * reasons, so reset the default policy back to the host values in -+ * case we're unaffected. 
-+ */ -+ __clear_bit(X86_FEATURE_MD_CLEAR, fs); -+ if ( cpu_has_md_clear ) -+ __set_bit(X86_FEATURE_MD_CLEAR, fs); -+ -+ __clear_bit(X86_FEATURE_FB_CLEAR, fs); -+ if ( cpu_has_fb_clear ) -+ __set_bit(X86_FEATURE_FB_CLEAR, fs); -+ - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an - * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. -diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h -index 9ef7756593..ec824e8954 100644 ---- a/xen/arch/x86/include/asm/cpufeature.h -+++ b/xen/arch/x86/include/asm/cpufeature.h -@@ -136,6 +136,7 @@ - #define cpu_has_avx512_4fmaps boot_cpu_has(X86_FEATURE_AVX512_4FMAPS) - #define cpu_has_avx512_vp2intersect boot_cpu_has(X86_FEATURE_AVX512_VP2INTERSECT) - #define cpu_has_srbds_ctrl boot_cpu_has(X86_FEATURE_SRBDS_CTRL) -+#define cpu_has_md_clear boot_cpu_has(X86_FEATURE_MD_CLEAR) - #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) - #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) - #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE) -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 94d211df2f..aec1407613 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -260,7 +260,7 @@ XEN_CPUFEATURE(AVX512_4FMAPS, 9*32+ 3) /*A AVX512 Multiply Accumulation Single - XEN_CPUFEATURE(FSRM, 9*32+ 4) /*A Fast Short REP MOVS */ - XEN_CPUFEATURE(AVX512_VP2INTERSECT, 9*32+8) /*a VP2INTERSECT{D,Q} insns */ - XEN_CPUFEATURE(SRBDS_CTRL, 9*32+ 9) /* MSR_MCU_OPT_CTRL and RNGDS_MITG_DIS. */ --XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*A VERW clears microarchitectural buffers */ -+XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffers */ - XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. 
*/ - XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ - XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */ -@@ -321,7 +321,7 @@ XEN_CPUFEATURE(DOITM, 16*32+12) /* Data Operand Invariant Timing - XEN_CPUFEATURE(SBDR_SSDP_NO, 16*32+13) /*A No Shared Buffer Data Read or Sideband Stale Data Propagation */ - XEN_CPUFEATURE(FBSDP_NO, 16*32+14) /*A No Fill Buffer Stale Data Propagation */ - XEN_CPUFEATURE(PSDP_NO, 16*32+15) /*A No Primary Stale Data Propagation */ --XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*A Fill Buffers cleared by VERW */ -+XEN_CPUFEATURE(FB_CLEAR, 16*32+17) /*!A Fill Buffers cleared by VERW */ - XEN_CPUFEATURE(FB_CLEAR_CTRL, 16*32+18) /* MSR_OPT_CPU_CTRL.FB_CLEAR_DIS */ - XEN_CPUFEATURE(RRSBA, 16*32+19) /*! Restricted RSB Alternative */ - XEN_CPUFEATURE(BHI_NO, 16*32+20) /*A No Branch History Injection */ --- -2.44.0 - diff --git a/0036-x86-re-run-exception-from-stub-recovery-selftests-wi.patch b/0036-x86-re-run-exception-from-stub-recovery-selftests-wi.patch new file mode 100644 index 0000000..a552e9c --- /dev/null +++ b/0036-x86-re-run-exception-from-stub-recovery-selftests-wi.patch @@ -0,0 +1,84 @@ +From 5ac3cbbf83e1f955aeaf5d0f503099f5249b5c25 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Thu, 4 Jul 2024 14:06:19 +0200 +Subject: [PATCH 36/56] x86: re-run exception-from-stub recovery selftests with + CET-SS enabled + +On the BSP, shadow stacks are enabled only relatively late in the +booting process. They in particular aren't active yet when initcalls are +run. Keep the testing there, but invoke that testing a 2nd time when +shadow stacks are active, to make sure we won't regress that case after +addressing XSA-451. + +While touching this code, switch the guard from NDEBUG to CONFIG_DEBUG, +such that IS_ENABLED() can validly be used at the new call site. 
+ +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: cfe3ad67127b86e1b1c06993b86422673a51b050 +master date: 2024-02-27 13:49:52 +0100 +--- + xen/arch/x86/extable.c | 8 +++++--- + xen/arch/x86/include/asm/setup.h | 2 ++ + xen/arch/x86/setup.c | 4 ++++ + 3 files changed, 11 insertions(+), 3 deletions(-) + +diff --git a/xen/arch/x86/extable.c b/xen/arch/x86/extable.c +index 8ffcd346d7..12cc9935d8 100644 +--- a/xen/arch/x86/extable.c ++++ b/xen/arch/x86/extable.c +@@ -128,10 +128,11 @@ search_exception_table(const struct cpu_user_regs *regs, unsigned long *stub_ra) + return 0; + } + +-#ifndef NDEBUG ++#ifdef CONFIG_DEBUG ++#include <asm/setup.h> + #include <asm/traps.h> + +-static int __init cf_check stub_selftest(void) ++int __init cf_check stub_selftest(void) + { + static const struct { + uint8_t opc[8]; +@@ -155,7 +156,8 @@ static int __init cf_check stub_selftest(void) + unsigned int i; + bool fail = false; + +- printk("Running stub recovery selftests...\n"); ++ printk("%s stub recovery selftests...\n", ++ system_state < SYS_STATE_active ? "Running" : "Re-running"); + + for ( i = 0; i < ARRAY_SIZE(tests); ++i ) + { +diff --git a/xen/arch/x86/include/asm/setup.h b/xen/arch/x86/include/asm/setup.h +index 9a460e4db8..14d15048eb 100644 +--- a/xen/arch/x86/include/asm/setup.h ++++ b/xen/arch/x86/include/asm/setup.h +@@ -38,6 +38,8 @@ void *bootstrap_map(const module_t *mod); + + int xen_in_range(unsigned long mfn); + ++int cf_check stub_selftest(void); ++ + extern uint8_t kbd_shift_flags; + + #ifdef NDEBUG +diff --git a/xen/arch/x86/setup.c b/xen/arch/x86/setup.c +index 25017b5d96..f2592c3dc9 100644 +--- a/xen/arch/x86/setup.c ++++ b/xen/arch/x86/setup.c +@@ -738,6 +738,10 @@ static void noreturn init_done(void) + + system_state = SYS_STATE_active; + ++ /* Re-run stub recovery self-tests with CET-SS active. 
*/ ++ if ( IS_ENABLED(CONFIG_DEBUG) && cpu_has_xen_shstk ) ++ stub_selftest(); ++ + domain_unpause_by_systemcontroller(dom0); + + /* MUST be done prior to removing .init data. */ +-- +2.45.2 + diff --git a/0037-hvmloader-PCI-skip-huge-BARs-in-certain-calculations.patch b/0037-hvmloader-PCI-skip-huge-BARs-in-certain-calculations.patch deleted file mode 100644 index a46f913..0000000 --- a/0037-hvmloader-PCI-skip-huge-BARs-in-certain-calculations.patch +++ /dev/null @@ -1,99 +0,0 @@ -From 1e9808227c10717228969e924cab49cad4af6265 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Tue, 12 Mar 2024 12:08:48 +0100 -Subject: [PATCH 37/67] hvmloader/PCI: skip huge BARs in certain calculations -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -BARs of size 2Gb and up can't possibly fit below 4Gb: Both the bottom of -the lower 2Gb range and the top of the higher 2Gb range have special -purpose. Don't even have them influence whether to (perhaps) relocate -low RAM. - -Reported-by: Neowutran <xen@neowutran.ovh> -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Acked-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 57acad12a09ffa490e870ebe17596aad858f0191 -master date: 2024-03-06 10:19:29 +0100 ---- - tools/firmware/hvmloader/pci.c | 28 ++++++++++++++++++++-------- - 1 file changed, 20 insertions(+), 8 deletions(-) - -diff --git a/tools/firmware/hvmloader/pci.c b/tools/firmware/hvmloader/pci.c -index 257a6feb61..c3c61ca060 100644 ---- a/tools/firmware/hvmloader/pci.c -+++ b/tools/firmware/hvmloader/pci.c -@@ -33,6 +33,13 @@ uint32_t pci_mem_start = HVM_BELOW_4G_MMIO_START; - const uint32_t pci_mem_end = RESERVED_MEMBASE; - uint64_t pci_hi_mem_start = 0, pci_hi_mem_end = 0; - -+/* -+ * BARs larger than this value are put in 64-bit space unconditionally. That -+ * is, such BARs also don't play into the determination of how big the lowmem -+ * MMIO hole needs to be. 
-+ */ -+#define BAR_RELOC_THRESH GB(1) -+ - enum virtual_vga virtual_vga = VGA_none; - unsigned long igd_opregion_pgbase = 0; - -@@ -286,9 +293,11 @@ void pci_setup(void) - bars[i].bar_reg = bar_reg; - bars[i].bar_sz = bar_sz; - -- if ( ((bar_data & PCI_BASE_ADDRESS_SPACE) == -- PCI_BASE_ADDRESS_SPACE_MEMORY) || -- (bar_reg == PCI_ROM_ADDRESS) ) -+ if ( is_64bar && bar_sz > BAR_RELOC_THRESH ) -+ bar64_relocate = 1; -+ else if ( ((bar_data & PCI_BASE_ADDRESS_SPACE) == -+ PCI_BASE_ADDRESS_SPACE_MEMORY) || -+ (bar_reg == PCI_ROM_ADDRESS) ) - mmio_total += bar_sz; - - nr_bars++; -@@ -367,7 +376,7 @@ void pci_setup(void) - pci_mem_start = hvm_info->low_mem_pgend << PAGE_SHIFT; - } - -- if ( mmio_total > (pci_mem_end - pci_mem_start) ) -+ if ( mmio_total > (pci_mem_end - pci_mem_start) || bar64_relocate ) - { - printf("Low MMIO hole not large enough for all devices," - " relocating some BARs to 64-bit\n"); -@@ -430,7 +439,8 @@ void pci_setup(void) - - /* - * Relocate to high memory if the total amount of MMIO needed -- * is more than the low MMIO available. Because devices are -+ * is more than the low MMIO available or BARs bigger than -+ * BAR_RELOC_THRESH are present. Because devices are - * processed in order of bar_sz, this will preferentially - * relocate larger devices to high memory first. - * -@@ -446,8 +456,9 @@ void pci_setup(void) - * the code here assumes it to be.) - * Should either of those two conditions change, this code will break. 
- */ -- using_64bar = bars[i].is_64bar && bar64_relocate -- && (mmio_total > (mem_resource.max - mem_resource.base)); -+ using_64bar = bars[i].is_64bar && bar64_relocate && -+ (mmio_total > (mem_resource.max - mem_resource.base) || -+ bar_sz > BAR_RELOC_THRESH); - bar_data = pci_readl(devfn, bar_reg); - - if ( (bar_data & PCI_BASE_ADDRESS_SPACE) == -@@ -467,7 +478,8 @@ void pci_setup(void) - resource = &mem_resource; - bar_data &= ~PCI_BASE_ADDRESS_MEM_MASK; - } -- mmio_total -= bar_sz; -+ if ( bar_sz <= BAR_RELOC_THRESH ) -+ mmio_total -= bar_sz; - } - else - { --- -2.44.0 - diff --git a/0037-tools-tests-don-t-let-test-xenstore-write-nodes-exce.patch b/0037-tools-tests-don-t-let-test-xenstore-write-nodes-exce.patch new file mode 100644 index 0000000..cc7e47d --- /dev/null +++ b/0037-tools-tests-don-t-let-test-xenstore-write-nodes-exce.patch @@ -0,0 +1,41 @@ +From 0ebfa35965257343ba3d8377be91ad8512a9c749 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Thu, 4 Jul 2024 14:06:54 +0200 +Subject: [PATCH 37/56] tools/tests: don't let test-xenstore write nodes + exceeding default size + +Today test-xenstore will write nodes with 3000 bytes node data. This +size is exceeding the default quota for the allowed node size. While +working in dom0 with C-xenstored, OCAML-xenstored does not like that. + +Use a size of 2000 instead, which is lower than the allowed default +node size of 2048. 
+ +Fixes: 3afc5e4a5b75 ("tools/tests: add xenstore testing framework") +Signed-off-by: Juergen Gross <jgross@suse.com> +Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: 642005e310483c490b0725fab4672f2b77fdf2ba +master date: 2024-05-02 18:15:31 +0100 +--- + tools/tests/xenstore/test-xenstore.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/tests/xenstore/test-xenstore.c b/tools/tests/xenstore/test-xenstore.c +index d491dac53b..73a7011d21 100644 +--- a/tools/tests/xenstore/test-xenstore.c ++++ b/tools/tests/xenstore/test-xenstore.c +@@ -408,9 +408,9 @@ static int test_ta3_deinit(uintptr_t par) + #define TEST(s, f, p, l) { s, f ## _init, f, f ## _deinit, (uintptr_t)(p), l } + struct test tests[] = { + TEST("read 1", test_read, 1, "Read node with 1 byte data"), +-TEST("read 3000", test_read, 3000, "Read node with 3000 bytes data"), ++TEST("read 2000", test_read, 2000, "Read node with 2000 bytes data"), + TEST("write 1", test_write, 1, "Write node with 1 byte data"), +-TEST("write 3000", test_write, 3000, "Write node with 3000 bytes data"), ++TEST("write 2000", test_write, 2000, "Write node with 2000 bytes data"), + TEST("dir", test_dir, 0, "List directory"), + TEST("rm node", test_rm, 0, "Remove single node"), + TEST("rm dir", test_rm, WRITE_BUFFERS_N, "Remove node with sub-nodes"), +-- +2.45.2 + diff --git a/0038-tools-tests-let-test-xenstore-exit-with-non-0-status.patch b/0038-tools-tests-let-test-xenstore-exit-with-non-0-status.patch new file mode 100644 index 0000000..ee0a497 --- /dev/null +++ b/0038-tools-tests-let-test-xenstore-exit-with-non-0-status.patch @@ -0,0 +1,57 @@ +From 22f623622cc60571be9cccc323a1d17749683667 Mon Sep 17 00:00:00 2001 +From: Juergen Gross <jgross@suse.com> +Date: Thu, 4 Jul 2024 14:07:12 +0200 +Subject: [PATCH 38/56] tools/tests: let test-xenstore exit with non-0 status + in case of error + +In case a test is failing in test-xenstore, let the tool exit with an +exit status other than 
0. + +Fix a typo in an error message. + +Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> +Fixes: 3afc5e4a5b75 ("tools/tests: add xenstore testing framework") +Signed-off-by: Juergen Gross <jgross@suse.com> +master commit: 2d4ba205591ba64f31149ae31051678159ee9e11 +master date: 2024-05-02 18:15:46 +0100 +--- + tools/tests/xenstore/test-xenstore.c | 8 ++++---- + 1 file changed, 4 insertions(+), 4 deletions(-) + +diff --git a/tools/tests/xenstore/test-xenstore.c b/tools/tests/xenstore/test-xenstore.c +index 73a7011d21..7a9bd9afb3 100644 +--- a/tools/tests/xenstore/test-xenstore.c ++++ b/tools/tests/xenstore/test-xenstore.c +@@ -506,14 +506,14 @@ int main(int argc, char *argv[]) + stop = time(NULL) + randtime; + srandom((unsigned int)stop); + +- while ( time(NULL) < stop ) ++ while ( time(NULL) < stop && !ret ) + { + t = random() % ARRAY_SIZE(tests); + ret = call_test(tests + t, iters, true); + } + } + else +- for ( t = 0; t < ARRAY_SIZE(tests); t++ ) ++ for ( t = 0; t < ARRAY_SIZE(tests) && !ret; t++ ) + { + if ( !test || !strcmp(test, tests[t].name) ) + ret = call_test(tests + t, iters, false); +@@ -525,10 +525,10 @@ int main(int argc, char *argv[]) + xs_close(xsh); + + if ( ta_loops ) +- printf("Exhaustive transaction retries (%d) occurrred %d times.\n", ++ printf("Exhaustive transaction retries (%d) occurred %d times.\n", + MAX_TA_LOOPS, ta_loops); + +- return 0; ++ return ret ? 
3 : 0; + } + + /* +-- +2.45.2 + diff --git a/0038-x86-mm-fix-detection-of-last-L1-entry-in-modify_xen_.patch b/0038-x86-mm-fix-detection-of-last-L1-entry-in-modify_xen_.patch deleted file mode 100644 index 66b4db3..0000000 --- a/0038-x86-mm-fix-detection-of-last-L1-entry-in-modify_xen_.patch +++ /dev/null @@ -1,41 +0,0 @@ -From 1f94117bec55a7b934fed3dfd3529db624eb441f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 12 Mar 2024 12:08:59 +0100 -Subject: [PATCH 38/67] x86/mm: fix detection of last L1 entry in - modify_xen_mappings_lite() -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The current logic to detect when to switch to the next L1 table is incorrectly -using l2_table_offset() in order to notice when the last entry on the current -L1 table has been reached. - -It should instead use l1_table_offset() to check whether the index has wrapped -to point to the first entry, and so the next L1 table should be used. 
- -Fixes: 8676092a0f16 ('x86/livepatch: Fix livepatch application when CET is active') -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 7c81558208de7858251b62f168a449be84305595 -master date: 2024-03-11 11:09:42 +0000 ---- - xen/arch/x86/mm.c | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index e884a6fdbd..330c4abcd1 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -5963,7 +5963,7 @@ void init_or_livepatch modify_xen_mappings_lite( - - v += 1UL << L1_PAGETABLE_SHIFT; - -- if ( l2_table_offset(v) == 0 ) -+ if ( l1_table_offset(v) == 0 ) - break; - } - --- -2.44.0 - diff --git a/0039-LICENSES-Add-MIT-0-MIT-No-Attribution.patch b/0039-LICENSES-Add-MIT-0-MIT-No-Attribution.patch new file mode 100644 index 0000000..8b2c4ec --- /dev/null +++ b/0039-LICENSES-Add-MIT-0-MIT-No-Attribution.patch @@ -0,0 +1,58 @@ +From 75b4f9474a1aa33a6f9e0986b51c390f9b38ae5a Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 4 Jul 2024 14:08:11 +0200 +Subject: [PATCH 39/56] LICENSES: Add MIT-0 (MIT No Attribution) + +We are about to import code licensed under MIT-0. It's compatible for us to +use, so identify it as a permitted license. 
+ +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Stefano Stabellini <sstabellini@kernel.org> +Acked-by: Christian Lindig <christian.lindig@cloud.com> +master commit: 219cdff3fb7b4a03ab14869584f111e0f623b330 +master date: 2024-05-23 15:04:40 +0100 +--- + LICENSES/MIT-0 | 31 +++++++++++++++++++++++++++++++ + 1 file changed, 31 insertions(+) + create mode 100644 LICENSES/MIT-0 + +diff --git a/LICENSES/MIT-0 b/LICENSES/MIT-0 +new file mode 100644 +index 0000000000..70fb90ee34 +--- /dev/null ++++ b/LICENSES/MIT-0 +@@ -0,0 +1,31 @@ ++Valid-License-Identifier: MIT-0 ++ ++SPDX-URL: https://spdx.org/licenses/MIT-0.html ++ ++Usage-Guide: ++ ++ To use the MIT-0 License put the following SPDX tag/value pair into a ++ comment according to the placement guidelines in the licensing rules ++ documentation: ++ SPDX-License-Identifier: MIT-0 ++ ++License-Text: ++ ++MIT No Attribution ++ ++Copyright <year> <copyright holder> ++ ++Permission is hereby granted, free of charge, to any person obtaining a copy ++of this software and associated documentation files (the "Software"), to deal ++in the Software without restriction, including without limitation the rights ++to use, copy, modify, merge, publish, distribute, sublicense, and/or sell ++copies of the Software, and to permit persons to whom the Software is ++furnished to do so. ++ ++THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR ++IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, ++FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE ++AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER ++LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, ++OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE ++SOFTWARE. 
+-- +2.45.2 + diff --git a/0039-x86-entry-Introduce-EFRAME_-constants.patch b/0039-x86-entry-Introduce-EFRAME_-constants.patch deleted file mode 100644 index c280286..0000000 --- a/0039-x86-entry-Introduce-EFRAME_-constants.patch +++ /dev/null @@ -1,314 +0,0 @@ -From e691f99f17198906f813b85dcabafe5addb9a57a Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Sat, 27 Jan 2024 17:52:09 +0000 -Subject: [PATCH 39/67] x86/entry: Introduce EFRAME_* constants - -restore_all_guest() does a lot of manipulation of the stack after popping the -GPRs, and uses raw %rsp displacements to do so. Also, almost all entrypaths -use raw %rsp displacements prior to pushing GPRs. - -Provide better mnemonics, to aid readability and reduce the chance of errors -when editing. - -No functional change. The resulting binary is identical. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 37541208f119a9c552c6c6c3246ea61be0d44035) ---- - xen/arch/x86/x86_64/asm-offsets.c | 17 ++++++++ - xen/arch/x86/x86_64/compat/entry.S | 2 +- - xen/arch/x86/x86_64/entry.S | 70 +++++++++++++++--------------- - 3 files changed, 53 insertions(+), 36 deletions(-) - -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index 287dac101a..31fa63b77f 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -51,6 +51,23 @@ void __dummy__(void) - OFFSET(UREGS_kernel_sizeof, struct cpu_user_regs, es); - BLANK(); - -+ /* -+ * EFRAME_* is for the entry/exit logic where %rsp is pointing at -+ * UREGS_error_code and GPRs are still/already guest values. 
-+ */ -+#define OFFSET_EF(sym, mem) \ -+ DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \ -+ offsetof(struct cpu_user_regs, error_code)) -+ -+ OFFSET_EF(EFRAME_entry_vector, entry_vector); -+ OFFSET_EF(EFRAME_rip, rip); -+ OFFSET_EF(EFRAME_cs, cs); -+ OFFSET_EF(EFRAME_eflags, eflags); -+ OFFSET_EF(EFRAME_rsp, rsp); -+ BLANK(); -+ -+#undef OFFSET_EF -+ - OFFSET(VCPU_processor, struct vcpu, processor); - OFFSET(VCPU_domain, struct vcpu, domain); - OFFSET(VCPU_vcpu_info, struct vcpu, vcpu_info); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index 253bb1688c..7c211314d8 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -15,7 +15,7 @@ ENTRY(entry_int82) - ENDBR64 - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP - pushq $0 -- movl $HYPERCALL_VECTOR, 4(%rsp) -+ movl $HYPERCALL_VECTOR, EFRAME_entry_vector(%rsp) - SAVE_ALL compat=1 /* DPL1 gate, restricted to 32bit PV guests only. */ - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 585b0c9551..412cbeb3ec 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -190,15 +190,15 @@ restore_all_guest: - SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ - - RESTORE_ALL -- testw $TRAP_syscall,4(%rsp) -+ testw $TRAP_syscall, EFRAME_entry_vector(%rsp) - jz iret_exit_to_guest - -- movq 24(%rsp),%r11 # RFLAGS -+ mov EFRAME_eflags(%rsp), %r11 - andq $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), %r11 - orq $X86_EFLAGS_IF,%r11 - - /* Don't use SYSRET path if the return address is not canonical. 
*/ -- movq 8(%rsp),%rcx -+ mov EFRAME_rip(%rsp), %rcx - sarq $47,%rcx - incl %ecx - cmpl $1,%ecx -@@ -213,20 +213,20 @@ restore_all_guest: - ALTERNATIVE "", rag_clrssbsy, X86_FEATURE_XEN_SHSTK - #endif - -- movq 8(%rsp), %rcx # RIP -- cmpw $FLAT_USER_CS32,16(%rsp)# CS -- movq 32(%rsp),%rsp # RSP -+ mov EFRAME_rip(%rsp), %rcx -+ cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp) -+ mov EFRAME_rsp(%rsp), %rsp - je 1f - sysretq - 1: sysretl - - ALIGN - .Lrestore_rcx_iret_exit_to_guest: -- movq 8(%rsp), %rcx # RIP -+ mov EFRAME_rip(%rsp), %rcx - /* No special register assumptions. */ - iret_exit_to_guest: -- andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), 24(%rsp) -- orl $X86_EFLAGS_IF,24(%rsp) -+ andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp) -+ orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp) - addq $8,%rsp - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) -@@ -257,7 +257,7 @@ ENTRY(lstar_enter) - pushq $FLAT_KERNEL_CS64 - pushq %rcx - pushq $0 -- movl $TRAP_syscall, 4(%rsp) -+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -294,7 +294,7 @@ ENTRY(cstar_enter) - pushq $FLAT_USER_CS32 - pushq %rcx - pushq $0 -- movl $TRAP_syscall, 4(%rsp) -+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -335,7 +335,7 @@ GLOBAL(sysenter_eflags_saved) - pushq $3 /* ring 3 null cs */ - pushq $0 /* null rip */ - pushq $0 -- movl $TRAP_syscall, 4(%rsp) -+ movl $TRAP_syscall, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -389,7 +389,7 @@ ENTRY(int80_direct_trap) - ENDBR64 - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP - pushq $0 -- movl $0x80, 4(%rsp) -+ movl $0x80, EFRAME_entry_vector(%rsp) - SAVE_ALL - - SPEC_CTRL_ENTRY_FROM_PV /* Req: %rsp=regs/cpuinfo, %rdx=0, Clob: acd */ -@@ -649,7 +649,7 @@ ret_from_intr: - .section .init.text, "ax", @progbits 
- ENTRY(early_page_fault) - ENDBR64 -- movl $TRAP_page_fault, 4(%rsp) -+ movl $TRAP_page_fault, EFRAME_entry_vector(%rsp) - SAVE_ALL - movq %rsp, %rdi - call do_early_page_fault -@@ -716,7 +716,7 @@ ENTRY(common_interrupt) - - ENTRY(page_fault) - ENDBR64 -- movl $TRAP_page_fault,4(%rsp) -+ movl $TRAP_page_fault, EFRAME_entry_vector(%rsp) - /* No special register assumptions. */ - GLOBAL(handle_exception) - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP -@@ -892,90 +892,90 @@ FATAL_exception_with_ints_disabled: - ENTRY(divide_error) - ENDBR64 - pushq $0 -- movl $TRAP_divide_error,4(%rsp) -+ movl $TRAP_divide_error, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(coprocessor_error) - ENDBR64 - pushq $0 -- movl $TRAP_copro_error,4(%rsp) -+ movl $TRAP_copro_error, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(simd_coprocessor_error) - ENDBR64 - pushq $0 -- movl $TRAP_simd_error,4(%rsp) -+ movl $TRAP_simd_error, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(device_not_available) - ENDBR64 - pushq $0 -- movl $TRAP_no_device,4(%rsp) -+ movl $TRAP_no_device, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(debug) - ENDBR64 - pushq $0 -- movl $TRAP_debug,4(%rsp) -+ movl $TRAP_debug, EFRAME_entry_vector(%rsp) - jmp handle_ist_exception - - ENTRY(int3) - ENDBR64 - pushq $0 -- movl $TRAP_int3,4(%rsp) -+ movl $TRAP_int3, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(overflow) - ENDBR64 - pushq $0 -- movl $TRAP_overflow,4(%rsp) -+ movl $TRAP_overflow, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(bounds) - ENDBR64 - pushq $0 -- movl $TRAP_bounds,4(%rsp) -+ movl $TRAP_bounds, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(invalid_op) - ENDBR64 - pushq $0 -- movl $TRAP_invalid_op,4(%rsp) -+ movl $TRAP_invalid_op, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(invalid_TSS) - ENDBR64 -- movl $TRAP_invalid_tss,4(%rsp) -+ movl $TRAP_invalid_tss, EFRAME_entry_vector(%rsp) - jmp handle_exception 
- - ENTRY(segment_not_present) - ENDBR64 -- movl $TRAP_no_segment,4(%rsp) -+ movl $TRAP_no_segment, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(stack_segment) - ENDBR64 -- movl $TRAP_stack_error,4(%rsp) -+ movl $TRAP_stack_error, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(general_protection) - ENDBR64 -- movl $TRAP_gp_fault,4(%rsp) -+ movl $TRAP_gp_fault, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(alignment_check) - ENDBR64 -- movl $TRAP_alignment_check,4(%rsp) -+ movl $TRAP_alignment_check, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(entry_CP) - ENDBR64 -- movl $X86_EXC_CP, 4(%rsp) -+ movl $X86_EXC_CP, EFRAME_entry_vector(%rsp) - jmp handle_exception - - ENTRY(double_fault) - ENDBR64 -- movl $TRAP_double_fault,4(%rsp) -+ movl $TRAP_double_fault, EFRAME_entry_vector(%rsp) - /* Set AC to reduce chance of further SMAP faults */ - ALTERNATIVE "", stac, X86_FEATURE_XEN_SMAP - SAVE_ALL -@@ -1001,7 +1001,7 @@ ENTRY(double_fault) - ENTRY(nmi) - ENDBR64 - pushq $0 -- movl $TRAP_nmi,4(%rsp) -+ movl $TRAP_nmi, EFRAME_entry_vector(%rsp) - handle_ist_exception: - ALTERNATIVE "", clac, X86_FEATURE_XEN_SMAP - SAVE_ALL -@@ -1134,7 +1134,7 @@ handle_ist_exception: - ENTRY(machine_check) - ENDBR64 - pushq $0 -- movl $TRAP_machine_check,4(%rsp) -+ movl $TRAP_machine_check, EFRAME_entry_vector(%rsp) - jmp handle_ist_exception - - /* No op trap handler. Required for kexec crash path. */ -@@ -1171,7 +1171,7 @@ autogen_stubs: /* Automatically generated stubs. */ - 1: - ENDBR64 - pushq $0 -- movb $vec,4(%rsp) -+ movb $vec, EFRAME_entry_vector(%rsp) - jmp common_interrupt - - entrypoint 1b -@@ -1185,7 +1185,7 @@ autogen_stubs: /* Automatically generated stubs. */ - test $8,%spl /* 64bit exception frames are 16 byte aligned, but the word */ - jz 2f /* size is 8 bytes. Check whether the processor gave us an */ - pushq $0 /* error code, and insert an empty one if not. 
*/ --2: movb $vec,4(%rsp) -+2: movb $vec, EFRAME_entry_vector(%rsp) - jmp handle_exception - - entrypoint 1b --- -2.44.0 - diff --git a/0040-tools-Import-stand-alone-sd_notify-implementation-fr.patch b/0040-tools-Import-stand-alone-sd_notify-implementation-fr.patch new file mode 100644 index 0000000..990158d --- /dev/null +++ b/0040-tools-Import-stand-alone-sd_notify-implementation-fr.patch @@ -0,0 +1,130 @@ +From 1743102a92479834c8e17b20697129e05b7c8313 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 4 Jul 2024 14:10:10 +0200 +Subject: [PATCH 40/56] tools: Import stand-alone sd_notify() implementation + from systemd + +... in order to avoid linking against the whole of libsystemd. + +Only minimal changes to the upstream copy, to function as a drop-in +replacement for sd_notify() and as a header-only library. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Juergen Gross <jgross@suse.com> +Acked-by: Christian Lindig <christian.lindig@cloud.com> +master commit: 78510f3a1522f2856330ffa429e0e35f8aab4277 +master date: 2024-05-23 15:04:40 +0100 +master commit: 78510f3a1522f2856330ffa429e0e35f8aab4277 +master date: 2024-05-23 15:04:40 +0100 +--- + tools/include/xen-sd-notify.h | 98 +++++++++++++++++++++++++++++++++++ + 1 file changed, 98 insertions(+) + create mode 100644 tools/include/xen-sd-notify.h + +diff --git a/tools/include/xen-sd-notify.h b/tools/include/xen-sd-notify.h +new file mode 100644 +index 0000000000..28c9b20f15 +--- /dev/null ++++ b/tools/include/xen-sd-notify.h +@@ -0,0 +1,98 @@ ++/* SPDX-License-Identifier: MIT-0 */ ++ ++/* ++ * Implement the systemd notify protocol without external dependencies. 
++ * Supports both readiness notification on startup and on reloading, ++ * according to the protocol defined at: ++ * https://www.freedesktop.org/software/systemd/man/latest/sd_notify.html ++ * This protocol is guaranteed to be stable as per: ++ * https://systemd.io/PORTABILITY_AND_STABILITY/ ++ * ++ * Differences from the upstream copy: ++ * - Rename/rework as a drop-in replacement for systemd/sd-daemon.h ++ * - Only take the subset Xen cares about ++ * - Respect -Wdeclaration-after-statement ++ */ ++ ++#ifndef XEN_SD_NOTIFY ++#define XEN_SD_NOTIFY ++ ++#include <errno.h> ++#include <stddef.h> ++#include <stdlib.h> ++#include <sys/socket.h> ++#include <sys/un.h> ++#include <unistd.h> ++ ++static inline void xen_sd_closep(int *fd) { ++ if (!fd || *fd < 0) ++ return; ++ ++ close(*fd); ++ *fd = -1; ++} ++ ++static inline int xen_sd_notify(const char *message) { ++ union sockaddr_union { ++ struct sockaddr sa; ++ struct sockaddr_un sun; ++ } socket_addr = { ++ .sun.sun_family = AF_UNIX, ++ }; ++ size_t path_length, message_length; ++ ssize_t written; ++ const char *socket_path; ++ int __attribute__((cleanup(xen_sd_closep))) fd = -1; ++ ++ /* Verify the argument first */ ++ if (!message) ++ return -EINVAL; ++ ++ message_length = strlen(message); ++ if (message_length == 0) ++ return -EINVAL; ++ ++ /* If the variable is not set, the protocol is a noop */ ++ socket_path = getenv("NOTIFY_SOCKET"); ++ if (!socket_path) ++ return 0; /* Not set? 
Nothing to do */ ++ ++ /* Only AF_UNIX is supported, with path or abstract sockets */ ++ if (socket_path[0] != '/' && socket_path[0] != '@') ++ return -EAFNOSUPPORT; ++ ++ path_length = strlen(socket_path); ++ /* Ensure there is room for NUL byte */ ++ if (path_length >= sizeof(socket_addr.sun.sun_path)) ++ return -E2BIG; ++ ++ memcpy(socket_addr.sun.sun_path, socket_path, path_length); ++ ++ /* Support for abstract socket */ ++ if (socket_addr.sun.sun_path[0] == '@') ++ socket_addr.sun.sun_path[0] = 0; ++ ++ fd = socket(AF_UNIX, SOCK_DGRAM|SOCK_CLOEXEC, 0); ++ if (fd < 0) ++ return -errno; ++ ++ if (connect(fd, &socket_addr.sa, offsetof(struct sockaddr_un, sun_path) + path_length) != 0) ++ return -errno; ++ ++ written = write(fd, message, message_length); ++ if (written != (ssize_t) message_length) ++ return written < 0 ? -errno : -EPROTO; ++ ++ return 1; /* Notified! */ ++} ++ ++static inline int sd_notify(int unset_environment, const char *message) { ++ int r = xen_sd_notify(message); ++ ++ if (unset_environment) ++ unsetenv("NOTIFY_SOCKET"); ++ ++ return r; ++} ++ ++#endif /* XEN_SD_NOTIFY */ +-- +2.45.2 + diff --git a/0040-x86-Resync-intel-family.h-from-Linux.patch b/0040-x86-Resync-intel-family.h-from-Linux.patch deleted file mode 100644 index 84e0304..0000000 --- a/0040-x86-Resync-intel-family.h-from-Linux.patch +++ /dev/null @@ -1,98 +0,0 @@ -From abc43cf5a6579f1aa0decf0a2349cdd2d2473117 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 27 Feb 2024 16:07:39 +0000 -Subject: [PATCH 40/67] x86: Resync intel-family.h from Linux - -From v6.8-rc6 - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 195e75371b13c4f7ecdf7b5c50aed0d02f2d7ce8) ---- - xen/arch/x86/include/asm/intel-family.h | 38 ++++++++++++++++++++++--- - 1 file changed, 34 insertions(+), 4 deletions(-) - -diff --git a/xen/arch/x86/include/asm/intel-family.h 
b/xen/arch/x86/include/asm/intel-family.h -index ffc49151be..b65e9c46b9 100644 ---- a/xen/arch/x86/include/asm/intel-family.h -+++ b/xen/arch/x86/include/asm/intel-family.h -@@ -26,6 +26,9 @@ - * _G - parts with extra graphics on - * _X - regular server parts - * _D - micro server parts -+ * _N,_P - other mobile parts -+ * _H - premium mobile parts -+ * _S - other client parts - * - * Historical OPTDIFFs: - * -@@ -37,6 +40,9 @@ - * their own names :-( - */ - -+/* Wildcard match for FAM6 so X86_MATCH_INTEL_FAM6_MODEL(ANY) works */ -+#define INTEL_FAM6_ANY X86_MODEL_ANY -+ - #define INTEL_FAM6_CORE_YONAH 0x0E - - #define INTEL_FAM6_CORE2_MEROM 0x0F -@@ -93,8 +99,6 @@ - #define INTEL_FAM6_ICELAKE_L 0x7E /* Sunny Cove */ - #define INTEL_FAM6_ICELAKE_NNPI 0x9D /* Sunny Cove */ - --#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ -- - #define INTEL_FAM6_ROCKETLAKE 0xA7 /* Cypress Cove */ - - #define INTEL_FAM6_TIGERLAKE_L 0x8C /* Willow Cove */ -@@ -102,12 +106,31 @@ - - #define INTEL_FAM6_SAPPHIRERAPIDS_X 0x8F /* Golden Cove */ - -+#define INTEL_FAM6_EMERALDRAPIDS_X 0xCF -+ -+#define INTEL_FAM6_GRANITERAPIDS_X 0xAD -+#define INTEL_FAM6_GRANITERAPIDS_D 0xAE -+ -+/* "Hybrid" Processors (P-Core/E-Core) */ -+ -+#define INTEL_FAM6_LAKEFIELD 0x8A /* Sunny Cove / Tremont */ -+ - #define INTEL_FAM6_ALDERLAKE 0x97 /* Golden Cove / Gracemont */ - #define INTEL_FAM6_ALDERLAKE_L 0x9A /* Golden Cove / Gracemont */ - --#define INTEL_FAM6_RAPTORLAKE 0xB7 -+#define INTEL_FAM6_RAPTORLAKE 0xB7 /* Raptor Cove / Enhanced Gracemont */ -+#define INTEL_FAM6_RAPTORLAKE_P 0xBA -+#define INTEL_FAM6_RAPTORLAKE_S 0xBF -+ -+#define INTEL_FAM6_METEORLAKE 0xAC -+#define INTEL_FAM6_METEORLAKE_L 0xAA -+ -+#define INTEL_FAM6_ARROWLAKE_H 0xC5 -+#define INTEL_FAM6_ARROWLAKE 0xC6 -+ -+#define INTEL_FAM6_LUNARLAKE_M 0xBD - --/* "Small Core" Processors (Atom) */ -+/* "Small Core" Processors (Atom/E-Core) */ - - #define INTEL_FAM6_ATOM_BONNELL 0x1C /* Diamondville, Pineview */ - #define 
INTEL_FAM6_ATOM_BONNELL_MID 0x26 /* Silverthorne, Lincroft */ -@@ -134,6 +157,13 @@ - #define INTEL_FAM6_ATOM_TREMONT 0x96 /* Elkhart Lake */ - #define INTEL_FAM6_ATOM_TREMONT_L 0x9C /* Jasper Lake */ - -+#define INTEL_FAM6_ATOM_GRACEMONT 0xBE /* Alderlake N */ -+ -+#define INTEL_FAM6_ATOM_CRESTMONT_X 0xAF /* Sierra Forest */ -+#define INTEL_FAM6_ATOM_CRESTMONT 0xB6 /* Grand Ridge */ -+ -+#define INTEL_FAM6_ATOM_DARKMONT_X 0xDD /* Clearwater Forest */ -+ - /* Xeon Phi */ - - #define INTEL_FAM6_XEON_PHI_KNL 0x57 /* Knights Landing */ --- -2.44.0 - diff --git a/0041-tools-c-o-xenstored-Don-t-link-against-libsystemd.patch b/0041-tools-c-o-xenstored-Don-t-link-against-libsystemd.patch new file mode 100644 index 0000000..5bf3f98 --- /dev/null +++ b/0041-tools-c-o-xenstored-Don-t-link-against-libsystemd.patch @@ -0,0 +1,87 @@ +From 77cf215157d267a7776f3c4ec32e89064dcd84cd Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 4 Jul 2024 14:10:29 +0200 +Subject: [PATCH 41/56] tools/{c,o}xenstored: Don't link against libsystemd + +Use the local freestanding wrapper instead. 
+ +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Juergen Gross <jgross@suse.com> +Acked-by: Christian Lindig <christian.lindig@cloud.com> +master commit: caf864482689a5dd6a945759b6372bb260d49665 +master date: 2024-05-23 15:04:40 +0100 +--- + tools/ocaml/xenstored/Makefile | 3 +-- + tools/ocaml/xenstored/systemd_stubs.c | 2 +- + tools/xenstored/Makefile | 5 ----- + tools/xenstored/core.c | 4 ++-- + 4 files changed, 4 insertions(+), 10 deletions(-) + +diff --git a/tools/ocaml/xenstored/Makefile b/tools/ocaml/xenstored/Makefile +index e8aaecf2e6..fa45305d8c 100644 +--- a/tools/ocaml/xenstored/Makefile ++++ b/tools/ocaml/xenstored/Makefile +@@ -4,8 +4,7 @@ include $(OCAML_TOPLEVEL)/common.make + + # Include configure output (config.h) + CFLAGS += -include $(XEN_ROOT)/tools/config.h +-CFLAGS-$(CONFIG_SYSTEMD) += $(SYSTEMD_CFLAGS) +-LDFLAGS-$(CONFIG_SYSTEMD) += $(SYSTEMD_LIBS) ++CFLAGS-$(CONFIG_SYSTEMD) += $(CFLAGS_xeninclude) + + CFLAGS += $(CFLAGS-y) + CFLAGS += $(APPEND_CFLAGS) +diff --git a/tools/ocaml/xenstored/systemd_stubs.c b/tools/ocaml/xenstored/systemd_stubs.c +index f4c875075a..7dbbdd35bf 100644 +--- a/tools/ocaml/xenstored/systemd_stubs.c ++++ b/tools/ocaml/xenstored/systemd_stubs.c +@@ -25,7 +25,7 @@ + + #if defined(HAVE_SYSTEMD) + +-#include <systemd/sd-daemon.h> ++#include <xen-sd-notify.h> + + CAMLprim value ocaml_sd_notify_ready(value ignore) + { +diff --git a/tools/xenstored/Makefile b/tools/xenstored/Makefile +index e0897ed1ba..09adfe1d50 100644 +--- a/tools/xenstored/Makefile ++++ b/tools/xenstored/Makefile +@@ -9,11 +9,6 @@ xenstored: LDLIBS += $(LDLIBS_libxenctrl) + xenstored: LDLIBS += -lrt + xenstored: LDLIBS += $(SOCKET_LIBS) + +-ifeq ($(CONFIG_SYSTEMD),y) +-$(XENSTORED_OBJS-y): CFLAGS += $(SYSTEMD_CFLAGS) +-xenstored: LDLIBS += $(SYSTEMD_LIBS) +-endif +- + TARGETS := xenstored + + .PHONY: all +diff --git a/tools/xenstored/core.c b/tools/xenstored/core.c +index edd07711db..dfe98e7bfc 100644 +--- a/tools/xenstored/core.c 
++++ b/tools/xenstored/core.c +@@ -61,7 +61,7 @@ + #endif + + #if defined(XEN_SYSTEMD_ENABLED) +-#include <systemd/sd-daemon.h> ++#include <xen-sd-notify.h> + #endif + + extern xenevtchn_handle *xce_handle; /* in domain.c */ +@@ -3000,7 +3000,7 @@ int main(int argc, char *argv[]) + #if defined(XEN_SYSTEMD_ENABLED) + if (!live_update) { + sd_notify(1, "READY=1"); +- fprintf(stderr, SD_NOTICE "xenstored is ready\n"); ++ fprintf(stderr, "xenstored is ready\n"); + } + #endif + +-- +2.45.2 + diff --git a/0041-x86-vmx-Perform-VERW-flushing-later-in-the-VMExit-pa.patch b/0041-x86-vmx-Perform-VERW-flushing-later-in-the-VMExit-pa.patch deleted file mode 100644 index 871f10f..0000000 --- a/0041-x86-vmx-Perform-VERW-flushing-later-in-the-VMExit-pa.patch +++ /dev/null @@ -1,146 +0,0 @@ -From 77f2bec134049aba29b9b459f955022722d10847 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Fri, 23 Jun 2023 11:32:00 +0100 -Subject: [PATCH 41/67] x86/vmx: Perform VERW flushing later in the VMExit path - -Broken out of the following patch because this change is subtle enough on its -own. See it for the rationale of why we're moving VERW. - -As for how, extend the trick already used to hold one condition in -flags (RESUME vs LAUNCH) through the POPing of GPRs. - -Move the MOV CR earlier. Intel specify flags to be undefined across it. - -Encode the two conditions we want using SF and PF. See the code comment for -exactly how. - -Leave a comment to explain the lack of any content around -SPEC_CTRL_EXIT_TO_VMX, but leave the block in place. Sod's law says if we -delete it, we'll need to reintroduce it. - -This is part of XSA-452 / CVE-2023-28746. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 475fa20b7384464210f42bad7195f87bd6f1c63f) ---- - xen/arch/x86/hvm/vmx/entry.S | 36 +++++++++++++++++++++--- - xen/arch/x86/include/asm/asm_defns.h | 8 ++++++ - xen/arch/x86/include/asm/spec_ctrl_asm.h | 7 +++++ - xen/arch/x86/x86_64/asm-offsets.c | 1 + - 4 files changed, 48 insertions(+), 4 deletions(-) - -diff --git a/xen/arch/x86/hvm/vmx/entry.S b/xen/arch/x86/hvm/vmx/entry.S -index 5f5de45a13..cdde76e138 100644 ---- a/xen/arch/x86/hvm/vmx/entry.S -+++ b/xen/arch/x86/hvm/vmx/entry.S -@@ -87,17 +87,39 @@ UNLIKELY_END(realmode) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ - /* SPEC_CTRL_EXIT_TO_VMX Req: %rsp=regs/cpuinfo Clob: */ -- DO_SPEC_CTRL_COND_VERW -+ /* -+ * All speculation safety work happens to be elsewhere. VERW is after -+ * popping the GPRs, while restoring the guest MSR_SPEC_CTRL is left -+ * to the MSR load list. -+ */ - - mov VCPU_hvm_guest_cr2(%rbx),%rax -+ mov %rax, %cr2 -+ -+ /* -+ * We need to perform two conditional actions (VERW, and Resume vs -+ * Launch) after popping GPRs. With some cunning, we can encode both -+ * of these in eflags together. -+ * -+ * Parity is only calculated over the bottom byte of the answer, while -+ * Sign is simply the top bit. -+ * -+ * Therefore, the final OR instruction ends up producing: -+ * SF = VCPU_vmx_launched -+ * PF = !SCF_verw -+ */ -+ BUILD_BUG_ON(SCF_verw & ~0xff) -+ movzbl VCPU_vmx_launched(%rbx), %ecx -+ shl $31, %ecx -+ movzbl CPUINFO_spec_ctrl_flags(%rsp), %eax -+ and $SCF_verw, %eax -+ or %eax, %ecx - - pop %r15 - pop %r14 - pop %r13 - pop %r12 - pop %rbp -- mov %rax,%cr2 -- cmpb $0,VCPU_vmx_launched(%rbx) - pop %rbx - pop %r11 - pop %r10 -@@ -108,7 +130,13 @@ UNLIKELY_END(realmode) - pop %rdx - pop %rsi - pop %rdi -- je .Lvmx_launch -+ -+ jpe .L_skip_verw -+ /* VERW clobbers ZF, but preserves all others, including SF. 
*/ -+ verw STK_REL(CPUINFO_verw_sel, CPUINFO_error_code)(%rsp) -+.L_skip_verw: -+ -+ jns .Lvmx_launch - - /*.Lvmx_resume:*/ - VMRESUME -diff --git a/xen/arch/x86/include/asm/asm_defns.h b/xen/arch/x86/include/asm/asm_defns.h -index d9431180cf..abc6822b08 100644 ---- a/xen/arch/x86/include/asm/asm_defns.h -+++ b/xen/arch/x86/include/asm/asm_defns.h -@@ -81,6 +81,14 @@ register unsigned long current_stack_pointer asm("rsp"); - - #ifdef __ASSEMBLY__ - -+.macro BUILD_BUG_ON condstr, cond:vararg -+ .if \cond -+ .error "Condition \"\condstr\" not satisfied" -+ .endif -+.endm -+/* preprocessor macro to make error message more user friendly */ -+#define BUILD_BUG_ON(cond) BUILD_BUG_ON #cond, cond -+ - #ifdef HAVE_AS_QUOTED_SYM - #define SUBSECTION_LBL(tag) \ - .ifndef .L.tag; \ -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index f4b8b9d956..ca9cb0f5dd 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -164,6 +164,13 @@ - #endif - .endm - -+/* -+ * Helper to improve the readibility of stack dispacements with %rsp in -+ * unusual positions. Both @field and @top_of_stack should be constants from -+ * the same object. @top_of_stack should be where %rsp is currently pointing. 
-+ */ -+#define STK_REL(field, top_of_stk) ((field) - (top_of_stk)) -+ - .macro DO_SPEC_CTRL_COND_VERW - /* - * Requires %rsp=cpuinfo -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index 31fa63b77f..a4e94d6930 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -135,6 +135,7 @@ void __dummy__(void) - #endif - - OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); -+ OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code); - OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel); - OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); - OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset); --- -2.44.0 - diff --git a/0042-tools-Drop-libsystemd-as-a-dependency.patch b/0042-tools-Drop-libsystemd-as-a-dependency.patch new file mode 100644 index 0000000..168680e --- /dev/null +++ b/0042-tools-Drop-libsystemd-as-a-dependency.patch @@ -0,0 +1,648 @@ +From 7967bd358e93ed83e01813a8d0dfd68aa67f5780 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 4 Jul 2024 14:10:40 +0200 +Subject: [PATCH 42/56] tools: Drop libsystemd as a dependency +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +There are no more users, and we want to dissuade people from introducing new +users just for sd_notify() and friends. Drop the dependency. + +We still want the overall --with{,out}-systemd to gate the generation of the +service/unit/mount/etc files. + +Rerun autogen.sh, and mark the dependency as removed in the build containers. + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Juergen Gross <jgross@suse.com> +Acked-by: Christian Lindig <christian.lindig@cloud.com> + +tools: (Actually) drop libsystemd as a dependency + +When reinstating some of systemd.m4 between v1 and v2, I reintroduced a little +too much. 
While {c,o}xenstored are indeed no longer linked against +libsystemd, ./configure still looks for it. + +Drop this too. + +Fixes: ae26101f6bfc ("tools: Drop libsystemd as a dependency") +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: ae26101f6bfc8185adcdb9165d469bdc467780db +master date: 2024-05-23 15:04:40 +0100 +master commit: 6ef4fa1e7fe78c1dae07b451292b07facfce4902 +master date: 2024-05-30 12:15:25 +0100 +--- + CHANGELOG.md | 7 +- + config/Tools.mk.in | 2 - + m4/systemd.m4 | 17 -- + tools/configure | 485 +-------------------------------------------- + 4 files changed, 7 insertions(+), 504 deletions(-) + +diff --git a/CHANGELOG.md b/CHANGELOG.md +index fa54d59df1..ceca12eb5f 100644 +--- a/CHANGELOG.md ++++ b/CHANGELOG.md +@@ -4,7 +4,12 @@ Notable changes to Xen will be documented in this file. + + The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/) + +-## [4.18.2](https://xenbits.xen.org/gitweb/?p=xen.git;a=shortlog;h=RELEASE-4.18.2) ++## [4.18.3](https://xenbits.xen.org/gitweb/?p=xen.git;a=shortlog;h=RELEASE-4.18.3) ++ ++### Changed ++ - When building with Systemd support (./configure --enable-systemd), remove ++ libsystemd as a build dependency. Systemd Notify support is retained, now ++ using a standalone library implementation. 
+ + ## [4.18.1](https://xenbits.xen.org/gitweb/?p=xen.git;a=shortlog;h=RELEASE-4.18.1) + +diff --git a/config/Tools.mk.in b/config/Tools.mk.in +index b54ab21f96..50fbef841f 100644 +--- a/config/Tools.mk.in ++++ b/config/Tools.mk.in +@@ -52,8 +52,6 @@ CONFIG_PYGRUB := @pygrub@ + CONFIG_LIBFSIMAGE := @libfsimage@ + + CONFIG_SYSTEMD := @systemd@ +-SYSTEMD_CFLAGS := @SYSTEMD_CFLAGS@ +-SYSTEMD_LIBS := @SYSTEMD_LIBS@ + XEN_SYSTEMD_DIR := @SYSTEMD_DIR@ + XEN_SYSTEMD_MODULES_LOAD := @SYSTEMD_MODULES_LOAD@ + CONFIG_9PFS := @ninepfs@ +diff --git a/m4/systemd.m4 b/m4/systemd.m4 +index 112dc11b5e..ab12ea313d 100644 +--- a/m4/systemd.m4 ++++ b/m4/systemd.m4 +@@ -41,15 +41,6 @@ AC_DEFUN([AX_ALLOW_SYSTEMD_OPTS], [ + ]) + + AC_DEFUN([AX_CHECK_SYSTEMD_LIBS], [ +- PKG_CHECK_MODULES([SYSTEMD], [libsystemd-daemon],, +- [PKG_CHECK_MODULES([SYSTEMD], [libsystemd >= 209])] +- ) +- dnl pkg-config older than 0.24 does not set these for +- dnl PKG_CHECK_MODULES() worth also noting is that as of version 208 +- dnl of systemd pkg-config --cflags currently yields no extra flags yet. +- AC_SUBST([SYSTEMD_CFLAGS]) +- AC_SUBST([SYSTEMD_LIBS]) +- + AS_IF([test "x$SYSTEMD_DIR" = x], [ + dnl In order to use the line below we need to fix upstream systemd + dnl to properly ${prefix} for child variables in +@@ -95,13 +86,6 @@ AC_DEFUN([AX_CHECK_SYSTEMD], [ + ],[systemd=n]) + ]) + +-AC_DEFUN([AX_CHECK_SYSTEMD_ENABLE_AVAILABLE], [ +- PKG_CHECK_MODULES([SYSTEMD], [libsystemd-daemon], [systemd="y"],[ +- PKG_CHECK_MODULES([SYSTEMD], [libsystemd >= 209], +- [systemd="y"],[systemd="n"]) +- ]) +-]) +- + dnl Enables systemd by default and requires a --disable-systemd option flag + dnl to configure if you want to disable. + AC_DEFUN([AX_ENABLE_SYSTEMD], [ +@@ -121,6 +105,5 @@ dnl to have systemd build libraries it will be enabled. 
You can always force + dnl disable with --disable-systemd + AC_DEFUN([AX_AVAILABLE_SYSTEMD], [ + AX_ALLOW_SYSTEMD_OPTS() +- AX_CHECK_SYSTEMD_ENABLE_AVAILABLE() + AX_CHECK_SYSTEMD() + ]) +diff --git a/tools/configure b/tools/configure +index 38c0808d3a..7bb935d23b 100755 +--- a/tools/configure ++++ b/tools/configure +@@ -626,8 +626,6 @@ ac_subst_vars='LTLIBOBJS + LIBOBJS + pvshim + ninepfs +-SYSTEMD_LIBS +-SYSTEMD_CFLAGS + SYSTEMD_MODULES_LOAD + SYSTEMD_DIR + systemd +@@ -864,9 +862,7 @@ pixman_LIBS + libzstd_CFLAGS + libzstd_LIBS + LIBNL3_CFLAGS +-LIBNL3_LIBS +-SYSTEMD_CFLAGS +-SYSTEMD_LIBS' ++LIBNL3_LIBS' + + + # Initialize some variables set by options. +@@ -1621,10 +1617,6 @@ Some influential environment variables: + LIBNL3_CFLAGS + C compiler flags for LIBNL3, overriding pkg-config + LIBNL3_LIBS linker flags for LIBNL3, overriding pkg-config +- SYSTEMD_CFLAGS +- C compiler flags for SYSTEMD, overriding pkg-config +- SYSTEMD_LIBS +- linker flags for SYSTEMD, overriding pkg-config + + Use these variables to override the choices made by `configure' or to help + it to find libraries and programs with nonstandard names/locations. +@@ -3889,8 +3881,6 @@ esac + + + +- +- + + + +@@ -9540,223 +9530,6 @@ fi + + + +- +-pkg_failed=no +-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5 +-$as_echo_n "checking for SYSTEMD... " >&6; } +- +-if test -n "$SYSTEMD_CFLAGS"; then +- pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS" +- elif test -n "$PKG_CONFIG"; then +- if test -n "$PKG_CONFIG" && \ +- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd-daemon\""; } >&5 +- ($PKG_CONFIG --exists --print-errors "libsystemd-daemon") 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; then +- pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd-daemon" 2>/dev/null` +- test "x$?" 
!= "x0" && pkg_failed=yes +-else +- pkg_failed=yes +-fi +- else +- pkg_failed=untried +-fi +-if test -n "$SYSTEMD_LIBS"; then +- pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS" +- elif test -n "$PKG_CONFIG"; then +- if test -n "$PKG_CONFIG" && \ +- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd-daemon\""; } >&5 +- ($PKG_CONFIG --exists --print-errors "libsystemd-daemon") 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; then +- pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd-daemon" 2>/dev/null` +- test "x$?" != "x0" && pkg_failed=yes +-else +- pkg_failed=yes +-fi +- else +- pkg_failed=untried +-fi +- +- +- +-if test $pkg_failed = yes; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +-$as_echo "no" >&6; } +- +-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then +- _pkg_short_errors_supported=yes +-else +- _pkg_short_errors_supported=no +-fi +- if test $_pkg_short_errors_supported = yes; then +- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd-daemon" 2>&1` +- else +- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd-daemon" 2>&1` +- fi +- # Put the nasty error message in config.log where it belongs +- echo "$SYSTEMD_PKG_ERRORS" >&5 +- +- +- +-pkg_failed=no +-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5 +-$as_echo_n "checking for SYSTEMD... " >&6; } +- +-if test -n "$SYSTEMD_CFLAGS"; then +- pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS" +- elif test -n "$PKG_CONFIG"; then +- if test -n "$PKG_CONFIG" && \ +- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 +- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 +- test $ac_status = 0; }; then +- pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd >= 209" 2>/dev/null` +- test "x$?" != "x0" && pkg_failed=yes +-else +- pkg_failed=yes +-fi +- else +- pkg_failed=untried +-fi +-if test -n "$SYSTEMD_LIBS"; then +- pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS" +- elif test -n "$PKG_CONFIG"; then +- if test -n "$PKG_CONFIG" && \ +- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 +- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; then +- pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd >= 209" 2>/dev/null` +- test "x$?" != "x0" && pkg_failed=yes +-else +- pkg_failed=yes +-fi +- else +- pkg_failed=untried +-fi +- +- +- +-if test $pkg_failed = yes; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +-$as_echo "no" >&6; } +- +-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then +- _pkg_short_errors_supported=yes +-else +- _pkg_short_errors_supported=no +-fi +- if test $_pkg_short_errors_supported = yes; then +- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd >= 209" 2>&1` +- else +- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd >= 209" 2>&1` +- fi +- # Put the nasty error message in config.log where it belongs +- echo "$SYSTEMD_PKG_ERRORS" >&5 +- +- systemd="n" +-elif test $pkg_failed = untried; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +-$as_echo "no" >&6; } +- systemd="n" +-else +- SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS +- SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +-$as_echo "yes" >&6; } +- systemd="y" +-fi +- +-elif test $pkg_failed = untried; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +-$as_echo "no" >&6; } +- +- +-pkg_failed=no +-{ $as_echo 
"$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5 +-$as_echo_n "checking for SYSTEMD... " >&6; } +- +-if test -n "$SYSTEMD_CFLAGS"; then +- pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS" +- elif test -n "$PKG_CONFIG"; then +- if test -n "$PKG_CONFIG" && \ +- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 +- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; then +- pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd >= 209" 2>/dev/null` +- test "x$?" != "x0" && pkg_failed=yes +-else +- pkg_failed=yes +-fi +- else +- pkg_failed=untried +-fi +-if test -n "$SYSTEMD_LIBS"; then +- pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS" +- elif test -n "$PKG_CONFIG"; then +- if test -n "$PKG_CONFIG" && \ +- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 +- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; then +- pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd >= 209" 2>/dev/null` +- test "x$?" 
!= "x0" && pkg_failed=yes +-else +- pkg_failed=yes +-fi +- else +- pkg_failed=untried +-fi +- +- +- +-if test $pkg_failed = yes; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +-$as_echo "no" >&6; } +- +-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then +- _pkg_short_errors_supported=yes +-else +- _pkg_short_errors_supported=no +-fi +- if test $_pkg_short_errors_supported = yes; then +- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd >= 209" 2>&1` +- else +- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd >= 209" 2>&1` +- fi +- # Put the nasty error message in config.log where it belongs +- echo "$SYSTEMD_PKG_ERRORS" >&5 +- +- systemd="n" +-elif test $pkg_failed = untried; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +-$as_echo "no" >&6; } +- systemd="n" +-else +- SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS +- SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +-$as_echo "yes" >&6; } +- systemd="y" +-fi +- +-else +- SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS +- SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +-$as_echo "yes" >&6; } +- systemd="y" +-fi +- +- + if test "x$enable_systemd" != "xno"; then : + + if test "x$systemd" = "xy" ; then : +@@ -9766,262 +9539,6 @@ $as_echo "#define HAVE_SYSTEMD 1" >>confdefs.h + + systemd=y + +- +-pkg_failed=no +-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5 +-$as_echo_n "checking for SYSTEMD... " >&6; } +- +-if test -n "$SYSTEMD_CFLAGS"; then +- pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS" +- elif test -n "$PKG_CONFIG"; then +- if test -n "$PKG_CONFIG" && \ +- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd-daemon\""; } >&5 +- ($PKG_CONFIG --exists --print-errors "libsystemd-daemon") 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? 
= $ac_status" >&5 +- test $ac_status = 0; }; then +- pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd-daemon" 2>/dev/null` +- test "x$?" != "x0" && pkg_failed=yes +-else +- pkg_failed=yes +-fi +- else +- pkg_failed=untried +-fi +-if test -n "$SYSTEMD_LIBS"; then +- pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS" +- elif test -n "$PKG_CONFIG"; then +- if test -n "$PKG_CONFIG" && \ +- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd-daemon\""; } >&5 +- ($PKG_CONFIG --exists --print-errors "libsystemd-daemon") 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; then +- pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd-daemon" 2>/dev/null` +- test "x$?" != "x0" && pkg_failed=yes +-else +- pkg_failed=yes +-fi +- else +- pkg_failed=untried +-fi +- +- +- +-if test $pkg_failed = yes; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +-$as_echo "no" >&6; } +- +-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then +- _pkg_short_errors_supported=yes +-else +- _pkg_short_errors_supported=no +-fi +- if test $_pkg_short_errors_supported = yes; then +- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd-daemon" 2>&1` +- else +- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd-daemon" 2>&1` +- fi +- # Put the nasty error message in config.log where it belongs +- echo "$SYSTEMD_PKG_ERRORS" >&5 +- +- +-pkg_failed=no +-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5 +-$as_echo_n "checking for SYSTEMD... " >&6; } +- +-if test -n "$SYSTEMD_CFLAGS"; then +- pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS" +- elif test -n "$PKG_CONFIG"; then +- if test -n "$PKG_CONFIG" && \ +- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 +- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 +- ac_status=$? 
+- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; then +- pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd >= 209" 2>/dev/null` +- test "x$?" != "x0" && pkg_failed=yes +-else +- pkg_failed=yes +-fi +- else +- pkg_failed=untried +-fi +-if test -n "$SYSTEMD_LIBS"; then +- pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS" +- elif test -n "$PKG_CONFIG"; then +- if test -n "$PKG_CONFIG" && \ +- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 +- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; then +- pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd >= 209" 2>/dev/null` +- test "x$?" != "x0" && pkg_failed=yes +-else +- pkg_failed=yes +-fi +- else +- pkg_failed=untried +-fi +- +- +- +-if test $pkg_failed = yes; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +-$as_echo "no" >&6; } +- +-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then +- _pkg_short_errors_supported=yes +-else +- _pkg_short_errors_supported=no +-fi +- if test $_pkg_short_errors_supported = yes; then +- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd >= 209" 2>&1` +- else +- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd >= 209" 2>&1` +- fi +- # Put the nasty error message in config.log where it belongs +- echo "$SYSTEMD_PKG_ERRORS" >&5 +- +- as_fn_error $? "Package requirements (libsystemd >= 209) were not met: +- +-$SYSTEMD_PKG_ERRORS +- +-Consider adjusting the PKG_CONFIG_PATH environment variable if you +-installed software in a non-standard prefix. +- +-Alternatively, you may set the environment variables SYSTEMD_CFLAGS +-and SYSTEMD_LIBS to avoid the need to call pkg-config. +-See the pkg-config man page for more details." 
"$LINENO" 5 +-elif test $pkg_failed = untried; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +-$as_echo "no" >&6; } +- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +-as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it +-is in your PATH or set the PKG_CONFIG environment variable to the full +-path to pkg-config. +- +-Alternatively, you may set the environment variables SYSTEMD_CFLAGS +-and SYSTEMD_LIBS to avoid the need to call pkg-config. +-See the pkg-config man page for more details. +- +-To get pkg-config, see <http://pkg-config.freedesktop.org/>. +-See \`config.log' for more details" "$LINENO" 5; } +-else +- SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS +- SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +-$as_echo "yes" >&6; } +- +-fi +- +-elif test $pkg_failed = untried; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +-$as_echo "no" >&6; } +- +-pkg_failed=no +-{ $as_echo "$as_me:${as_lineno-$LINENO}: checking for SYSTEMD" >&5 +-$as_echo_n "checking for SYSTEMD... " >&6; } +- +-if test -n "$SYSTEMD_CFLAGS"; then +- pkg_cv_SYSTEMD_CFLAGS="$SYSTEMD_CFLAGS" +- elif test -n "$PKG_CONFIG"; then +- if test -n "$PKG_CONFIG" && \ +- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 +- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; then +- pkg_cv_SYSTEMD_CFLAGS=`$PKG_CONFIG --cflags "libsystemd >= 209" 2>/dev/null` +- test "x$?" 
!= "x0" && pkg_failed=yes +-else +- pkg_failed=yes +-fi +- else +- pkg_failed=untried +-fi +-if test -n "$SYSTEMD_LIBS"; then +- pkg_cv_SYSTEMD_LIBS="$SYSTEMD_LIBS" +- elif test -n "$PKG_CONFIG"; then +- if test -n "$PKG_CONFIG" && \ +- { { $as_echo "$as_me:${as_lineno-$LINENO}: \$PKG_CONFIG --exists --print-errors \"libsystemd >= 209\""; } >&5 +- ($PKG_CONFIG --exists --print-errors "libsystemd >= 209") 2>&5 +- ac_status=$? +- $as_echo "$as_me:${as_lineno-$LINENO}: \$? = $ac_status" >&5 +- test $ac_status = 0; }; then +- pkg_cv_SYSTEMD_LIBS=`$PKG_CONFIG --libs "libsystemd >= 209" 2>/dev/null` +- test "x$?" != "x0" && pkg_failed=yes +-else +- pkg_failed=yes +-fi +- else +- pkg_failed=untried +-fi +- +- +- +-if test $pkg_failed = yes; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +-$as_echo "no" >&6; } +- +-if $PKG_CONFIG --atleast-pkgconfig-version 0.20; then +- _pkg_short_errors_supported=yes +-else +- _pkg_short_errors_supported=no +-fi +- if test $_pkg_short_errors_supported = yes; then +- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --short-errors --print-errors --cflags --libs "libsystemd >= 209" 2>&1` +- else +- SYSTEMD_PKG_ERRORS=`$PKG_CONFIG --print-errors --cflags --libs "libsystemd >= 209" 2>&1` +- fi +- # Put the nasty error message in config.log where it belongs +- echo "$SYSTEMD_PKG_ERRORS" >&5 +- +- as_fn_error $? "Package requirements (libsystemd >= 209) were not met: +- +-$SYSTEMD_PKG_ERRORS +- +-Consider adjusting the PKG_CONFIG_PATH environment variable if you +-installed software in a non-standard prefix. +- +-Alternatively, you may set the environment variables SYSTEMD_CFLAGS +-and SYSTEMD_LIBS to avoid the need to call pkg-config. +-See the pkg-config man page for more details." 
"$LINENO" 5 +-elif test $pkg_failed = untried; then +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: no" >&5 +-$as_echo "no" >&6; } +- { { $as_echo "$as_me:${as_lineno-$LINENO}: error: in \`$ac_pwd':" >&5 +-$as_echo "$as_me: error: in \`$ac_pwd':" >&2;} +-as_fn_error $? "The pkg-config script could not be found or is too old. Make sure it +-is in your PATH or set the PKG_CONFIG environment variable to the full +-path to pkg-config. +- +-Alternatively, you may set the environment variables SYSTEMD_CFLAGS +-and SYSTEMD_LIBS to avoid the need to call pkg-config. +-See the pkg-config man page for more details. +- +-To get pkg-config, see <http://pkg-config.freedesktop.org/>. +-See \`config.log' for more details" "$LINENO" 5; } +-else +- SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS +- SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +-$as_echo "yes" >&6; } +- +-fi +- +-else +- SYSTEMD_CFLAGS=$pkg_cv_SYSTEMD_CFLAGS +- SYSTEMD_LIBS=$pkg_cv_SYSTEMD_LIBS +- { $as_echo "$as_me:${as_lineno-$LINENO}: result: yes" >&5 +-$as_echo "yes" >&6; } +- +-fi +- +- +- + if test "x$SYSTEMD_DIR" = x; then : + + SYSTEMD_DIR="\$(prefix)/lib/systemd/system/" +-- +2.45.2 + diff --git a/0042-x86-spec-ctrl-Perform-VERW-flushing-later-in-exit-pa.patch b/0042-x86-spec-ctrl-Perform-VERW-flushing-later-in-exit-pa.patch deleted file mode 100644 index ac78acd..0000000 --- a/0042-x86-spec-ctrl-Perform-VERW-flushing-later-in-exit-pa.patch +++ /dev/null @@ -1,209 +0,0 @@ -From 76af773de5d3e68b7140cc9c5343be6746c9101c Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Sat, 27 Jan 2024 18:20:56 +0000 -Subject: [PATCH 42/67] x86/spec-ctrl: Perform VERW flushing later in exit - paths - -On parts vulnerable to RFDS, VERW's side effects are extended to scrub all -non-architectural entries in various Physical Register Files. To remove all -of Xen's values, the VERW must be after popping the GPRs. 
- -Rework SPEC_CTRL_COND_VERW to default to an CPUINFO_error_code %rsp position, -but with overrides for other contexts. Identify that it clobbers eflags; this -is particularly relevant for the SYSRET path. - -For the IST exit return to Xen, have the main SPEC_CTRL_EXIT_TO_XEN put a -shadow copy of spec_ctrl_flags, as GPRs can't be used at the point we want to -issue the VERW. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 0a666cf2cd99df6faf3eebc81a1fc286e4eca4c7) ---- - xen/arch/x86/include/asm/spec_ctrl_asm.h | 36 ++++++++++++++++-------- - xen/arch/x86/x86_64/asm-offsets.c | 13 +++++++-- - xen/arch/x86/x86_64/compat/entry.S | 6 ++++ - xen/arch/x86/x86_64/entry.S | 21 +++++++++++++- - 4 files changed, 61 insertions(+), 15 deletions(-) - -diff --git a/xen/arch/x86/include/asm/spec_ctrl_asm.h b/xen/arch/x86/include/asm/spec_ctrl_asm.h -index ca9cb0f5dd..97a97b2b82 100644 ---- a/xen/arch/x86/include/asm/spec_ctrl_asm.h -+++ b/xen/arch/x86/include/asm/spec_ctrl_asm.h -@@ -171,16 +171,23 @@ - */ - #define STK_REL(field, top_of_stk) ((field) - (top_of_stk)) - --.macro DO_SPEC_CTRL_COND_VERW -+.macro SPEC_CTRL_COND_VERW \ -+ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_error_code), \ -+ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_error_code) - /* -- * Requires %rsp=cpuinfo -+ * Requires \scf and \sel as %rsp-relative expressions -+ * Clobbers eflags -+ * -+ * VERW needs to run after guest GPRs have been restored, where only %rsp is -+ * good to use. Default to expecting %rsp pointing at CPUINFO_error_code. -+ * Contexts where this is not true must provide an alternative \scf and \sel. - * - * Issue a VERW for its flushing side effect, if indicated. This is a Spectre - * v1 gadget, but the IRET/VMEntry is serialising. 
- */ -- testb $SCF_verw, CPUINFO_spec_ctrl_flags(%rsp) -+ testb $SCF_verw, \scf(%rsp) - jz .L\@_verw_skip -- verw CPUINFO_verw_sel(%rsp) -+ verw \sel(%rsp) - .L\@_verw_skip: - .endm - -@@ -298,8 +305,6 @@ - */ - ALTERNATIVE "", DO_SPEC_CTRL_EXIT_TO_GUEST, X86_FEATURE_SC_MSR_PV - -- DO_SPEC_CTRL_COND_VERW -- - ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV - .endm - -@@ -379,7 +384,7 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - */ - .macro SPEC_CTRL_EXIT_TO_XEN - /* -- * Requires %r12=ist_exit, %r14=stack_end -+ * Requires %r12=ist_exit, %r14=stack_end, %rsp=regs - * Clobbers %rax, %rbx, %rcx, %rdx - */ - movzbl STACK_CPUINFO_FIELD(spec_ctrl_flags)(%r14), %ebx -@@ -407,11 +412,18 @@ UNLIKELY_DISPATCH_LABEL(\@_serialise): - test %r12, %r12 - jz .L\@_skip_ist_exit - -- /* Logically DO_SPEC_CTRL_COND_VERW but without the %rsp=cpuinfo dependency */ -- testb $SCF_verw, %bl -- jz .L\@_skip_verw -- verw STACK_CPUINFO_FIELD(verw_sel)(%r14) --.L\@_skip_verw: -+ /* -+ * Stash SCF and verw_sel above eflags in the case of an IST_exit. The -+ * VERW logic needs to run after guest GPRs have been restored; i.e. where -+ * we cannot use %r12 or %r14 for the purposes they have here. -+ * -+ * When the CPU pushed this exception frame, it zero-extended eflags. -+ * Therefore it is safe for the VERW logic to look at the stashed SCF -+ * outside of the ist_exit condition. Also, this stashing won't influence -+ * any other restore_all_guest() paths. -+ */ -+ or $(__HYPERVISOR_DS32 << 16), %ebx -+ mov %ebx, UREGS_eflags + 4(%rsp) /* EFRAME_shadow_scf/sel */ - - ALTERNATIVE "", DO_SPEC_CTRL_DIV, X86_FEATURE_SC_DIV - -diff --git a/xen/arch/x86/x86_64/asm-offsets.c b/xen/arch/x86/x86_64/asm-offsets.c -index a4e94d6930..4cd5938d7b 100644 ---- a/xen/arch/x86/x86_64/asm-offsets.c -+++ b/xen/arch/x86/x86_64/asm-offsets.c -@@ -55,14 +55,22 @@ void __dummy__(void) - * EFRAME_* is for the entry/exit logic where %rsp is pointing at - * UREGS_error_code and GPRs are still/already guest values. 
- */ --#define OFFSET_EF(sym, mem) \ -+#define OFFSET_EF(sym, mem, ...) \ - DEFINE(sym, offsetof(struct cpu_user_regs, mem) - \ -- offsetof(struct cpu_user_regs, error_code)) -+ offsetof(struct cpu_user_regs, error_code) __VA_ARGS__) - - OFFSET_EF(EFRAME_entry_vector, entry_vector); - OFFSET_EF(EFRAME_rip, rip); - OFFSET_EF(EFRAME_cs, cs); - OFFSET_EF(EFRAME_eflags, eflags); -+ -+ /* -+ * These aren't real fields. They're spare space, used by the IST -+ * exit-to-xen path. -+ */ -+ OFFSET_EF(EFRAME_shadow_scf, eflags, +4); -+ OFFSET_EF(EFRAME_shadow_sel, eflags, +6); -+ - OFFSET_EF(EFRAME_rsp, rsp); - BLANK(); - -@@ -136,6 +144,7 @@ void __dummy__(void) - - OFFSET(CPUINFO_guest_cpu_user_regs, struct cpu_info, guest_cpu_user_regs); - OFFSET(CPUINFO_error_code, struct cpu_info, guest_cpu_user_regs.error_code); -+ OFFSET(CPUINFO_rip, struct cpu_info, guest_cpu_user_regs.rip); - OFFSET(CPUINFO_verw_sel, struct cpu_info, verw_sel); - OFFSET(CPUINFO_current_vcpu, struct cpu_info, current_vcpu); - OFFSET(CPUINFO_per_cpu_offset, struct cpu_info, per_cpu_offset); -diff --git a/xen/arch/x86/x86_64/compat/entry.S b/xen/arch/x86/x86_64/compat/entry.S -index 7c211314d8..3b2fbcd873 100644 ---- a/xen/arch/x86/x86_64/compat/entry.S -+++ b/xen/arch/x86/x86_64/compat/entry.S -@@ -161,6 +161,12 @@ ENTRY(compat_restore_all_guest) - SPEC_CTRL_EXIT_TO_PV /* Req: a=spec_ctrl %rsp=regs/cpuinfo, Clob: cd */ - - RESTORE_ALL adj=8 compat=1 -+ -+ /* Account for ev/ec having already been popped off the stack. 
*/ -+ SPEC_CTRL_COND_VERW \ -+ scf=STK_REL(CPUINFO_spec_ctrl_flags, CPUINFO_rip), \ -+ sel=STK_REL(CPUINFO_verw_sel, CPUINFO_rip) -+ - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) - -diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S -index 412cbeb3ec..ef517e2945 100644 ---- a/xen/arch/x86/x86_64/entry.S -+++ b/xen/arch/x86/x86_64/entry.S -@@ -214,6 +214,9 @@ restore_all_guest: - #endif - - mov EFRAME_rip(%rsp), %rcx -+ -+ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */ -+ - cmpw $FLAT_USER_CS32, EFRAME_cs(%rsp) - mov EFRAME_rsp(%rsp), %rsp - je 1f -@@ -227,6 +230,9 @@ restore_all_guest: - iret_exit_to_guest: - andl $~(X86_EFLAGS_IOPL | X86_EFLAGS_VM), EFRAME_eflags(%rsp) - orl $X86_EFLAGS_IF, EFRAME_eflags(%rsp) -+ -+ SPEC_CTRL_COND_VERW /* Req: %rsp=eframe Clob: efl */ -+ - addq $8,%rsp - .Lft0: iretq - _ASM_PRE_EXTABLE(.Lft0, handle_exception) -@@ -679,9 +685,22 @@ UNLIKELY_START(ne, exit_cr3) - UNLIKELY_END(exit_cr3) - - /* WARNING! `ret`, `call *`, `jmp *` not safe beyond this point. */ -- SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end, Clob: abcd */ -+ SPEC_CTRL_EXIT_TO_XEN /* Req: %r12=ist_exit %r14=end %rsp=regs, Clob: abcd */ - - RESTORE_ALL adj=8 -+ -+ /* -+ * When the CPU pushed this exception frame, it zero-extended eflags. -+ * For an IST exit, SPEC_CTRL_EXIT_TO_XEN stashed shadow copies of -+ * spec_ctrl_flags and ver_sel above eflags, as we can't use any GPRs, -+ * and we're at a random place on the stack, not in a CPUFINFO block. -+ * -+ * Account for ev/ec having already been popped off the stack. 
-+ */ -+ SPEC_CTRL_COND_VERW \ -+ scf=STK_REL(EFRAME_shadow_scf, EFRAME_rip), \ -+ sel=STK_REL(EFRAME_shadow_sel, EFRAME_rip) -+ - iretq - - ENTRY(common_interrupt) --- -2.44.0 - diff --git a/0043-x86-ioapic-Fix-signed-shifts-in-io_apic.c.patch b/0043-x86-ioapic-Fix-signed-shifts-in-io_apic.c.patch new file mode 100644 index 0000000..c368c1d --- /dev/null +++ b/0043-x86-ioapic-Fix-signed-shifts-in-io_apic.c.patch @@ -0,0 +1,46 @@ +From 0dc5fbee17cd2bcb1aa6a1cf420dd80381587de8 Mon Sep 17 00:00:00 2001 +From: Matthew Barnes <matthew.barnes@cloud.com> +Date: Thu, 4 Jul 2024 14:11:03 +0200 +Subject: [PATCH 43/56] x86/ioapic: Fix signed shifts in io_apic.c + +There exists bitshifts in the IOAPIC code where signed integers are +shifted to the left by up to 31 bits, which is undefined behaviour. + +This patch fixes this by changing the integers from signed to unsigned. + +Signed-off-by: Matthew Barnes <matthew.barnes@cloud.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: c5746b021e573184fb92b601a0e93a295485054e +master date: 2024-06-21 15:09:26 +0100 +--- + xen/arch/x86/io_apic.c | 6 ++++-- + 1 file changed, 4 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c +index 0ef61fb2f1..c5342789e8 100644 +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -1692,7 +1692,8 @@ static void cf_check mask_and_ack_level_ioapic_irq(struct irq_desc *desc) + !io_apic_level_ack_pending(desc->irq)) + move_masked_irq(desc); + +- if ( !(v & (1 << (i & 0x1f))) ) { ++ if ( !(v & (1U << (i & 0x1f))) ) ++ { + spin_lock(&ioapic_lock); + __edge_IO_APIC_irq(desc->irq); + __level_IO_APIC_irq(desc->irq); +@@ -1756,7 +1757,8 @@ static void cf_check end_level_ioapic_irq_new(struct irq_desc *desc, u8 vector) + !io_apic_level_ack_pending(desc->irq) ) + move_native_irq(desc); + +- if (!(v & (1 << (i & 0x1f)))) { ++ if ( !(v & (1U << (i & 0x1f))) ) ++ { + spin_lock(&ioapic_lock); + 
__mask_IO_APIC_irq(desc->irq); + __edge_IO_APIC_irq(desc->irq); +-- +2.45.2 + diff --git a/0043-x86-spec-ctrl-Rename-VERW-related-options.patch b/0043-x86-spec-ctrl-Rename-VERW-related-options.patch deleted file mode 100644 index 38edc15..0000000 --- a/0043-x86-spec-ctrl-Rename-VERW-related-options.patch +++ /dev/null @@ -1,248 +0,0 @@ -From d55d52961d13d4fcd1441fcfca98f690e687b941 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Mon, 12 Feb 2024 17:50:43 +0000 -Subject: [PATCH 43/67] x86/spec-ctrl: Rename VERW related options - -VERW is going to be used for a 3rd purpose, and the existing nomenclature -didn't survive the Stale MMIO issues terribly well. - -Rename the command line option from `md-clear=` to `verw=`. This is more -consistent with other options which tend to be named based on what they're -doing, not which feature enumeration they use behind the scenes. Retain -`md-clear=` as a deprecated alias. - -Rename opt_md_clear_{pv,hvm} and opt_fb_clear_mmio to opt_verw_{pv,hvm,mmio}, -which has a side effect of making spec_ctrl_init_domain() rather clearer to -follow. - -No functional change. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit f7603ca252e4226739eb3129a5290ee3da3f8ea4) ---- - docs/misc/xen-command-line.pandoc | 15 ++++---- - xen/arch/x86/spec_ctrl.c | 62 ++++++++++++++++--------------- - 2 files changed, 40 insertions(+), 37 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index 2006697226..d909ec94fe 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2324,7 +2324,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). 
- - ### spec-ctrl (x86) - > `= List of [ <bool>, xen=<bool>, {pv,hvm}=<bool>, --> {msr-sc,rsb,md-clear,ibpb-entry}=<bool>|{pv,hvm}=<bool>, -+> {msr-sc,rsb,verw,ibpb-entry}=<bool>|{pv,hvm}=<bool>, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, - > unpriv-mmio,gds-mit,div-scrub}=<bool> ]` -@@ -2349,7 +2349,7 @@ in place for guests to use. - - Use of a positive boolean value for either of these options is invalid. - --The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `md-clear=` and `ibpb-entry=` options -+The `pv=`, `hvm=`, `msr-sc=`, `rsb=`, `verw=` and `ibpb-entry=` options - offer fine grained control over the primitives by Xen. These impact Xen's - ability to protect itself, and/or Xen's ability to virtualise support for - guests to use. -@@ -2366,11 +2366,12 @@ guests to use. - guests and if disabled, guests will be unable to use IBRS/STIBP/SSBD/etc. - * `rsb=` offers control over whether to overwrite the Return Stack Buffer / - Return Address Stack on entry to Xen and on idle. --* `md-clear=` offers control over whether to use VERW to flush -- microarchitectural buffers on idle and exit from Xen. *Note: For -- compatibility with development versions of this fix, `mds=` is also accepted -- on Xen 4.12 and earlier as an alias. Consult vendor documentation in -- preference to here.* -+* `verw=` offers control over whether to use VERW for its scrubbing side -+ effects at appropriate privilege transitions. The exact side effects are -+ microarchitecture and microcode specific. *Note: `md-clear=` is accepted as -+ a deprecated alias. For compatibility with development versions of XSA-297, -+ `mds=` is also accepted on Xen 4.12 and earlier as an alias. Consult vendor -+ documentation in preference to here.* - * `ibpb-entry=` offers control over whether IBPB (Indirect Branch Prediction - Barrier) is used on entry to Xen. 
This is used by default on hardware - vulnerable to Branch Type Confusion, and hardware vulnerable to Speculative -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 25a18ac598..e12ec9930c 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -37,8 +37,8 @@ static bool __initdata opt_msr_sc_pv = true; - static bool __initdata opt_msr_sc_hvm = true; - static int8_t __initdata opt_rsb_pv = -1; - static bool __initdata opt_rsb_hvm = true; --static int8_t __ro_after_init opt_md_clear_pv = -1; --static int8_t __ro_after_init opt_md_clear_hvm = -1; -+static int8_t __ro_after_init opt_verw_pv = -1; -+static int8_t __ro_after_init opt_verw_hvm = -1; - - static int8_t __ro_after_init opt_ibpb_entry_pv = -1; - static int8_t __ro_after_init opt_ibpb_entry_hvm = -1; -@@ -78,7 +78,7 @@ static bool __initdata cpu_has_bug_mds; /* Any other M{LP,SB,FB}DS combination. - - static int8_t __initdata opt_srb_lock = -1; - static bool __initdata opt_unpriv_mmio; --static bool __ro_after_init opt_fb_clear_mmio; -+static bool __ro_after_init opt_verw_mmio; - static int8_t __initdata opt_gds_mit = -1; - static int8_t __initdata opt_div_scrub = -1; - -@@ -120,8 +120,8 @@ static int __init cf_check parse_spec_ctrl(const char *s) - disable_common: - opt_rsb_pv = false; - opt_rsb_hvm = false; -- opt_md_clear_pv = 0; -- opt_md_clear_hvm = 0; -+ opt_verw_pv = 0; -+ opt_verw_hvm = 0; - opt_ibpb_entry_pv = 0; - opt_ibpb_entry_hvm = 0; - opt_ibpb_entry_dom0 = false; -@@ -152,14 +152,14 @@ static int __init cf_check parse_spec_ctrl(const char *s) - { - opt_msr_sc_pv = val; - opt_rsb_pv = val; -- opt_md_clear_pv = val; -+ opt_verw_pv = val; - opt_ibpb_entry_pv = val; - } - else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) - { - opt_msr_sc_hvm = val; - opt_rsb_hvm = val; -- opt_md_clear_hvm = val; -+ opt_verw_hvm = val; - opt_ibpb_entry_hvm = val; - } - else if ( (val = parse_boolean("msr-sc", s, ss)) != -1 ) -@@ -204,21 +204,22 @@ static int __init cf_check 
parse_spec_ctrl(const char *s) - break; - } - } -- else if ( (val = parse_boolean("md-clear", s, ss)) != -1 ) -+ else if ( (val = parse_boolean("verw", s, ss)) != -1 || -+ (val = parse_boolean("md-clear", s, ss)) != -1 ) - { - switch ( val ) - { - case 0: - case 1: -- opt_md_clear_pv = opt_md_clear_hvm = val; -+ opt_verw_pv = opt_verw_hvm = val; - break; - - case -2: -- s += strlen("md-clear="); -+ s += (*s == 'v') ? strlen("verw=") : strlen("md-clear="); - if ( (val = parse_boolean("pv", s, ss)) >= 0 ) -- opt_md_clear_pv = val; -+ opt_verw_pv = val; - else if ( (val = parse_boolean("hvm", s, ss)) >= 0 ) -- opt_md_clear_hvm = val; -+ opt_verw_hvm = val; - else - default: - rc = -EINVAL; -@@ -540,8 +541,8 @@ static void __init print_details(enum ind_thunk thunk) - opt_srb_lock ? " SRB_LOCK+" : " SRB_LOCK-", - opt_ibpb_ctxt_switch ? " IBPB-ctxt" : "", - opt_l1d_flush ? " L1D_FLUSH" : "", -- opt_md_clear_pv || opt_md_clear_hvm || -- opt_fb_clear_mmio ? " VERW" : "", -+ opt_verw_pv || opt_verw_hvm || -+ opt_verw_mmio ? " VERW" : "", - opt_div_scrub ? " DIV" : "", - opt_branch_harden ? " BRANCH_HARDEN" : ""); - -@@ -562,13 +563,13 @@ static void __init print_details(enum ind_thunk thunk) - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) || - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) || - amd_virt_spec_ctrl || -- opt_eager_fpu || opt_md_clear_hvm) ? "" : " None", -+ opt_eager_fpu || opt_verw_hvm) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_HVM) ? " MSR_SPEC_CTRL" : "", - (boot_cpu_has(X86_FEATURE_SC_MSR_HVM) || - amd_virt_spec_ctrl) ? " MSR_VIRT_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_HVM) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", -- opt_md_clear_hvm ? " MD_CLEAR" : "", -+ opt_verw_hvm ? " VERW" : "", - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_HVM) ? 
" IBPB-entry" : ""); - - #endif -@@ -577,11 +578,11 @@ static void __init print_details(enum ind_thunk thunk) - (boot_cpu_has(X86_FEATURE_SC_MSR_PV) || - boot_cpu_has(X86_FEATURE_SC_RSB_PV) || - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) || -- opt_eager_fpu || opt_md_clear_pv) ? "" : " None", -+ opt_eager_fpu || opt_verw_pv) ? "" : " None", - boot_cpu_has(X86_FEATURE_SC_MSR_PV) ? " MSR_SPEC_CTRL" : "", - boot_cpu_has(X86_FEATURE_SC_RSB_PV) ? " RSB" : "", - opt_eager_fpu ? " EAGER_FPU" : "", -- opt_md_clear_pv ? " MD_CLEAR" : "", -+ opt_verw_pv ? " VERW" : "", - boot_cpu_has(X86_FEATURE_IBPB_ENTRY_PV) ? " IBPB-entry" : ""); - - printk(" XPTI (64-bit PV only): Dom0 %s, DomU %s (with%s PCID)\n", -@@ -1514,8 +1515,8 @@ void spec_ctrl_init_domain(struct domain *d) - { - bool pv = is_pv_domain(d); - -- bool verw = ((pv ? opt_md_clear_pv : opt_md_clear_hvm) || -- (opt_fb_clear_mmio && is_iommu_enabled(d))); -+ bool verw = ((pv ? opt_verw_pv : opt_verw_hvm) || -+ (opt_verw_mmio && is_iommu_enabled(d))); - - bool ibpb = ((pv ? opt_ibpb_entry_pv : opt_ibpb_entry_hvm) && - (d->domain_id != 0 || opt_ibpb_entry_dom0)); -@@ -1878,19 +1879,20 @@ void __init init_speculation_mitigations(void) - * the return-to-guest path. - */ - if ( opt_unpriv_mmio ) -- opt_fb_clear_mmio = cpu_has_fb_clear; -+ opt_verw_mmio = cpu_has_fb_clear; - - /* - * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. - * This will only be a token effort for MLPDS/MFBDS when HT is enabled, - * but it is somewhat better than nothing. 
- */ -- if ( opt_md_clear_pv == -1 ) -- opt_md_clear_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -- boot_cpu_has(X86_FEATURE_MD_CLEAR)); -- if ( opt_md_clear_hvm == -1 ) -- opt_md_clear_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -- boot_cpu_has(X86_FEATURE_MD_CLEAR)); -+ if ( opt_verw_pv == -1 ) -+ opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -+ cpu_has_md_clear); -+ -+ if ( opt_verw_hvm == -1 ) -+ opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -+ cpu_has_md_clear); - - /* - * Enable MDS/MMIO defences as applicable. The Idle blocks need using if -@@ -1903,12 +1905,12 @@ void __init init_speculation_mitigations(void) - * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.) - * - * After calculating the appropriate idle setting, simplify -- * opt_md_clear_hvm to mean just "should we VERW on the way into HVM -+ * opt_verw_hvm to mean just "should we VERW on the way into HVM - * guests", so spec_ctrl_init_domain() can calculate suitable settings. 
- */ -- if ( opt_md_clear_pv || opt_md_clear_hvm || opt_fb_clear_mmio ) -+ if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); -- opt_md_clear_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; -+ opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; - - /* - * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT --- -2.44.0 - diff --git a/0044-tools-xl-Open-xldevd.log-with-O_CLOEXEC.patch b/0044-tools-xl-Open-xldevd.log-with-O_CLOEXEC.patch new file mode 100644 index 0000000..39dc3eb --- /dev/null +++ b/0044-tools-xl-Open-xldevd.log-with-O_CLOEXEC.patch @@ -0,0 +1,53 @@ +From 2b3bf02c4f5e44d7d7bd3636530c9ebc837dea87 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 4 Jul 2024 14:11:36 +0200 +Subject: [PATCH 44/56] tools/xl: Open xldevd.log with O_CLOEXEC +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +`xl devd` has been observed leaking /var/log/xldevd.log into children. + +Note this is specifically safe; dup2() leaves O_CLOEXEC disabled on newfd, so +after setting up stdout/stderr, it's only the logfile fd which will close on +exec(). 
+ +Link: https://github.com/QubesOS/qubes-issues/issues/8292 +Reported-by: Demi Marie Obenour <demi@invisiblethingslab.com> +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Marek Marczykowski-Górecki <marmarek@invisiblethingslab.com> +Reviewed-by: Demi Marie Obenour <demi@invisiblethingslab.com> +Acked-by: Anthony PERARD <anthony.perard@vates.tech> +master commit: ba52b3b624e4a1a976908552364eba924ca45430 +master date: 2024-06-24 16:22:59 +0100 +--- + tools/xl/xl_utils.c | 6 +++++- + 1 file changed, 5 insertions(+), 1 deletion(-) + +diff --git a/tools/xl/xl_utils.c b/tools/xl/xl_utils.c +index 17489d1829..b0d23b2cdb 100644 +--- a/tools/xl/xl_utils.c ++++ b/tools/xl/xl_utils.c +@@ -27,6 +27,10 @@ + #include "xl.h" + #include "xl_utils.h" + ++#ifndef O_CLOEXEC ++#define O_CLOEXEC 0 ++#endif ++ + void dolog(const char *file, int line, const char *func, const char *fmt, ...) + { + va_list ap; +@@ -270,7 +274,7 @@ int do_daemonize(const char *name, const char *pidfile) + exit(-1); + } + +- CHK_SYSCALL(logfile = open(fullname, O_WRONLY|O_CREAT|O_APPEND, 0644)); ++ CHK_SYSCALL(logfile = open(fullname, O_WRONLY | O_CREAT | O_APPEND | O_CLOEXEC, 0644)); + free(fullname); + assert(logfile >= 3); + +-- +2.45.2 + diff --git a/0044-x86-spec-ctrl-VERW-handling-adjustments.patch b/0044-x86-spec-ctrl-VERW-handling-adjustments.patch deleted file mode 100644 index e2458c9..0000000 --- a/0044-x86-spec-ctrl-VERW-handling-adjustments.patch +++ /dev/null @@ -1,171 +0,0 @@ -From 6663430b442fdf9698bd8e03f701a4547309ad71 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 5 Mar 2024 19:33:37 +0000 -Subject: [PATCH 44/67] x86/spec-ctrl: VERW-handling adjustments - -... before we add yet more complexity to this logic. Mostly expanded -comments, but with three minor changes. - -1) Introduce cpu_has_useful_md_clear to simplify later logic in this patch and - future ones. - -2) We only ever need SC_VERW_IDLE when SMT is active. 
If SMT isn't active, - then there's no re-partition of pipeline resources based on thread-idleness - to worry about. - -3) The logic to adjust HVM VERW based on L1D_FLUSH is unmaintainable and, as - it turns out, wrong. SKIP_L1DFL is just a hint bit, whereas opt_l1d_flush - is the relevant decision of whether to use L1D_FLUSH based on - susceptibility and user preference. - - Rewrite the logic so it can be followed, and incorporate the fact that when - FB_CLEAR is visible, L1D_FLUSH isn't a safe substitution. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 1eb91a8a06230b4b64228c9a380194f8cfe6c5e2) ---- - xen/arch/x86/spec_ctrl.c | 99 +++++++++++++++++++++++++++++----------- - 1 file changed, 73 insertions(+), 26 deletions(-) - -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index e12ec9930c..adb6bc74e8 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -1531,7 +1531,7 @@ void __init init_speculation_mitigations(void) - { - enum ind_thunk thunk = THUNK_DEFAULT; - bool has_spec_ctrl, ibrs = false, hw_smt_enabled; -- bool cpu_has_bug_taa, retpoline_safe; -+ bool cpu_has_bug_taa, cpu_has_useful_md_clear, retpoline_safe; - - hw_smt_enabled = check_smt_enabled(); - -@@ -1867,50 +1867,97 @@ void __init init_speculation_mitigations(void) - "enabled. Please assess your configuration and choose an\n" - "explicit 'smt=<bool>' setting. See XSA-273.\n"); - -+ /* -+ * A brief summary of VERW-related changes. 
-+ * -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html -+ * -+ * Relevant ucodes: -+ * -+ * - May 2019, for MDS. Introduces the MD_CLEAR CPUID bit and VERW side -+ * effects to scrub Store/Load/Fill buffers as applicable. MD_CLEAR -+ * exists architecturally, even when the side effects have been removed. -+ * -+ * Use VERW to scrub on return-to-guest. Parts with L1D_FLUSH to -+ * mitigate L1TF have the same side effect, so no need to do both. -+ * -+ * Various Atoms suffer from Store-buffer sampling only. Store buffers -+ * are statically partitioned between non-idle threads, so scrubbing is -+ * wanted when going idle too. -+ * -+ * Load ports and Fill buffers are competitively shared between threads. -+ * SMT must be disabled for VERW scrubbing to be fully effective. -+ * -+ * - November 2019, for TAA. Extended VERW side effects to TSX-enabled -+ * MDS_NO parts. -+ * -+ * - February 2022, for Client TSX de-feature. Removed VERW side effects -+ * from Client CPUs only. -+ * -+ * - May 2022, for MMIO Stale Data. (Re)introduced Fill Buffer scrubbing -+ * on all MMIO-affected parts which didn't already have it for MDS -+ * reasons, enumerating FB_CLEAR on those parts only. -+ * -+ * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing -+ * side effects as VERW and cannot be used in its place. -+ */ - mds_calculations(); - - /* -- * Parts which enumerate FB_CLEAR are those which are post-MDS_NO and have -- * reintroduced the VERW fill buffer flushing side effect because of a -- * susceptibility to FBSDP. 
-+ * Parts which enumerate FB_CLEAR are those with now-updated microcode -+ * which weren't susceptible to the original MFBDS (and therefore didn't -+ * have Fill Buffer scrubbing side effects to begin with, or were Client -+ * MDS_NO non-TAA_NO parts where the scrubbing was removed), but have had -+ * the scrubbing reintroduced because of a susceptibility to FBSDP. - * - * If unprivileged guests have (or will have) MMIO mappings, we can - * mitigate cross-domain leakage of fill buffer data by issuing VERW on -- * the return-to-guest path. -+ * the return-to-guest path. This is only a token effort if SMT is -+ * active. - */ - if ( opt_unpriv_mmio ) - opt_verw_mmio = cpu_has_fb_clear; - - /* -- * By default, enable PV and HVM mitigations on MDS-vulnerable hardware. -- * This will only be a token effort for MLPDS/MFBDS when HT is enabled, -- * but it is somewhat better than nothing. -+ * MD_CLEAR is enumerated architecturally forevermore, even after the -+ * scrubbing side effects have been removed. Create ourselves a version -+ * which expresses whether we think MD_CLEAR is having any useful side -+ * effect. -+ */ -+ cpu_has_useful_md_clear = (cpu_has_md_clear && -+ (cpu_has_bug_mds || cpu_has_bug_msbds_only)); -+ -+ /* -+ * By default, use VERW scrubbing on applicable hardware, if we think it's -+ * going to have an effect. This will only be a token effort for -+ * MLPDS/MFBDS when SMT is enabled. - */ - if ( opt_verw_pv == -1 ) -- opt_verw_pv = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -- cpu_has_md_clear); -+ opt_verw_pv = cpu_has_useful_md_clear; - - if ( opt_verw_hvm == -1 ) -- opt_verw_hvm = ((cpu_has_bug_mds || cpu_has_bug_msbds_only) && -- cpu_has_md_clear); -+ opt_verw_hvm = cpu_has_useful_md_clear; - - /* -- * Enable MDS/MMIO defences as applicable. The Idle blocks need using if -- * either the PV or HVM MDS defences are used, or if we may give MMIO -- * access to untrusted guests. -- * -- * HVM is more complicated. 
The MD_CLEAR microcode extends L1D_FLUSH with -- * equivalent semantics to avoid needing to perform both flushes on the -- * HVM path. Therefore, we don't need VERW in addition to L1D_FLUSH (for -- * MDS mitigations. L1D_FLUSH is not safe for MMIO mitigations.) -- * -- * After calculating the appropriate idle setting, simplify -- * opt_verw_hvm to mean just "should we VERW on the way into HVM -- * guests", so spec_ctrl_init_domain() can calculate suitable settings. -+ * If SMT is active, and we're protecting against MDS or MMIO stale data, -+ * we need to scrub before going idle as well as on return to guest. -+ * Various pipeline resources are repartitioned amongst non-idle threads. - */ -- if ( opt_verw_pv || opt_verw_hvm || opt_verw_mmio ) -+ if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) || -+ opt_verw_mmio) && hw_smt_enabled ) - setup_force_cpu_cap(X86_FEATURE_SC_VERW_IDLE); -- opt_verw_hvm &= !cpu_has_skip_l1dfl && !opt_l1d_flush; -+ -+ /* -+ * After calculating the appropriate idle setting, simplify opt_verw_hvm -+ * to mean just "should we VERW on the way into HVM guests", so -+ * spec_ctrl_init_domain() can calculate suitable settings. -+ * -+ * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the -+ * only *_CLEAR we can see. 
-+ */ -+ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear ) -+ opt_verw_hvm = false; - - /* - * Warn the user if they are on MLPDS/MFBDS-vulnerable hardware with HT --- -2.44.0 - diff --git a/0045-pirq_cleanup_check-leaks.patch b/0045-pirq_cleanup_check-leaks.patch new file mode 100644 index 0000000..dcf96c7 --- /dev/null +++ b/0045-pirq_cleanup_check-leaks.patch @@ -0,0 +1,84 @@ +From c9f50d2c5f29b630603e2b95f29e5b6e416a6187 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Thu, 4 Jul 2024 14:11:57 +0200 +Subject: [PATCH 45/56] pirq_cleanup_check() leaks +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Its original introduction had two issues: For one the "common" part of +the checks (carried out in the macro) was inverted. And then after +removal from the radix tree the structure wasn't scheduled for freeing. +(All structures still left in the radix tree would be freed upon domain +destruction, though.) + +For the freeing to be safe even if it didn't use RCU (i.e. to avoid use- +after-free), re-arrange checks/operations in evtchn_close(), such that +the pointer wouldn't be used anymore after calling pirq_cleanup_check() +(noting that unmap_domain_pirq_emuirq() itself calls the function in the +success case). 
+ +Fixes: c24536b636f2 ("replace d->nr_pirqs sized arrays with radix tree") +Fixes: 79858fee307c ("xen: fix hvm_domain_use_pirq's behavior") +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: daa90dfea9175c07f13d1a2d901857b2dd14d080 +master date: 2024-07-02 08:35:56 +0200 +--- + xen/arch/x86/irq.c | 1 + + xen/common/event_channel.c | 11 ++++++++--- + xen/include/xen/irq.h | 2 +- + 3 files changed, 10 insertions(+), 4 deletions(-) + +diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c +index 290f8d26e7..00be3b88e8 100644 +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -1413,6 +1413,7 @@ void (pirq_cleanup_check)(struct pirq *pirq, struct domain *d) + + if ( radix_tree_delete(&d->pirq_tree, pirq->pirq) != pirq ) + BUG(); ++ free_pirq_struct(pirq); + } + + /* Flush all ready EOIs from the top of this CPU's pending-EOI stack. */ +diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c +index 66f924a7b0..b1a6215c37 100644 +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -705,11 +705,16 @@ int evtchn_close(struct domain *d1, int port1, bool guest) + if ( !is_hvm_domain(d1) ) + pirq_guest_unbind(d1, pirq); + pirq->evtchn = 0; +- pirq_cleanup_check(pirq, d1); + #ifdef CONFIG_X86 +- if ( is_hvm_domain(d1) && domain_pirq_to_irq(d1, pirq->pirq) > 0 ) +- unmap_domain_pirq_emuirq(d1, pirq->pirq); ++ if ( !is_hvm_domain(d1) || ++ domain_pirq_to_irq(d1, pirq->pirq) <= 0 || ++ unmap_domain_pirq_emuirq(d1, pirq->pirq) < 0 ) ++ /* ++ * The successful path of unmap_domain_pirq_emuirq() will have ++ * called pirq_cleanup_check() already. 
++ */ + #endif ++ pirq_cleanup_check(pirq, d1); + } + unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]); + break; +diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h +index 65083135e1..5dcd2d8f0c 100644 +--- a/xen/include/xen/irq.h ++++ b/xen/include/xen/irq.h +@@ -180,7 +180,7 @@ extern struct pirq *pirq_get_info(struct domain *d, int pirq); + void pirq_cleanup_check(struct pirq *pirq, struct domain *d); + + #define pirq_cleanup_check(pirq, d) \ +- ((pirq)->evtchn ? pirq_cleanup_check(pirq, d) : (void)0) ++ (!(pirq)->evtchn ? pirq_cleanup_check(pirq, d) : (void)0) + + extern void pirq_guest_eoi(struct pirq *pirq); + extern void desc_guest_eoi(struct irq_desc *desc, struct pirq *pirq); +-- +2.45.2 + diff --git a/0045-x86-spec-ctrl-Mitigation-Register-File-Data-Sampling.patch b/0045-x86-spec-ctrl-Mitigation-Register-File-Data-Sampling.patch deleted file mode 100644 index 4a10524..0000000 --- a/0045-x86-spec-ctrl-Mitigation-Register-File-Data-Sampling.patch +++ /dev/null @@ -1,320 +0,0 @@ -From d85481135d87abbbf1feab18b749288fa08b65f2 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Thu, 22 Jun 2023 23:32:19 +0100 -Subject: [PATCH 45/67] x86/spec-ctrl: Mitigation Register File Data Sampling - -RFDS affects Atom cores, also branded E-cores, between the Goldmont and -Gracemont microarchitectures. This includes Alder Lake and Raptor Lake hybrid -client systems which have a mix of Gracemont and other types of cores. - -Two new bits have been defined; RFDS_CLEAR to indicate VERW has more side -effects, and RFDS_NO to indicate that the system is unaffected. Plenty of -unaffected CPUs won't be getting RFDS_NO retrofitted in microcode, so we -synthesise it. Alder Lake and Raptor Lake Xeon-E's are unaffected due to -their platform configuration, and we must use the Hybrid CPUID bit to -distinguish them from their non-Xeon counterparts. 
- -Like MD_CLEAR and FB_CLEAR, RFDS_CLEAR needs OR-ing across a resource pool, so -set it in the max policies and reflect the host setting in default. - -This is part of XSA-452 / CVE-2023-28746. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit fb5b6f6744713410c74cfc12b7176c108e3c9a31) ---- - tools/misc/xen-cpuid.c | 5 +- - xen/arch/x86/cpu-policy.c | 5 + - xen/arch/x86/include/asm/cpufeature.h | 3 + - xen/arch/x86/include/asm/msr-index.h | 2 + - xen/arch/x86/spec_ctrl.c | 100 +++++++++++++++++++- - xen/include/public/arch-x86/cpufeatureset.h | 3 + - 6 files changed, 111 insertions(+), 7 deletions(-) - -diff --git a/tools/misc/xen-cpuid.c b/tools/misc/xen-cpuid.c -index aefc140d66..5ceea8be07 100644 ---- a/tools/misc/xen-cpuid.c -+++ b/tools/misc/xen-cpuid.c -@@ -172,7 +172,7 @@ static const char *const str_7d0[32] = - [ 8] = "avx512-vp2intersect", [ 9] = "srbds-ctrl", - [10] = "md-clear", [11] = "rtm-always-abort", - /* 12 */ [13] = "tsx-force-abort", -- [14] = "serialize", -+ [14] = "serialize", [15] = "hybrid", - [16] = "tsxldtrk", - [18] = "pconfig", - [20] = "cet-ibt", -@@ -237,7 +237,8 @@ static const char *const str_m10Al[32] = - [20] = "bhi-no", [21] = "xapic-status", - /* 22 */ [23] = "ovrclk-status", - [24] = "pbrsb-no", [25] = "gds-ctrl", -- [26] = "gds-no", -+ [26] = "gds-no", [27] = "rfds-no", -+ [28] = "rfds-clear", - }; - - static const char *const str_m10Ah[32] = -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index 7b875a7221..96c2cee1a8 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -444,6 +444,7 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) - */ - __set_bit(X86_FEATURE_MD_CLEAR, fs); - __set_bit(X86_FEATURE_FB_CLEAR, fs); -+ __set_bit(X86_FEATURE_RFDS_CLEAR, fs); - - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an -@@ -493,6 +494,10 @@ static void __init 
guest_common_default_feature_adjustments(uint32_t *fs) - if ( cpu_has_fb_clear ) - __set_bit(X86_FEATURE_FB_CLEAR, fs); - -+ __clear_bit(X86_FEATURE_RFDS_CLEAR, fs); -+ if ( cpu_has_rfds_clear ) -+ __set_bit(X86_FEATURE_RFDS_CLEAR, fs); -+ - /* - * The Gather Data Sampling microcode mitigation (August 2023) has an - * adverse performance impact on the CLWB instruction on SKX/CLX/CPX. -diff --git a/xen/arch/x86/include/asm/cpufeature.h b/xen/arch/x86/include/asm/cpufeature.h -index ec824e8954..a6b8af1296 100644 ---- a/xen/arch/x86/include/asm/cpufeature.h -+++ b/xen/arch/x86/include/asm/cpufeature.h -@@ -140,6 +140,7 @@ - #define cpu_has_rtm_always_abort boot_cpu_has(X86_FEATURE_RTM_ALWAYS_ABORT) - #define cpu_has_tsx_force_abort boot_cpu_has(X86_FEATURE_TSX_FORCE_ABORT) - #define cpu_has_serialize boot_cpu_has(X86_FEATURE_SERIALIZE) -+#define cpu_has_hybrid boot_cpu_has(X86_FEATURE_HYBRID) - #define cpu_has_avx512_fp16 boot_cpu_has(X86_FEATURE_AVX512_FP16) - #define cpu_has_arch_caps boot_cpu_has(X86_FEATURE_ARCH_CAPS) - -@@ -161,6 +162,8 @@ - #define cpu_has_rrsba boot_cpu_has(X86_FEATURE_RRSBA) - #define cpu_has_gds_ctrl boot_cpu_has(X86_FEATURE_GDS_CTRL) - #define cpu_has_gds_no boot_cpu_has(X86_FEATURE_GDS_NO) -+#define cpu_has_rfds_no boot_cpu_has(X86_FEATURE_RFDS_NO) -+#define cpu_has_rfds_clear boot_cpu_has(X86_FEATURE_RFDS_CLEAR) - - /* Synthesized. 
*/ - #define cpu_has_arch_perfmon boot_cpu_has(X86_FEATURE_ARCH_PERFMON) -diff --git a/xen/arch/x86/include/asm/msr-index.h b/xen/arch/x86/include/asm/msr-index.h -index 6abf7bc34a..9b5f67711f 100644 ---- a/xen/arch/x86/include/asm/msr-index.h -+++ b/xen/arch/x86/include/asm/msr-index.h -@@ -88,6 +88,8 @@ - #define ARCH_CAPS_PBRSB_NO (_AC(1, ULL) << 24) - #define ARCH_CAPS_GDS_CTRL (_AC(1, ULL) << 25) - #define ARCH_CAPS_GDS_NO (_AC(1, ULL) << 26) -+#define ARCH_CAPS_RFDS_NO (_AC(1, ULL) << 27) -+#define ARCH_CAPS_RFDS_CLEAR (_AC(1, ULL) << 28) - - #define MSR_FLUSH_CMD 0x0000010b - #define FLUSH_CMD_L1D (_AC(1, ULL) << 0) -diff --git a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index adb6bc74e8..1ee81e2dfe 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -24,6 +24,7 @@ - - #include <asm/amd.h> - #include <asm/hvm/svm/svm.h> -+#include <asm/intel-family.h> - #include <asm/microcode.h> - #include <asm/msr.h> - #include <asm/pv/domain.h> -@@ -447,7 +448,7 @@ static void __init print_details(enum ind_thunk thunk) - * Hardware read-only information, stating immunity to certain issues, or - * suggestions of which mitigation to use. - */ -- printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware hints:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - (caps & ARCH_CAPS_RDCL_NO) ? " RDCL_NO" : "", - (caps & ARCH_CAPS_EIBRS) ? " EIBRS" : "", - (caps & ARCH_CAPS_RSBA) ? " RSBA" : "", -@@ -463,6 +464,7 @@ static void __init print_details(enum ind_thunk thunk) - (caps & ARCH_CAPS_FB_CLEAR) ? " FB_CLEAR" : "", - (caps & ARCH_CAPS_PBRSB_NO) ? " PBRSB_NO" : "", - (caps & ARCH_CAPS_GDS_NO) ? " GDS_NO" : "", -+ (caps & ARCH_CAPS_RFDS_NO) ? " RFDS_NO" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_ALWAYS)) ? " IBRS_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_STIBP_ALWAYS)) ? " STIBP_ALWAYS" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS_FAST)) ? 
" IBRS_FAST" : "", -@@ -473,7 +475,7 @@ static void __init print_details(enum ind_thunk thunk) - (e21a & cpufeat_mask(X86_FEATURE_SRSO_NO)) ? " SRSO_NO" : ""); - - /* Hardware features which need driving to mitigate issues. */ -- printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s\n", -+ printk(" Hardware features:%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n", - (e8b & cpufeat_mask(X86_FEATURE_IBPB)) || - (_7d0 & cpufeat_mask(X86_FEATURE_IBRSB)) ? " IBPB" : "", - (e8b & cpufeat_mask(X86_FEATURE_IBRS)) || -@@ -491,6 +493,7 @@ static void __init print_details(enum ind_thunk thunk) - (caps & ARCH_CAPS_TSX_CTRL) ? " TSX_CTRL" : "", - (caps & ARCH_CAPS_FB_CLEAR_CTRL) ? " FB_CLEAR_CTRL" : "", - (caps & ARCH_CAPS_GDS_CTRL) ? " GDS_CTRL" : "", -+ (caps & ARCH_CAPS_RFDS_CLEAR) ? " RFDS_CLEAR" : "", - (e21a & cpufeat_mask(X86_FEATURE_SBPB)) ? " SBPB" : ""); - - /* Compiled-in support which pertains to mitigations. */ -@@ -1359,6 +1362,83 @@ static __init void mds_calculations(void) - } - } - -+/* -+ * Register File Data Sampling affects Atom cores from the Goldmont to -+ * Gracemont microarchitectures. The March 2024 microcode adds RFDS_NO to -+ * some but not all unaffected parts, and RFDS_CLEAR to affected parts still -+ * in support. -+ * -+ * Alder Lake and Raptor Lake client CPUs have a mix of P cores -+ * (Golden/Raptor Cove, not vulnerable) and E cores (Gracemont, -+ * vulnerable), and both enumerate RFDS_CLEAR. -+ * -+ * Both exist in a Xeon SKU, which has the E cores (Gracemont) disabled by -+ * platform configuration, and enumerate RFDS_NO. -+ * -+ * With older parts, or with out-of-date microcode, synthesise RFDS_NO when -+ * safe to do so. -+ * -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html -+ */ -+static void __init rfds_calculations(void) -+{ -+ /* RFDS is only known to affect Intel Family 6 processors at this time. 
*/ -+ if ( boot_cpu_data.x86_vendor != X86_VENDOR_INTEL || -+ boot_cpu_data.x86 != 6 ) -+ return; -+ -+ /* -+ * If RFDS_NO or RFDS_CLEAR are visible, we've either got suitable -+ * microcode, or an RFDS-aware hypervisor is levelling us in a pool. -+ */ -+ if ( cpu_has_rfds_no || cpu_has_rfds_clear ) -+ return; -+ -+ /* If we're virtualised, don't attempt to synthesise RFDS_NO. */ -+ if ( cpu_has_hypervisor ) -+ return; -+ -+ /* -+ * Not all CPUs are expected to get a microcode update enumerating one of -+ * RFDS_{NO,CLEAR}, or we might have out-of-date microcode. -+ */ -+ switch ( boot_cpu_data.x86_model ) -+ { -+ case INTEL_FAM6_ALDERLAKE: -+ case INTEL_FAM6_RAPTORLAKE: -+ /* -+ * Alder Lake and Raptor Lake might be a client SKU (with the -+ * Gracemont cores active, and therefore vulnerable) or might be a -+ * server SKU (with the Gracemont cores disabled, and therefore not -+ * vulnerable). -+ * -+ * See if the CPU identifies as hybrid to distinguish the two cases. -+ */ -+ if ( !cpu_has_hybrid ) -+ break; -+ fallthrough; -+ case INTEL_FAM6_ALDERLAKE_L: -+ case INTEL_FAM6_RAPTORLAKE_P: -+ case INTEL_FAM6_RAPTORLAKE_S: -+ -+ case INTEL_FAM6_ATOM_GOLDMONT: /* Apollo Lake */ -+ case INTEL_FAM6_ATOM_GOLDMONT_D: /* Denverton */ -+ case INTEL_FAM6_ATOM_GOLDMONT_PLUS: /* Gemini Lake */ -+ case INTEL_FAM6_ATOM_TREMONT_D: /* Snow Ridge / Parker Ridge */ -+ case INTEL_FAM6_ATOM_TREMONT: /* Elkhart Lake */ -+ case INTEL_FAM6_ATOM_TREMONT_L: /* Jasper Lake */ -+ case INTEL_FAM6_ATOM_GRACEMONT: /* Alder Lake N */ -+ return; -+ } -+ -+ /* -+ * We appear to be on an unaffected CPU which didn't enumerate RFDS_NO, -+ * perhaps because of its age or because of out-of-date microcode. -+ * Synthesise it. 
-+ */ -+ setup_force_cpu_cap(X86_FEATURE_RFDS_NO); -+} -+ - static bool __init cpu_has_gds(void) - { - /* -@@ -1872,6 +1952,7 @@ void __init init_speculation_mitigations(void) - * - * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/intel-analysis-microarchitectural-data-sampling.html - * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/technical-documentation/processor-mmio-stale-data-vulnerabilities.html -+ * https://www.intel.com/content/www/us/en/developer/articles/technical/software-security-guidance/advisory-guidance/register-file-data-sampling.html - * - * Relevant ucodes: - * -@@ -1901,8 +1982,12 @@ void __init init_speculation_mitigations(void) - * - * If FB_CLEAR is enumerated, L1D_FLUSH does not have the same scrubbing - * side effects as VERW and cannot be used in its place. -+ * -+ * - March 2023, for RFDS. Enumerate RFDS_CLEAR to mean that VERW now -+ * scrubs non-architectural entries from certain register files. - */ - mds_calculations(); -+ rfds_calculations(); - - /* - * Parts which enumerate FB_CLEAR are those with now-updated microcode -@@ -1934,15 +2019,19 @@ void __init init_speculation_mitigations(void) - * MLPDS/MFBDS when SMT is enabled. - */ - if ( opt_verw_pv == -1 ) -- opt_verw_pv = cpu_has_useful_md_clear; -+ opt_verw_pv = cpu_has_useful_md_clear || cpu_has_rfds_clear; - - if ( opt_verw_hvm == -1 ) -- opt_verw_hvm = cpu_has_useful_md_clear; -+ opt_verw_hvm = cpu_has_useful_md_clear || cpu_has_rfds_clear; - - /* - * If SMT is active, and we're protecting against MDS or MMIO stale data, - * we need to scrub before going idle as well as on return to guest. - * Various pipeline resources are repartitioned amongst non-idle threads. -+ * -+ * We don't need to scrub on idle for RFDS. 
There are no affected cores -+ * which support SMT, despite there being affected cores in hybrid systems -+ * which have SMT elsewhere in the platform. - */ - if ( ((cpu_has_useful_md_clear && (opt_verw_pv || opt_verw_hvm)) || - opt_verw_mmio) && hw_smt_enabled ) -@@ -1956,7 +2045,8 @@ void __init init_speculation_mitigations(void) - * It is only safe to use L1D_FLUSH in place of VERW when MD_CLEAR is the - * only *_CLEAR we can see. - */ -- if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear ) -+ if ( opt_l1d_flush && cpu_has_md_clear && !cpu_has_fb_clear && -+ !cpu_has_rfds_clear ) - opt_verw_hvm = false; - - /* -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index aec1407613..113e6cadc1 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -264,6 +264,7 @@ XEN_CPUFEATURE(MD_CLEAR, 9*32+10) /*!A VERW clears microarchitectural buffe - XEN_CPUFEATURE(RTM_ALWAYS_ABORT, 9*32+11) /*! June 2021 TSX defeaturing in microcode. 
*/ - XEN_CPUFEATURE(TSX_FORCE_ABORT, 9*32+13) /* MSR_TSX_FORCE_ABORT.RTM_ABORT */ - XEN_CPUFEATURE(SERIALIZE, 9*32+14) /*A SERIALIZE insn */ -+XEN_CPUFEATURE(HYBRID, 9*32+15) /* Heterogeneous platform */ - XEN_CPUFEATURE(TSXLDTRK, 9*32+16) /*a TSX load tracking suspend/resume insns */ - XEN_CPUFEATURE(CET_IBT, 9*32+20) /* CET - Indirect Branch Tracking */ - XEN_CPUFEATURE(AVX512_FP16, 9*32+23) /* AVX512 FP16 instructions */ -@@ -330,6 +331,8 @@ XEN_CPUFEATURE(OVRCLK_STATUS, 16*32+23) /* MSR_OVERCLOCKING_STATUS */ - XEN_CPUFEATURE(PBRSB_NO, 16*32+24) /*A No Post-Barrier RSB predictions */ - XEN_CPUFEATURE(GDS_CTRL, 16*32+25) /* MCU_OPT_CTRL.GDS_MIT_{DIS,LOCK} */ - XEN_CPUFEATURE(GDS_NO, 16*32+26) /*A No Gather Data Sampling */ -+XEN_CPUFEATURE(RFDS_NO, 16*32+27) /*A No Register File Data Sampling */ -+XEN_CPUFEATURE(RFDS_CLEAR, 16*32+28) /*!A Register File(s) cleared by VERW */ - - /* Intel-defined CPU features, MSR_ARCH_CAPS 0x10a.edx, word 17 */ - --- -2.44.0 - diff --git a/0046-tools-dombuilder-Correct-the-length-calculation-in-x.patch b/0046-tools-dombuilder-Correct-the-length-calculation-in-x.patch new file mode 100644 index 0000000..b25f15d --- /dev/null +++ b/0046-tools-dombuilder-Correct-the-length-calculation-in-x.patch @@ -0,0 +1,44 @@ +From 8e51c8f1d45fad242a315fa17ba3582c02e66840 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 4 Jul 2024 14:12:31 +0200 +Subject: [PATCH 46/56] tools/dombuilder: Correct the length calculation in + xc_dom_alloc_segment() + +xc_dom_alloc_segment() is passed a size in bytes, calculates a size in pages +from it, then fills in the new segment information with a bytes value +re-calculated from the number of pages. + +This causes the module information given to the guest (MB, or PVH) to have +incorrect sizes; specifically, sizes rounded up to the next page. + +This in turn is problematic for Xen. 
When Xen finds a gzipped module, it +peeks at the end metadata to judge the decompressed size, which is a -4 +backreference from the reported end of the module. + +Fill in seg->vend using the correct number of bytes. + +Fixes: ea7c8a3d0e82 ("libxc: reorganize domain builder guest memory allocator") +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Acked-by: Anthony PERARD <anthony.perard@vates.tech> +master commit: 4c3a618b0adaa0cd59e0fa0898bb60978b8b3a5f +master date: 2024-07-02 10:50:18 +0100 +--- + tools/libs/guest/xg_dom_core.c | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/tools/libs/guest/xg_dom_core.c b/tools/libs/guest/xg_dom_core.c +index c4f4e7f3e2..f5521d528b 100644 +--- a/tools/libs/guest/xg_dom_core.c ++++ b/tools/libs/guest/xg_dom_core.c +@@ -601,7 +601,7 @@ int xc_dom_alloc_segment(struct xc_dom_image *dom, + memset(ptr, 0, pages * page_size); + + seg->vstart = start; +- seg->vend = dom->virt_alloc_end; ++ seg->vend = start + size; + + DOMPRINTF("%-20s: %-12s : 0x%" PRIx64 " -> 0x%" PRIx64 + " (pfn 0x%" PRIpfn " + 0x%" PRIpfn " pages)", +-- +2.45.2 + diff --git a/0046-x86-paging-Delete-update_cr3-s-do_locking-parameter.patch b/0046-x86-paging-Delete-update_cr3-s-do_locking-parameter.patch deleted file mode 100644 index ce397a1..0000000 --- a/0046-x86-paging-Delete-update_cr3-s-do_locking-parameter.patch +++ /dev/null @@ -1,161 +0,0 @@ -From bf70ce8b3449c49eb828d5b1f4934a49b00fef35 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Wed, 20 Sep 2023 20:06:53 +0100 -Subject: [PATCH 46/67] x86/paging: Delete update_cr3()'s do_locking parameter - -Nicola reports that the XSA-438 fix introduced new MISRA violations because of -some incidental tidying it tried to do. The parameter is useless, so resolve -the MISRA regression by removing it. 
- -hap_update_cr3() discards the parameter entirely, while sh_update_cr3() uses -it to distinguish internal and external callers and therefore whether the -paging lock should be taken. - -However, we have paging_lock_recursive() for this purpose, which also avoids -the ability for the shadow internal callers to accidentally not hold the lock. - -Fixes: fb0ff49fe9f7 ("x86/shadow: defer releasing of PV's top-level shadow reference") -Reported-by: Nicola Vetrini <nicola.vetrini@bugseng.com> -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -Release-acked-by: Henry Wang <Henry.Wang@arm.com> -(cherry picked from commit e71157d1ac2a7fbf413130663cf0a93ff9fbcf7e) ---- - xen/arch/x86/include/asm/paging.h | 5 ++--- - xen/arch/x86/mm/hap/hap.c | 5 ++--- - xen/arch/x86/mm/shadow/common.c | 2 +- - xen/arch/x86/mm/shadow/multi.c | 17 ++++++++--------- - xen/arch/x86/mm/shadow/none.c | 3 +-- - 5 files changed, 14 insertions(+), 18 deletions(-) - -diff --git a/xen/arch/x86/include/asm/paging.h b/xen/arch/x86/include/asm/paging.h -index 94c590f31a..809ff35d9a 100644 ---- a/xen/arch/x86/include/asm/paging.h -+++ b/xen/arch/x86/include/asm/paging.h -@@ -138,8 +138,7 @@ struct paging_mode { - paddr_t ga, uint32_t *pfec, - unsigned int *page_order); - #endif -- pagetable_t (*update_cr3 )(struct vcpu *v, bool do_locking, -- bool noflush); -+ pagetable_t (*update_cr3 )(struct vcpu *v, bool noflush); - void (*update_paging_modes )(struct vcpu *v); - bool (*flush_tlb )(const unsigned long *vcpu_bitmap); - -@@ -312,7 +311,7 @@ static inline unsigned long paging_ga_to_gfn_cr3(struct vcpu *v, - * as the value to load into the host CR3 to schedule this vcpu */ - static inline pagetable_t paging_update_cr3(struct vcpu *v, bool noflush) - { -- return paging_get_hostmode(v)->update_cr3(v, 1, noflush); -+ return paging_get_hostmode(v)->update_cr3(v, noflush); - } - - /* Update all the things that are derived from the guest's CR0/CR3/CR4. 
-diff --git a/xen/arch/x86/mm/hap/hap.c b/xen/arch/x86/mm/hap/hap.c -index 57a19c3d59..3ad39a7dd7 100644 ---- a/xen/arch/x86/mm/hap/hap.c -+++ b/xen/arch/x86/mm/hap/hap.c -@@ -739,8 +739,7 @@ static bool cf_check hap_invlpg(struct vcpu *v, unsigned long linear) - return 1; - } - --static pagetable_t cf_check hap_update_cr3( -- struct vcpu *v, bool do_locking, bool noflush) -+static pagetable_t cf_check hap_update_cr3(struct vcpu *v, bool noflush) - { - v->arch.hvm.hw_cr[3] = v->arch.hvm.guest_cr[3]; - hvm_update_guest_cr3(v, noflush); -@@ -826,7 +825,7 @@ static void cf_check hap_update_paging_modes(struct vcpu *v) - } - - /* CR3 is effectively updated by a mode change. Flush ASIDs, etc. */ -- hap_update_cr3(v, 0, false); -+ hap_update_cr3(v, false); - - unlock: - paging_unlock(d); -diff --git a/xen/arch/x86/mm/shadow/common.c b/xen/arch/x86/mm/shadow/common.c -index c0940f939e..18714dbd02 100644 ---- a/xen/arch/x86/mm/shadow/common.c -+++ b/xen/arch/x86/mm/shadow/common.c -@@ -2579,7 +2579,7 @@ static void sh_update_paging_modes(struct vcpu *v) - } - #endif /* OOS */ - -- v->arch.paging.mode->update_cr3(v, 0, false); -+ v->arch.paging.mode->update_cr3(v, false); - } - - void cf_check shadow_update_paging_modes(struct vcpu *v) -diff --git a/xen/arch/x86/mm/shadow/multi.c b/xen/arch/x86/mm/shadow/multi.c -index c92b354a78..e54a507b54 100644 ---- a/xen/arch/x86/mm/shadow/multi.c -+++ b/xen/arch/x86/mm/shadow/multi.c -@@ -2506,7 +2506,7 @@ static int cf_check sh_page_fault( - * In any case, in the PAE case, the ASSERT is not true; it can - * happen because of actions the guest is taking. 
*/ - #if GUEST_PAGING_LEVELS == 3 -- v->arch.paging.mode->update_cr3(v, 0, false); -+ v->arch.paging.mode->update_cr3(v, false); - #else - ASSERT(d->is_shutting_down); - #endif -@@ -3224,17 +3224,13 @@ static void cf_check sh_detach_old_tables(struct vcpu *v) - } - } - --static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking, -- bool noflush) -+static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool noflush) - /* Updates vcpu->arch.cr3 after the guest has changed CR3. - * Paravirtual guests should set v->arch.guest_table (and guest_table_user, - * if appropriate). - * HVM guests should also make sure hvm_get_guest_cntl_reg(v, 3) works; - * this function will call hvm_update_guest_cr(v, 3) to tell them where the - * shadow tables are. -- * If do_locking != 0, assume we are being called from outside the -- * shadow code, and must take and release the paging lock; otherwise -- * that is the caller's responsibility. - */ - { - struct domain *d = v->domain; -@@ -3252,7 +3248,11 @@ static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking, - return old_entry; - } - -- if ( do_locking ) paging_lock(v->domain); -+ /* -+ * This is used externally (with the paging lock not taken) and internally -+ * by the shadow code (with the lock already taken). -+ */ -+ paging_lock_recursive(v->domain); - - #if (SHADOW_OPTIMIZATIONS & SHOPT_OUT_OF_SYNC) - /* Need to resync all the shadow entries on a TLB flush. 
Resync -@@ -3480,8 +3480,7 @@ static pagetable_t cf_check sh_update_cr3(struct vcpu *v, bool do_locking, - shadow_sync_other_vcpus(v); - #endif - -- /* Release the lock, if we took it (otherwise it's the caller's problem) */ -- if ( do_locking ) paging_unlock(v->domain); -+ paging_unlock(v->domain); - - return old_entry; - } -diff --git a/xen/arch/x86/mm/shadow/none.c b/xen/arch/x86/mm/shadow/none.c -index 743c0ffb85..7e4e386cd0 100644 ---- a/xen/arch/x86/mm/shadow/none.c -+++ b/xen/arch/x86/mm/shadow/none.c -@@ -52,8 +52,7 @@ static unsigned long cf_check _gva_to_gfn( - } - #endif - --static pagetable_t cf_check _update_cr3(struct vcpu *v, bool do_locking, -- bool noflush) -+static pagetable_t cf_check _update_cr3(struct vcpu *v, bool noflush) - { - ASSERT_UNREACHABLE(); - return pagetable_null(); --- -2.44.0 - diff --git a/0047-tools-libxs-Fix-CLOEXEC-handling-in-get_dev.patch b/0047-tools-libxs-Fix-CLOEXEC-handling-in-get_dev.patch new file mode 100644 index 0000000..aabae58 --- /dev/null +++ b/0047-tools-libxs-Fix-CLOEXEC-handling-in-get_dev.patch @@ -0,0 +1,95 @@ +From d1b3bbb46402af77089906a97c413c14ed1740d2 Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 4 Jul 2024 14:13:10 +0200 +Subject: [PATCH 47/56] tools/libxs: Fix CLOEXEC handling in get_dev() + +Move the O_CLOEXEC compatibility outside of an #ifdef USE_PTHREAD block. + +Introduce set_cloexec() to wrap fcntl() setting FD_CLOEXEC. It will be reused +for other CLOEXEC fixes too. + +Use set_cloexec() when O_CLOEXEC isn't available as a best-effort fallback. 
+ +Fixes: f4f2f3402b2f ("tools/libxs: Open /dev/xen/xenbus fds as O_CLOEXEC") +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Juergen Gross <jgross@suse.com> +Acked-by: Anthony PERARD <anthony.perard@vates.tech> +master commit: bf7c1464706adfa903f1e7d59383d042c3a88e39 +master date: 2024-07-02 10:51:06 +0100 +--- + tools/libs/store/xs.c | 38 ++++++++++++++++++++++++++++++++------ + 1 file changed, 32 insertions(+), 6 deletions(-) + +diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c +index 1498515073..037e79d98b 100644 +--- a/tools/libs/store/xs.c ++++ b/tools/libs/store/xs.c +@@ -40,6 +40,10 @@ + #include <xentoolcore_internal.h> + #include <xen_list.h> + ++#ifndef O_CLOEXEC ++#define O_CLOEXEC 0 ++#endif ++ + struct xs_stored_msg { + XEN_TAILQ_ENTRY(struct xs_stored_msg) list; + struct xsd_sockmsg hdr; +@@ -54,10 +58,6 @@ struct xs_stored_msg { + #include <dlfcn.h> + #endif + +-#ifndef O_CLOEXEC +-#define O_CLOEXEC 0 +-#endif +- + struct xs_handle { + /* Communications channel to xenstore daemon. */ + int fd; +@@ -176,6 +176,16 @@ static bool setnonblock(int fd, int nonblock) { + return true; + } + ++static bool set_cloexec(int fd) ++{ ++ int flags = fcntl(fd, F_GETFL); ++ ++ if (flags < 0) ++ return false; ++ ++ return fcntl(fd, flags | FD_CLOEXEC) >= 0; ++} ++ + int xs_fileno(struct xs_handle *h) + { + char c = 0; +@@ -230,8 +240,24 @@ error: + + static int get_dev(const char *connect_to) + { +- /* We cannot open read-only because requests are writes */ +- return open(connect_to, O_RDWR | O_CLOEXEC); ++ int fd, saved_errno; ++ ++ fd = open(connect_to, O_RDWR | O_CLOEXEC); ++ if (fd < 0) ++ return -1; ++ ++ /* Compat for non-O_CLOEXEC environments. Racy. 
*/ ++ if (!O_CLOEXEC && !set_cloexec(fd)) ++ goto error; ++ ++ return fd; ++ ++error: ++ saved_errno = errno; ++ close(fd); ++ errno = saved_errno; ++ ++ return -1; + } + + static int all_restrict_cb(Xentoolcore__Active_Handle *ah, domid_t domid) { +-- +2.45.2 + diff --git a/0047-xen-Swap-order-of-actions-in-the-FREE-macros.patch b/0047-xen-Swap-order-of-actions-in-the-FREE-macros.patch deleted file mode 100644 index 3e58906..0000000 --- a/0047-xen-Swap-order-of-actions-in-the-FREE-macros.patch +++ /dev/null @@ -1,58 +0,0 @@ -From 0a53565f1886201cc8a8afe9b2619ee297c20955 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Fri, 2 Feb 2024 00:39:42 +0000 -Subject: [PATCH 47/67] xen: Swap order of actions in the FREE*() macros - -Wherever possible, it is a good idea to NULL out the visible reference to an -object prior to freeing it. The FREE*() macros already collect together both -parts, making it easy to adjust. - -This has a marginal code generation improvement, as some of the calls to the -free() function can be tailcall optimised. - -No functional change. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit c4f427ec879e7c0df6d44d02561e8bee838a293e) ---- - xen/include/xen/mm.h | 3 ++- - xen/include/xen/xmalloc.h | 7 ++++--- - 2 files changed, 6 insertions(+), 4 deletions(-) - -diff --git a/xen/include/xen/mm.h b/xen/include/xen/mm.h -index 3dc61bcc3c..211685a5d2 100644 ---- a/xen/include/xen/mm.h -+++ b/xen/include/xen/mm.h -@@ -80,8 +80,9 @@ bool scrub_free_pages(void); - - /* Free an allocation, and zero the pointer to it. 
*/ - #define FREE_XENHEAP_PAGES(p, o) do { \ -- free_xenheap_pages(p, o); \ -+ void *_ptr_ = (p); \ - (p) = NULL; \ -+ free_xenheap_pages(_ptr_, o); \ - } while ( false ) - #define FREE_XENHEAP_PAGE(p) FREE_XENHEAP_PAGES(p, 0) - -diff --git a/xen/include/xen/xmalloc.h b/xen/include/xen/xmalloc.h -index 16979a117c..d857298011 100644 ---- a/xen/include/xen/xmalloc.h -+++ b/xen/include/xen/xmalloc.h -@@ -66,9 +66,10 @@ - extern void xfree(void *); - - /* Free an allocation, and zero the pointer to it. */ --#define XFREE(p) do { \ -- xfree(p); \ -- (p) = NULL; \ -+#define XFREE(p) do { \ -+ void *_ptr_ = (p); \ -+ (p) = NULL; \ -+ xfree(_ptr_); \ - } while ( false ) - - /* Underlying functions */ --- -2.44.0 - diff --git a/0048-tools-libxs-Fix-CLOEXEC-handling-in-get_socket.patch b/0048-tools-libxs-Fix-CLOEXEC-handling-in-get_socket.patch new file mode 100644 index 0000000..e01a6b4 --- /dev/null +++ b/0048-tools-libxs-Fix-CLOEXEC-handling-in-get_socket.patch @@ -0,0 +1,60 @@ +From d689bb4d2cd3ccdb0067b0ca953cccbc5ab375ae Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 4 Jul 2024 14:13:18 +0200 +Subject: [PATCH 48/56] tools/libxs: Fix CLOEXEC handling in get_socket() + +get_socket() opens a socket, then uses fcntl() to set CLOEXEC. This is racy +with exec(). + +Open the socket with SOCK_CLOEXEC. Use the same compatibility strategy as +O_CLOEXEC on ancient versions of Linux. 
+ +Reported-by: Frediano Ziglio <frediano.ziglio@cloud.com> +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Juergen Gross <jgross@suse.com> +Acked-by: Anthony PERARD <anthony.perard@vates.tech> +master commit: 1957dd6aff931877fc22699d8f2d4be8728014ba +master date: 2024-07-02 10:51:11 +0100 +--- + tools/libs/store/xs.c | 14 ++++++++------ + 1 file changed, 8 insertions(+), 6 deletions(-) + +diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c +index 037e79d98b..11a766c508 100644 +--- a/tools/libs/store/xs.c ++++ b/tools/libs/store/xs.c +@@ -44,6 +44,10 @@ + #define O_CLOEXEC 0 + #endif + ++#ifndef SOCK_CLOEXEC ++#define SOCK_CLOEXEC 0 ++#endif ++ + struct xs_stored_msg { + XEN_TAILQ_ENTRY(struct xs_stored_msg) list; + struct xsd_sockmsg hdr; +@@ -207,16 +211,14 @@ int xs_fileno(struct xs_handle *h) + static int get_socket(const char *connect_to) + { + struct sockaddr_un addr; +- int sock, saved_errno, flags; ++ int sock, saved_errno; + +- sock = socket(PF_UNIX, SOCK_STREAM, 0); ++ sock = socket(PF_UNIX, SOCK_STREAM | SOCK_CLOEXEC, 0); + if (sock < 0) + return -1; + +- if ((flags = fcntl(sock, F_GETFD)) < 0) +- goto error; +- flags |= FD_CLOEXEC; +- if (fcntl(sock, F_SETFD, flags) < 0) ++ /* Compat for non-SOCK_CLOEXEC environments. Racy. 
*/ ++ if (!SOCK_CLOEXEC && !set_cloexec(sock)) + goto error; + + addr.sun_family = AF_UNIX; +-- +2.45.2 + diff --git a/0048-x86-spinlock-introduce-support-for-blocking-speculat.patch b/0048-x86-spinlock-introduce-support-for-blocking-speculat.patch deleted file mode 100644 index ecf0830..0000000 --- a/0048-x86-spinlock-introduce-support-for-blocking-speculat.patch +++ /dev/null @@ -1,331 +0,0 @@ -From 9d2f136328aab5537b7180a1b23e171893ebe455 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 13 Feb 2024 13:08:05 +0100 -Subject: [PATCH 48/67] x86/spinlock: introduce support for blocking - speculation into critical regions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Introduce a new Kconfig option to block speculation into lock protected -critical regions. The Kconfig option is enabled by default, but the mitigation -won't be engaged unless it's explicitly enabled in the command line using -`spec-ctrl=lock-harden`. - -Convert the spinlock acquire macros into always-inline functions, and introduce -a speculation barrier after the lock has been taken. Note the speculation -barrier is not placed inside the implementation of the spin lock functions, as -to prevent speculation from falling through the call to the lock functions -resulting in the barrier also being skipped. - -trylock variants are protected using a construct akin to the existing -evaluate_nospec(). - -This patch only implements the speculation barrier for x86. - -Note spin locks are the only locking primitive taken care in this change, -further locking primitives will be adjusted by separate changes. 
- -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 7ef0084418e188d05f338c3e028fbbe8b6924afa) ---- - docs/misc/xen-command-line.pandoc | 7 ++++- - xen/arch/x86/include/asm/cpufeatures.h | 2 +- - xen/arch/x86/include/asm/nospec.h | 26 ++++++++++++++++++ - xen/arch/x86/spec_ctrl.c | 26 +++++++++++++++--- - xen/common/Kconfig | 17 ++++++++++++ - xen/include/xen/nospec.h | 15 +++++++++++ - xen/include/xen/spinlock.h | 37 +++++++++++++++++++++----- - 7 files changed, 119 insertions(+), 11 deletions(-) - -diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc -index d909ec94fe..e1d56407dd 100644 ---- a/docs/misc/xen-command-line.pandoc -+++ b/docs/misc/xen-command-line.pandoc -@@ -2327,7 +2327,7 @@ By default SSBD will be mitigated at runtime (i.e `ssbd=runtime`). - > {msr-sc,rsb,verw,ibpb-entry}=<bool>|{pv,hvm}=<bool>, - > bti-thunk=retpoline|lfence|jmp, {ibrs,ibpb,ssbd,psfd, - > eager-fpu,l1d-flush,branch-harden,srb-lock, --> unpriv-mmio,gds-mit,div-scrub}=<bool> ]` -+> unpriv-mmio,gds-mit,div-scrub,lock-harden}=<bool> ]` - - Controls for speculative execution sidechannel mitigations. By default, Xen - will pick the most appropriate mitigations based on compiled in support, -@@ -2454,6 +2454,11 @@ On all hardware, the `div-scrub=` option can be used to force or prevent Xen - from mitigating the DIV-leakage vulnerability. By default, Xen will mitigate - DIV-leakage on hardware believed to be vulnerable. - -+If Xen is compiled with `CONFIG_SPECULATIVE_HARDEN_LOCK`, the `lock-harden=` -+boolean can be used to force or prevent Xen from using speculation barriers to -+protect lock critical regions. This mitigation won't be engaged by default, -+and needs to be explicitly enabled on the command line. 
-+ - ### sync_console - > `= <boolean>` - -diff --git a/xen/arch/x86/include/asm/cpufeatures.h b/xen/arch/x86/include/asm/cpufeatures.h -index c3aad21c3b..7e8221fd85 100644 ---- a/xen/arch/x86/include/asm/cpufeatures.h -+++ b/xen/arch/x86/include/asm/cpufeatures.h -@@ -24,7 +24,7 @@ XEN_CPUFEATURE(APERFMPERF, X86_SYNTH( 8)) /* APERFMPERF */ - XEN_CPUFEATURE(MFENCE_RDTSC, X86_SYNTH( 9)) /* MFENCE synchronizes RDTSC */ - XEN_CPUFEATURE(XEN_SMEP, X86_SYNTH(10)) /* SMEP gets used by Xen itself */ - XEN_CPUFEATURE(XEN_SMAP, X86_SYNTH(11)) /* SMAP gets used by Xen itself */ --/* Bit 12 unused. */ -+XEN_CPUFEATURE(SC_NO_LOCK_HARDEN, X86_SYNTH(12)) /* (Disable) Lock critical region hardening */ - XEN_CPUFEATURE(IND_THUNK_LFENCE, X86_SYNTH(13)) /* Use IND_THUNK_LFENCE */ - XEN_CPUFEATURE(IND_THUNK_JMP, X86_SYNTH(14)) /* Use IND_THUNK_JMP */ - XEN_CPUFEATURE(SC_NO_BRANCH_HARDEN, X86_SYNTH(15)) /* (Disable) Conditional branch hardening */ -diff --git a/xen/arch/x86/include/asm/nospec.h b/xen/arch/x86/include/asm/nospec.h -index 7150e76b87..0725839e19 100644 ---- a/xen/arch/x86/include/asm/nospec.h -+++ b/xen/arch/x86/include/asm/nospec.h -@@ -38,6 +38,32 @@ static always_inline void block_speculation(void) - barrier_nospec_true(); - } - -+static always_inline void arch_block_lock_speculation(void) -+{ -+ alternative("lfence", "", X86_FEATURE_SC_NO_LOCK_HARDEN); -+} -+ -+/* Allow to insert a read memory barrier into conditionals */ -+static always_inline bool barrier_lock_true(void) -+{ -+ alternative("lfence #nospec-true", "", X86_FEATURE_SC_NO_LOCK_HARDEN); -+ return true; -+} -+ -+static always_inline bool barrier_lock_false(void) -+{ -+ alternative("lfence #nospec-false", "", X86_FEATURE_SC_NO_LOCK_HARDEN); -+ return false; -+} -+ -+static always_inline bool arch_lock_evaluate_nospec(bool condition) -+{ -+ if ( condition ) -+ return barrier_lock_true(); -+ else -+ return barrier_lock_false(); -+} -+ - #endif /* _ASM_X86_NOSPEC_H */ - - /* -diff --git 
a/xen/arch/x86/spec_ctrl.c b/xen/arch/x86/spec_ctrl.c -index 1ee81e2dfe..ac21af2c5c 100644 ---- a/xen/arch/x86/spec_ctrl.c -+++ b/xen/arch/x86/spec_ctrl.c -@@ -65,6 +65,7 @@ int8_t __read_mostly opt_eager_fpu = -1; - int8_t __read_mostly opt_l1d_flush = -1; - static bool __initdata opt_branch_harden = - IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH); -+static bool __initdata opt_lock_harden; - - bool __initdata bsp_delay_spec_ctrl; - uint8_t __read_mostly default_xen_spec_ctrl; -@@ -133,6 +134,7 @@ static int __init cf_check parse_spec_ctrl(const char *s) - opt_ssbd = false; - opt_l1d_flush = 0; - opt_branch_harden = false; -+ opt_lock_harden = false; - opt_srb_lock = 0; - opt_unpriv_mmio = false; - opt_gds_mit = 0; -@@ -298,6 +300,16 @@ static int __init cf_check parse_spec_ctrl(const char *s) - rc = -EINVAL; - } - } -+ else if ( (val = parse_boolean("lock-harden", s, ss)) >= 0 ) -+ { -+ if ( IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_LOCK) ) -+ opt_lock_harden = val; -+ else -+ { -+ no_config_param("SPECULATIVE_HARDEN_LOCK", "spec-ctrl", s, ss); -+ rc = -EINVAL; -+ } -+ } - else if ( (val = parse_boolean("srb-lock", s, ss)) >= 0 ) - opt_srb_lock = val; - else if ( (val = parse_boolean("unpriv-mmio", s, ss)) >= 0 ) -@@ -500,7 +512,8 @@ static void __init print_details(enum ind_thunk thunk) - if ( IS_ENABLED(CONFIG_INDIRECT_THUNK) || IS_ENABLED(CONFIG_SHADOW_PAGING) || - IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_ARRAY) || - IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_BRANCH) || -- IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) ) -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS) || -+ IS_ENABLED(CONFIG_SPECULATIVE_HARDEN_LOCK) ) - printk(" Compiled-in support:" - #ifdef CONFIG_INDIRECT_THUNK - " INDIRECT_THUNK" -@@ -516,11 +529,14 @@ static void __init print_details(enum ind_thunk thunk) - #endif - #ifdef CONFIG_SPECULATIVE_HARDEN_GUEST_ACCESS - " HARDEN_GUEST_ACCESS" -+#endif -+#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK -+ " HARDEN_LOCK" - #endif - "\n"); - - /* Settings for Xen's 
protection, irrespective of guests. */ -- printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s\n", -+ printk(" Xen settings: %s%sSPEC_CTRL: %s%s%s%s%s, Other:%s%s%s%s%s%s%s\n", - thunk != THUNK_NONE ? "BTI-Thunk: " : "", - thunk == THUNK_NONE ? "" : - thunk == THUNK_RETPOLINE ? "RETPOLINE, " : -@@ -547,7 +563,8 @@ static void __init print_details(enum ind_thunk thunk) - opt_verw_pv || opt_verw_hvm || - opt_verw_mmio ? " VERW" : "", - opt_div_scrub ? " DIV" : "", -- opt_branch_harden ? " BRANCH_HARDEN" : ""); -+ opt_branch_harden ? " BRANCH_HARDEN" : "", -+ opt_lock_harden ? " LOCK_HARDEN" : ""); - - /* L1TF diagnostics, printed if vulnerable or PV shadowing is in use. */ - if ( cpu_has_bug_l1tf || opt_pv_l1tf_hwdom || opt_pv_l1tf_domu ) -@@ -1930,6 +1947,9 @@ void __init init_speculation_mitigations(void) - if ( !opt_branch_harden ) - setup_force_cpu_cap(X86_FEATURE_SC_NO_BRANCH_HARDEN); - -+ if ( !opt_lock_harden ) -+ setup_force_cpu_cap(X86_FEATURE_SC_NO_LOCK_HARDEN); -+ - /* - * We do not disable HT by default on affected hardware. - * -diff --git a/xen/common/Kconfig b/xen/common/Kconfig -index e7794cb7f6..cd73851538 100644 ---- a/xen/common/Kconfig -+++ b/xen/common/Kconfig -@@ -173,6 +173,23 @@ config SPECULATIVE_HARDEN_GUEST_ACCESS - - If unsure, say Y. - -+config SPECULATIVE_HARDEN_LOCK -+ bool "Speculative lock context hardening" -+ default y -+ depends on X86 -+ help -+ Contemporary processors may use speculative execution as a -+ performance optimisation, but this can potentially be abused by an -+ attacker to leak data via speculative sidechannels. -+ -+ One source of data leakage is via speculative accesses to lock -+ critical regions. -+ -+ This option is disabled by default at run time, and needs to be -+ enabled on the command line. -+ -+ If unsure, say Y. 
-+ - endmenu - - config DIT_DEFAULT -diff --git a/xen/include/xen/nospec.h b/xen/include/xen/nospec.h -index 76255bc46e..4552846403 100644 ---- a/xen/include/xen/nospec.h -+++ b/xen/include/xen/nospec.h -@@ -70,6 +70,21 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, - #define array_access_nospec(array, index) \ - (array)[array_index_nospec(index, ARRAY_SIZE(array))] - -+static always_inline void block_lock_speculation(void) -+{ -+#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK -+ arch_block_lock_speculation(); -+#endif -+} -+ -+static always_inline bool lock_evaluate_nospec(bool condition) -+{ -+#ifdef CONFIG_SPECULATIVE_HARDEN_LOCK -+ return arch_lock_evaluate_nospec(condition); -+#endif -+ return condition; -+} -+ - #endif /* XEN_NOSPEC_H */ - - /* -diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h -index 961891bea4..daf48fdea7 100644 ---- a/xen/include/xen/spinlock.h -+++ b/xen/include/xen/spinlock.h -@@ -1,6 +1,7 @@ - #ifndef __SPINLOCK_H__ - #define __SPINLOCK_H__ - -+#include <xen/nospec.h> - #include <xen/time.h> - #include <asm/system.h> - #include <asm/spinlock.h> -@@ -189,13 +190,30 @@ int _spin_trylock_recursive(spinlock_t *lock); - void _spin_lock_recursive(spinlock_t *lock); - void _spin_unlock_recursive(spinlock_t *lock); - --#define spin_lock(l) _spin_lock(l) --#define spin_lock_cb(l, c, d) _spin_lock_cb(l, c, d) --#define spin_lock_irq(l) _spin_lock_irq(l) -+static always_inline void spin_lock(spinlock_t *l) -+{ -+ _spin_lock(l); -+ block_lock_speculation(); -+} -+ -+static always_inline void spin_lock_cb(spinlock_t *l, void (*c)(void *data), -+ void *d) -+{ -+ _spin_lock_cb(l, c, d); -+ block_lock_speculation(); -+} -+ -+static always_inline void spin_lock_irq(spinlock_t *l) -+{ -+ _spin_lock_irq(l); -+ block_lock_speculation(); -+} -+ - #define spin_lock_irqsave(l, f) \ - ({ \ - BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \ - ((f) = _spin_lock_irqsave(l)); \ -+ block_lock_speculation(); \ - }) - - 
#define spin_unlock(l) _spin_unlock(l) -@@ -203,7 +221,7 @@ void _spin_unlock_recursive(spinlock_t *lock); - #define spin_unlock_irqrestore(l, f) _spin_unlock_irqrestore(l, f) - - #define spin_is_locked(l) _spin_is_locked(l) --#define spin_trylock(l) _spin_trylock(l) -+#define spin_trylock(l) lock_evaluate_nospec(_spin_trylock(l)) - - #define spin_trylock_irqsave(lock, flags) \ - ({ \ -@@ -224,8 +242,15 @@ void _spin_unlock_recursive(spinlock_t *lock); - * are any critical regions that cannot form part of such a set, they can use - * standard spin_[un]lock(). - */ --#define spin_trylock_recursive(l) _spin_trylock_recursive(l) --#define spin_lock_recursive(l) _spin_lock_recursive(l) -+#define spin_trylock_recursive(l) \ -+ lock_evaluate_nospec(_spin_trylock_recursive(l)) -+ -+static always_inline void spin_lock_recursive(spinlock_t *l) -+{ -+ _spin_lock_recursive(l); -+ block_lock_speculation(); -+} -+ - #define spin_unlock_recursive(l) _spin_unlock_recursive(l) - - #endif /* __SPINLOCK_H__ */ --- -2.44.0 - diff --git a/0049-rwlock-introduce-support-for-blocking-speculation-in.patch b/0049-rwlock-introduce-support-for-blocking-speculation-in.patch deleted file mode 100644 index 593b588..0000000 --- a/0049-rwlock-introduce-support-for-blocking-speculation-in.patch +++ /dev/null @@ -1,125 +0,0 @@ -From 7454dad6ee15f9fa6d84fc285d366b86f3d47494 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 13 Feb 2024 16:08:52 +0100 -Subject: [PATCH 49/67] rwlock: introduce support for blocking speculation into - critical regions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Introduce inline wrappers as required and add direct calls to -block_lock_speculation() in order to prevent speculation into the rwlock -protected critical regions. 
- -Note the rwlock primitives are adjusted to use the non speculation safe variants -of the spinlock handlers, as a speculation barrier is added in the rwlock -calling wrappers. - -trylock variants are protected by using lock_evaluate_nospec(). - -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit a1fb15f61692b1fa9945fc51f55471ace49cdd59) ---- - xen/common/rwlock.c | 14 +++++++++++--- - xen/include/xen/rwlock.h | 34 ++++++++++++++++++++++++++++------ - 2 files changed, 39 insertions(+), 9 deletions(-) - -diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c -index aa15529bbe..cda06b9d6e 100644 ---- a/xen/common/rwlock.c -+++ b/xen/common/rwlock.c -@@ -34,8 +34,11 @@ void queue_read_lock_slowpath(rwlock_t *lock) - - /* - * Put the reader into the wait queue. -+ * -+ * Use the speculation unsafe helper, as it's the caller responsibility to -+ * issue a speculation barrier if required. - */ -- spin_lock(&lock->lock); -+ _spin_lock(&lock->lock); - - /* - * At the head of the wait queue now, wait until the writer state -@@ -64,8 +67,13 @@ void queue_write_lock_slowpath(rwlock_t *lock) - { - u32 cnts; - -- /* Put the writer into the wait queue. */ -- spin_lock(&lock->lock); -+ /* -+ * Put the writer into the wait queue. -+ * -+ * Use the speculation unsafe helper, as it's the caller responsibility to -+ * issue a speculation barrier if required. -+ */ -+ _spin_lock(&lock->lock); - - /* Try to acquire the lock directly if no reader is present. 
*/ - if ( !atomic_read(&lock->cnts) && -diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h -index 0cc9167715..fd0458be94 100644 ---- a/xen/include/xen/rwlock.h -+++ b/xen/include/xen/rwlock.h -@@ -247,27 +247,49 @@ static inline int _rw_is_write_locked(rwlock_t *lock) - return (atomic_read(&lock->cnts) & _QW_WMASK) == _QW_LOCKED; - } - --#define read_lock(l) _read_lock(l) --#define read_lock_irq(l) _read_lock_irq(l) -+static always_inline void read_lock(rwlock_t *l) -+{ -+ _read_lock(l); -+ block_lock_speculation(); -+} -+ -+static always_inline void read_lock_irq(rwlock_t *l) -+{ -+ _read_lock_irq(l); -+ block_lock_speculation(); -+} -+ - #define read_lock_irqsave(l, f) \ - ({ \ - BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \ - ((f) = _read_lock_irqsave(l)); \ -+ block_lock_speculation(); \ - }) - - #define read_unlock(l) _read_unlock(l) - #define read_unlock_irq(l) _read_unlock_irq(l) - #define read_unlock_irqrestore(l, f) _read_unlock_irqrestore(l, f) --#define read_trylock(l) _read_trylock(l) -+#define read_trylock(l) lock_evaluate_nospec(_read_trylock(l)) -+ -+static always_inline void write_lock(rwlock_t *l) -+{ -+ _write_lock(l); -+ block_lock_speculation(); -+} -+ -+static always_inline void write_lock_irq(rwlock_t *l) -+{ -+ _write_lock_irq(l); -+ block_lock_speculation(); -+} - --#define write_lock(l) _write_lock(l) --#define write_lock_irq(l) _write_lock_irq(l) - #define write_lock_irqsave(l, f) \ - ({ \ - BUILD_BUG_ON(sizeof(f) != sizeof(unsigned long)); \ - ((f) = _write_lock_irqsave(l)); \ -+ block_lock_speculation(); \ - }) --#define write_trylock(l) _write_trylock(l) -+#define write_trylock(l) lock_evaluate_nospec(_write_trylock(l)) - - #define write_unlock(l) _write_unlock(l) - #define write_unlock_irq(l) _write_unlock_irq(l) --- -2.44.0 - diff --git a/0049-tools-libxs-Fix-CLOEXEC-handling-in-xs_fileno.patch b/0049-tools-libxs-Fix-CLOEXEC-handling-in-xs_fileno.patch new file mode 100644 index 0000000..564cece --- /dev/null +++ 
b/0049-tools-libxs-Fix-CLOEXEC-handling-in-xs_fileno.patch @@ -0,0 +1,109 @@ +From 26b8ff1861a870e01456b31bf999f25df5538ebf Mon Sep 17 00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 4 Jul 2024 14:13:30 +0200 +Subject: [PATCH 49/56] tools/libxs: Fix CLOEXEC handling in xs_fileno() + +xs_fileno() opens a pipe on first use to communicate between the watch thread +and the main thread. Nothing ever sets CLOEXEC on the file descriptors. + +Check for the availability of the pipe2() function with configure. Despite +starting life as Linux-only, FreeBSD and NetBSD have gained it. + +When pipe2() isn't available, try our best with pipe() and set_cloexec(). + +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Juergen Gross <jgross@suse.com> +Acked-by: Anthony PERARD <anthony.perard@vates.tech> +master commit: a2ff677852f0ce05fa335e8e5682bf2ae0c916ee +master date: 2024-07-02 10:52:59 +0100 +--- + tools/config.h.in | 3 +++ + tools/configure | 12 ++++++++++++ + tools/configure.ac | 2 ++ + tools/libs/store/xs.c | 16 +++++++++++++++- + 4 files changed, 32 insertions(+), 1 deletion(-) + +diff --git a/tools/config.h.in b/tools/config.h.in +index 0bb2fe08a1..50ad60fcb0 100644 +--- a/tools/config.h.in ++++ b/tools/config.h.in +@@ -39,6 +39,9 @@ + /* Define to 1 if you have the <memory.h> header file. */ + #undef HAVE_MEMORY_H + ++/* Define to 1 if you have the `pipe2' function. 
*/ ++#undef HAVE_PIPE2 ++ + /* pygrub enabled */ + #undef HAVE_PYGRUB + +diff --git a/tools/configure b/tools/configure +index 7bb935d23b..e35112b5c5 100755 +--- a/tools/configure ++++ b/tools/configure +@@ -9751,6 +9751,18 @@ if test "$ax_found" = "0"; then : + fi + + ++for ac_func in pipe2 ++do : ++ ac_fn_c_check_func "$LINENO" "pipe2" "ac_cv_func_pipe2" ++if test "x$ac_cv_func_pipe2" = xyes; then : ++ cat >>confdefs.h <<_ACEOF ++#define HAVE_PIPE2 1 ++_ACEOF ++ ++fi ++done ++ ++ + cat >confcache <<\_ACEOF + # This file is a shell script that caches the results of configure + # tests run on this system so they can be shared between configure +diff --git a/tools/configure.ac b/tools/configure.ac +index 618ef8c63f..53ac20af1e 100644 +--- a/tools/configure.ac ++++ b/tools/configure.ac +@@ -543,4 +543,6 @@ AS_IF([test "x$pvshim" = "xy"], [ + + AX_FIND_HEADER([INCLUDE_ENDIAN_H], [endian.h sys/endian.h]) + ++AC_CHECK_FUNCS([pipe2]) ++ + AC_OUTPUT() +diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c +index 11a766c508..c8845b69e2 100644 +--- a/tools/libs/store/xs.c ++++ b/tools/libs/store/xs.c +@@ -190,13 +190,27 @@ static bool set_cloexec(int fd) + return fcntl(fd, flags | FD_CLOEXEC) >= 0; + } + ++static int pipe_cloexec(int fds[2]) ++{ ++#if HAVE_PIPE2 ++ return pipe2(fds, O_CLOEXEC); ++#else ++ if (pipe(fds) < 0) ++ return -1; ++ /* Best effort to set CLOEXEC. Racy. */ ++ set_cloexec(fds[0]); ++ set_cloexec(fds[1]); ++ return 0; ++#endif ++} ++ + int xs_fileno(struct xs_handle *h) + { + char c = 0; + + mutex_lock(&h->watch_mutex); + +- if ((h->watch_pipe[0] == -1) && (pipe(h->watch_pipe) != -1)) { ++ if ((h->watch_pipe[0] == -1) && (pipe_cloexec(h->watch_pipe) != -1)) { + /* Kick things off if the watch list is already non-empty. 
*/ + if (!XEN_TAILQ_EMPTY(&h->watch_list)) + while (write(h->watch_pipe[1], &c, 1) != 1) +-- +2.45.2 + diff --git a/0050-cmdline-document-and-enforce-extra_guest_irqs-upper-.patch b/0050-cmdline-document-and-enforce-extra_guest_irqs-upper-.patch new file mode 100644 index 0000000..f7f61e8 --- /dev/null +++ b/0050-cmdline-document-and-enforce-extra_guest_irqs-upper-.patch @@ -0,0 +1,156 @@ +From 30c695ddaf067cbe7a98037474e7910109238807 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Thu, 4 Jul 2024 14:14:16 +0200 +Subject: [PATCH 50/56] cmdline: document and enforce "extra_guest_irqs" upper + bounds +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +PHYSDEVOP_pirq_eoi_gmfn_v<N> accepting just a single GFN implies that no +more than 32k pIRQ-s can be used by a domain on x86. Document this upper +bound. + +To also enforce the limit, (ab)use both arch_hwdom_irqs() (changing its +parameter type) and setup_system_domains(). This is primarily to avoid +exposing the two static variables or introducing yet further arch hooks. + +While touching arch_hwdom_irqs() also mark it hwdom-init. 
+ +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Acked-by: Roger Pau Monné <roger.pau@citrix.com> + +amend 'cmdline: document and enforce "extra_guest_irqs" upper bounds' + +Address late review comments for what is now commit 17f6d398f765: +- bound max_irqs right away against nr_irqs +- introduce a #define for a constant used twice + +Requested-by: Roger Pau Monné <roger.pau@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 17f6d398f76597f8009ec0530842fb8705ece7ba +master date: 2024-07-02 12:00:27 +0200 +master commit: 1f56accba33ffea0abf7d1c6384710823d10cbd6 +master date: 2024-07-03 14:03:27 +0200 +--- + docs/misc/xen-command-line.pandoc | 3 ++- + xen/arch/x86/io_apic.c | 17 ++++++++++------- + xen/common/domain.c | 24 ++++++++++++++++++++++-- + xen/include/xen/irq.h | 3 ++- + 4 files changed, 36 insertions(+), 11 deletions(-) + +diff --git a/docs/misc/xen-command-line.pandoc b/docs/misc/xen-command-line.pandoc +index 10a09bbf23..d857bd0f89 100644 +--- a/docs/misc/xen-command-line.pandoc ++++ b/docs/misc/xen-command-line.pandoc +@@ -1175,7 +1175,8 @@ common for all domUs, while the optional second number (preceded by a comma) + is for dom0. Changing the setting for domU has no impact on dom0 and vice + versa. For example to change dom0 without changing domU, use + `extra_guest_irqs=,512`. The default value for Dom0 and an eventual separate +-hardware domain is architecture dependent. ++hardware domain is architecture dependent. The upper limit for both values on ++x86 is such that the resulting total number of IRQs can't be higher than 32768. + Note that specifying zero as domU value means zero, while for dom0 it means + to use the default. 
+ +diff --git a/xen/arch/x86/io_apic.c b/xen/arch/x86/io_apic.c +index c5342789e8..f7591fd091 100644 +--- a/xen/arch/x86/io_apic.c ++++ b/xen/arch/x86/io_apic.c +@@ -2664,18 +2664,21 @@ void __init ioapic_init(void) + nr_irqs_gsi, nr_irqs - nr_irqs_gsi); + } + +-unsigned int arch_hwdom_irqs(domid_t domid) ++unsigned int __hwdom_init arch_hwdom_irqs(const struct domain *d) + { + unsigned int n = fls(num_present_cpus()); ++ /* Bounding by the domain pirq EOI bitmap capacity. */ ++ const unsigned int max_irqs = min_t(unsigned int, nr_irqs, ++ PAGE_SIZE * BITS_PER_BYTE); + +- if ( !domid ) +- n = min(n, dom0_max_vcpus()); +- n = min(nr_irqs_gsi + n * NR_DYNAMIC_VECTORS, nr_irqs); ++ if ( is_system_domain(d) ) ++ return max_irqs; + +- /* Bounded by the domain pirq eoi bitmap gfn. */ +- n = min_t(unsigned int, n, PAGE_SIZE * BITS_PER_BYTE); ++ if ( !d->domain_id ) ++ n = min(n, dom0_max_vcpus()); ++ n = min(nr_irqs_gsi + n * NR_DYNAMIC_VECTORS, max_irqs); + +- printk("Dom%d has maximum %u PIRQs\n", domid, n); ++ printk("%pd has maximum %u PIRQs\n", d, n); + + return n; + } +diff --git a/xen/common/domain.c b/xen/common/domain.c +index 003f4ab125..62832a5860 100644 +--- a/xen/common/domain.c ++++ b/xen/common/domain.c +@@ -351,7 +351,8 @@ static int late_hwdom_init(struct domain *d) + } + + static unsigned int __read_mostly extra_hwdom_irqs; +-static unsigned int __read_mostly extra_domU_irqs = 32; ++#define DEFAULT_EXTRA_DOMU_IRQS 32U ++static unsigned int __read_mostly extra_domU_irqs = DEFAULT_EXTRA_DOMU_IRQS; + + static int __init cf_check parse_extra_guest_irqs(const char *s) + { +@@ -688,7 +689,7 @@ struct domain *domain_create(domid_t domid, + d->nr_pirqs = nr_static_irqs + extra_domU_irqs; + else + d->nr_pirqs = extra_hwdom_irqs ? 
nr_static_irqs + extra_hwdom_irqs +- : arch_hwdom_irqs(domid); ++ : arch_hwdom_irqs(d); + d->nr_pirqs = min(d->nr_pirqs, nr_irqs); + + radix_tree_init(&d->pirq_tree); +@@ -812,6 +813,25 @@ void __init setup_system_domains(void) + if ( IS_ERR(dom_xen) ) + panic("Failed to create d[XEN]: %ld\n", PTR_ERR(dom_xen)); + ++#ifdef CONFIG_HAS_PIRQ ++ /* Bound-check values passed via "extra_guest_irqs=". */ ++ { ++ unsigned int n = max(arch_hwdom_irqs(dom_xen), nr_static_irqs); ++ ++ if ( extra_hwdom_irqs > n - nr_static_irqs ) ++ { ++ extra_hwdom_irqs = n - nr_static_irqs; ++ printk(XENLOG_WARNING "hwdom IRQs bounded to %u\n", n); ++ } ++ if ( extra_domU_irqs > ++ max(DEFAULT_EXTRA_DOMU_IRQS, n - nr_static_irqs) ) ++ { ++ extra_domU_irqs = n - nr_static_irqs; ++ printk(XENLOG_WARNING "domU IRQs bounded to %u\n", n); ++ } ++ } ++#endif ++ + /* + * Initialise our DOMID_IO domain. + * This domain owns I/O pages that are within the range of the page_info +diff --git a/xen/include/xen/irq.h b/xen/include/xen/irq.h +index 5dcd2d8f0c..bef170bcb6 100644 +--- a/xen/include/xen/irq.h ++++ b/xen/include/xen/irq.h +@@ -196,8 +196,9 @@ extern struct irq_desc *pirq_spin_lock_irq_desc( + + unsigned int set_desc_affinity(struct irq_desc *desc, const cpumask_t *mask); + ++/* When passed a system domain, this returns the maximum permissible value. 
*/ + #ifndef arch_hwdom_irqs +-unsigned int arch_hwdom_irqs(domid_t domid); ++unsigned int arch_hwdom_irqs(const struct domain *d); + #endif + + #ifndef arch_evtchn_bind_pirq +-- +2.45.2 + diff --git a/0050-percpu-rwlock-introduce-support-for-blocking-specula.patch b/0050-percpu-rwlock-introduce-support-for-blocking-specula.patch deleted file mode 100644 index 1da2128..0000000 --- a/0050-percpu-rwlock-introduce-support-for-blocking-specula.patch +++ /dev/null @@ -1,87 +0,0 @@ -From 468a368b2e5a38fc0be8e9e5f475820f7e4a6b4f Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Tue, 13 Feb 2024 17:57:38 +0100 -Subject: [PATCH 50/67] percpu-rwlock: introduce support for blocking - speculation into critical regions -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Add direct calls to block_lock_speculation() where required in order to prevent -speculation into the lock protected critical regions. Also convert -_percpu_read_lock() from inline to always_inline. - -Note that _percpu_write_lock() has been modified the use the non speculation -safe of the locking primites, as a speculation is added unconditionally by the -calling wrapper. - -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit f218daf6d3a3b847736d37c6a6b76031a0d08441) ---- - xen/common/rwlock.c | 6 +++++- - xen/include/xen/rwlock.h | 14 ++++++++++---- - 2 files changed, 15 insertions(+), 5 deletions(-) - -diff --git a/xen/common/rwlock.c b/xen/common/rwlock.c -index cda06b9d6e..4da0ed8fad 100644 ---- a/xen/common/rwlock.c -+++ b/xen/common/rwlock.c -@@ -125,8 +125,12 @@ void _percpu_write_lock(percpu_rwlock_t **per_cpudata, - /* - * First take the write lock to protect against other writers or slow - * path readers. 
-+ * -+ * Note we use the speculation unsafe variant of write_lock(), as the -+ * calling wrapper already adds a speculation barrier after the lock has -+ * been taken. - */ -- write_lock(&percpu_rwlock->rwlock); -+ _write_lock(&percpu_rwlock->rwlock); - - /* Now set the global variable so that readers start using read_lock. */ - percpu_rwlock->writer_activating = 1; -diff --git a/xen/include/xen/rwlock.h b/xen/include/xen/rwlock.h -index fd0458be94..abe0804bf7 100644 ---- a/xen/include/xen/rwlock.h -+++ b/xen/include/xen/rwlock.h -@@ -326,8 +326,8 @@ static inline void _percpu_rwlock_owner_check(percpu_rwlock_t **per_cpudata, - #define percpu_rwlock_resource_init(l, owner) \ - (*(l) = (percpu_rwlock_t)PERCPU_RW_LOCK_UNLOCKED(&get_per_cpu_var(owner))) - --static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata, -- percpu_rwlock_t *percpu_rwlock) -+static always_inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata, -+ percpu_rwlock_t *percpu_rwlock) - { - /* Validate the correct per_cpudata variable has been provided. */ - _percpu_rwlock_owner_check(per_cpudata, percpu_rwlock); -@@ -362,6 +362,8 @@ static inline void _percpu_read_lock(percpu_rwlock_t **per_cpudata, - } - else - { -+ /* Other branch already has a speculation barrier in read_lock(). */ -+ block_lock_speculation(); - /* All other paths have implicit check_lock() calls via read_lock(). 
*/ - check_lock(&percpu_rwlock->rwlock.lock.debug, false); - } -@@ -410,8 +412,12 @@ static inline void _percpu_write_unlock(percpu_rwlock_t **per_cpudata, - _percpu_read_lock(&get_per_cpu_var(percpu), lock) - #define percpu_read_unlock(percpu, lock) \ - _percpu_read_unlock(&get_per_cpu_var(percpu), lock) --#define percpu_write_lock(percpu, lock) \ -- _percpu_write_lock(&get_per_cpu_var(percpu), lock) -+ -+#define percpu_write_lock(percpu, lock) \ -+({ \ -+ _percpu_write_lock(&get_per_cpu_var(percpu), lock); \ -+ block_lock_speculation(); \ -+}) - #define percpu_write_unlock(percpu, lock) \ - _percpu_write_unlock(&get_per_cpu_var(percpu), lock) - --- -2.44.0 - diff --git a/0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch b/0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch deleted file mode 100644 index 822836d..0000000 --- a/0051-locking-attempt-to-ensure-lock-wrappers-are-always-i.patch +++ /dev/null @@ -1,405 +0,0 @@ -From 2cc5e57be680a516aa5cdef4281856d09b9d0ea6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Mon, 4 Mar 2024 14:29:36 +0100 -Subject: [PATCH 51/67] locking: attempt to ensure lock wrappers are always - inline -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -In order to prevent the locking speculation barriers from being inside of -`call`ed functions that could be speculatively bypassed. - -While there also add an extra locking barrier to _mm_write_lock() in the branch -taken when the lock is already held. - -Note some functions are switched to use the unsafe variants (without speculation -barrier) of the locking primitives, but a speculation barrier is always added -to the exposed public lock wrapping helper. That's the case with -sched_spin_lock_double() or pcidevs_lock() for example. 
- -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 197ecd838a2aaf959a469df3696d4559c4f8b762) ---- - xen/arch/x86/hvm/vpt.c | 10 +++++++--- - xen/arch/x86/include/asm/irq.h | 1 + - xen/arch/x86/mm/mm-locks.h | 28 +++++++++++++++------------- - xen/arch/x86/mm/p2m-pod.c | 2 +- - xen/common/event_channel.c | 5 +++-- - xen/common/grant_table.c | 6 +++--- - xen/common/sched/core.c | 19 ++++++++++++------- - xen/common/sched/private.h | 26 ++++++++++++++++++++++++-- - xen/common/timer.c | 8 +++++--- - xen/drivers/passthrough/pci.c | 5 +++-- - xen/include/xen/event.h | 4 ++-- - xen/include/xen/pci.h | 8 ++++++-- - 12 files changed, 82 insertions(+), 40 deletions(-) - -diff --git a/xen/arch/x86/hvm/vpt.c b/xen/arch/x86/hvm/vpt.c -index cb1d81bf9e..66f1095245 100644 ---- a/xen/arch/x86/hvm/vpt.c -+++ b/xen/arch/x86/hvm/vpt.c -@@ -161,7 +161,7 @@ static int pt_irq_masked(struct periodic_time *pt) - * pt->vcpu field, because another thread holding the pt_migrate lock - * may already be spinning waiting for your vcpu lock. - */ --static void pt_vcpu_lock(struct vcpu *v) -+static always_inline void pt_vcpu_lock(struct vcpu *v) - { - spin_lock(&v->arch.hvm.tm_lock); - } -@@ -180,9 +180,13 @@ static void pt_vcpu_unlock(struct vcpu *v) - * need to take an additional lock that protects against pt->vcpu - * changing. - */ --static void pt_lock(struct periodic_time *pt) -+static always_inline void pt_lock(struct periodic_time *pt) - { -- read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate); -+ /* -+ * Use the speculation unsafe variant for the first lock, as the following -+ * lock taking helper already includes a speculation barrier. 
-+ */ -+ _read_lock(&pt->vcpu->domain->arch.hvm.pl_time->pt_migrate); - spin_lock(&pt->vcpu->arch.hvm.tm_lock); - } - -diff --git a/xen/arch/x86/include/asm/irq.h b/xen/arch/x86/include/asm/irq.h -index f6a0207a80..823d627fd0 100644 ---- a/xen/arch/x86/include/asm/irq.h -+++ b/xen/arch/x86/include/asm/irq.h -@@ -178,6 +178,7 @@ void cf_check irq_complete_move(struct irq_desc *); - - extern struct irq_desc *irq_desc; - -+/* Not speculation safe, only used for AP bringup. */ - void lock_vector_lock(void); - void unlock_vector_lock(void); - -diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h -index c1523aeccf..265239c49f 100644 ---- a/xen/arch/x86/mm/mm-locks.h -+++ b/xen/arch/x86/mm/mm-locks.h -@@ -86,8 +86,8 @@ static inline void _set_lock_level(int l) - this_cpu(mm_lock_level) = l; - } - --static inline void _mm_lock(const struct domain *d, mm_lock_t *l, -- const char *func, int level, int rec) -+static always_inline void _mm_lock(const struct domain *d, mm_lock_t *l, -+ const char *func, int level, int rec) - { - if ( !((mm_locked_by_me(l)) && rec) ) - _check_lock_level(d, level); -@@ -137,8 +137,8 @@ static inline int mm_write_locked_by_me(mm_rwlock_t *l) - return (l->locker == get_processor_id()); - } - --static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l, -- const char *func, int level) -+static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l, -+ const char *func, int level) - { - if ( !mm_write_locked_by_me(l) ) - { -@@ -149,6 +149,8 @@ static inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l, - l->unlock_level = _get_lock_level(); - _set_lock_level(_lock_level(d, level)); - } -+ else -+ block_speculation(); - l->recurse_count++; - } - -@@ -162,8 +164,8 @@ static inline void mm_write_unlock(mm_rwlock_t *l) - percpu_write_unlock(p2m_percpu_rwlock, &l->lock); - } - --static inline void _mm_read_lock(const struct domain *d, mm_rwlock_t *l, -- int level) -+static always_inline void 
_mm_read_lock(const struct domain *d, mm_rwlock_t *l, -+ int level) - { - _check_lock_level(d, level); - percpu_read_lock(p2m_percpu_rwlock, &l->lock); -@@ -178,15 +180,15 @@ static inline void mm_read_unlock(mm_rwlock_t *l) - - /* This wrapper uses the line number to express the locking order below */ - #define declare_mm_lock(name) \ -- static inline void mm_lock_##name(const struct domain *d, mm_lock_t *l, \ -- const char *func, int rec) \ -+ static always_inline void mm_lock_##name( \ -+ const struct domain *d, mm_lock_t *l, const char *func, int rec) \ - { _mm_lock(d, l, func, MM_LOCK_ORDER_##name, rec); } - #define declare_mm_rwlock(name) \ -- static inline void mm_write_lock_##name(const struct domain *d, \ -- mm_rwlock_t *l, const char *func) \ -+ static always_inline void mm_write_lock_##name( \ -+ const struct domain *d, mm_rwlock_t *l, const char *func) \ - { _mm_write_lock(d, l, func, MM_LOCK_ORDER_##name); } \ -- static inline void mm_read_lock_##name(const struct domain *d, \ -- mm_rwlock_t *l) \ -+ static always_inline void mm_read_lock_##name(const struct domain *d, \ -+ mm_rwlock_t *l) \ - { _mm_read_lock(d, l, MM_LOCK_ORDER_##name); } - /* These capture the name of the calling function */ - #define mm_lock(name, d, l) mm_lock_##name(d, l, __func__, 0) -@@ -321,7 +323,7 @@ declare_mm_lock(altp2mlist) - #define MM_LOCK_ORDER_altp2m 40 - declare_mm_rwlock(altp2m); - --static inline void p2m_lock(struct p2m_domain *p) -+static always_inline void p2m_lock(struct p2m_domain *p) - { - if ( p2m_is_altp2m(p) ) - mm_write_lock(altp2m, p->domain, &p->lock); -diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c -index fc110506dc..99dbcb3101 100644 ---- a/xen/arch/x86/mm/p2m-pod.c -+++ b/xen/arch/x86/mm/p2m-pod.c -@@ -36,7 +36,7 @@ - #define superpage_aligned(_x) (((_x)&(SUPERPAGE_PAGES-1))==0) - - /* Enforce lock ordering when grabbing the "external" page_alloc lock */ --static inline void lock_page_alloc(struct p2m_domain *p2m) -+static 
always_inline void lock_page_alloc(struct p2m_domain *p2m) - { - page_alloc_mm_pre_lock(p2m->domain); - spin_lock(&(p2m->domain->page_alloc_lock)); -diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c -index f5e0b12d15..dada9f15f5 100644 ---- a/xen/common/event_channel.c -+++ b/xen/common/event_channel.c -@@ -62,7 +62,7 @@ - * just assume the event channel is free or unbound at the moment when the - * evtchn_read_trylock() returns false. - */ --static inline void evtchn_write_lock(struct evtchn *evtchn) -+static always_inline void evtchn_write_lock(struct evtchn *evtchn) - { - write_lock(&evtchn->lock); - -@@ -364,7 +364,8 @@ int evtchn_alloc_unbound(evtchn_alloc_unbound_t *alloc, evtchn_port_t port) - return rc; - } - --static void double_evtchn_lock(struct evtchn *lchn, struct evtchn *rchn) -+static always_inline void double_evtchn_lock(struct evtchn *lchn, -+ struct evtchn *rchn) - { - ASSERT(lchn != rchn); - -diff --git a/xen/common/grant_table.c b/xen/common/grant_table.c -index ee7cc496b8..62a8685cd5 100644 ---- a/xen/common/grant_table.c -+++ b/xen/common/grant_table.c -@@ -410,7 +410,7 @@ static inline void act_set_gfn(struct active_grant_entry *act, gfn_t gfn) - - static DEFINE_PERCPU_RWLOCK_GLOBAL(grant_rwlock); - --static inline void grant_read_lock(struct grant_table *gt) -+static always_inline void grant_read_lock(struct grant_table *gt) - { - percpu_read_lock(grant_rwlock, >->lock); - } -@@ -420,7 +420,7 @@ static inline void grant_read_unlock(struct grant_table *gt) - percpu_read_unlock(grant_rwlock, >->lock); - } - --static inline void grant_write_lock(struct grant_table *gt) -+static always_inline void grant_write_lock(struct grant_table *gt) - { - percpu_write_lock(grant_rwlock, >->lock); - } -@@ -457,7 +457,7 @@ nr_active_grant_frames(struct grant_table *gt) - return num_act_frames_from_sha_frames(nr_grant_frames(gt)); - } - --static inline struct active_grant_entry * -+static always_inline struct active_grant_entry * - 
active_entry_acquire(struct grant_table *t, grant_ref_t e) - { - struct active_grant_entry *act; -diff --git a/xen/common/sched/core.c b/xen/common/sched/core.c -index 078beb1adb..29bbab5ac6 100644 ---- a/xen/common/sched/core.c -+++ b/xen/common/sched/core.c -@@ -348,23 +348,28 @@ uint64_t get_cpu_idle_time(unsigned int cpu) - * This avoids dead- or live-locks when this code is running on both - * cpus at the same time. - */ --static void sched_spin_lock_double(spinlock_t *lock1, spinlock_t *lock2, -- unsigned long *flags) -+static always_inline void sched_spin_lock_double( -+ spinlock_t *lock1, spinlock_t *lock2, unsigned long *flags) - { -+ /* -+ * In order to avoid extra overhead, use the locking primitives without the -+ * speculation barrier, and introduce a single barrier here. -+ */ - if ( lock1 == lock2 ) - { -- spin_lock_irqsave(lock1, *flags); -+ *flags = _spin_lock_irqsave(lock1); - } - else if ( lock1 < lock2 ) - { -- spin_lock_irqsave(lock1, *flags); -- spin_lock(lock2); -+ *flags = _spin_lock_irqsave(lock1); -+ _spin_lock(lock2); - } - else - { -- spin_lock_irqsave(lock2, *flags); -- spin_lock(lock1); -+ *flags = _spin_lock_irqsave(lock2); -+ _spin_lock(lock1); - } -+ block_lock_speculation(); - } - - static void sched_spin_unlock_double(spinlock_t *lock1, spinlock_t *lock2, -diff --git a/xen/common/sched/private.h b/xen/common/sched/private.h -index 0527a8c70d..24a93dd0c1 100644 ---- a/xen/common/sched/private.h -+++ b/xen/common/sched/private.h -@@ -207,8 +207,24 @@ DECLARE_PER_CPU(cpumask_t, cpumask_scratch); - #define cpumask_scratch (&this_cpu(cpumask_scratch)) - #define cpumask_scratch_cpu(c) (&per_cpu(cpumask_scratch, c)) - -+/* -+ * Deal with _spin_lock_irqsave() returning the flags value instead of storing -+ * it in a passed parameter. 
-+ */ -+#define _sched_spinlock0(lock, irq) _spin_lock##irq(lock) -+#define _sched_spinlock1(lock, irq, arg) ({ \ -+ BUILD_BUG_ON(sizeof(arg) != sizeof(unsigned long)); \ -+ (arg) = _spin_lock##irq(lock); \ -+}) -+ -+#define _sched_spinlock__(nr) _sched_spinlock ## nr -+#define _sched_spinlock_(nr) _sched_spinlock__(nr) -+#define _sched_spinlock(lock, irq, args...) \ -+ _sched_spinlock_(count_args(args))(lock, irq, ## args) -+ - #define sched_lock(kind, param, cpu, irq, arg...) \ --static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ -+static always_inline spinlock_t \ -+*kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ - { \ - for ( ; ; ) \ - { \ -@@ -220,10 +236,16 @@ static inline spinlock_t *kind##_schedule_lock##irq(param EXTRA_TYPE(arg)) \ - * \ - * It may also be the case that v->processor may change but the \ - * lock may be the same; this will succeed in that case. \ -+ * \ -+ * Use the speculation unsafe locking helper, there's a speculation \ -+ * barrier before returning to the caller. \ - */ \ -- spin_lock##irq(lock, ## arg); \ -+ _sched_spinlock(lock, irq, ## arg); \ - if ( likely(lock == get_sched_res(cpu)->schedule_lock) ) \ -+ { \ -+ block_lock_speculation(); \ - return lock; \ -+ } \ - spin_unlock##irq(lock, ## arg); \ - } \ - } -diff --git a/xen/common/timer.c b/xen/common/timer.c -index 9b5016d5ed..459668d417 100644 ---- a/xen/common/timer.c -+++ b/xen/common/timer.c -@@ -240,7 +240,7 @@ static inline void deactivate_timer(struct timer *timer) - list_add(&timer->inactive, &per_cpu(timers, timer->cpu).inactive); - } - --static inline bool_t timer_lock(struct timer *timer) -+static inline bool_t timer_lock_unsafe(struct timer *timer) - { - unsigned int cpu; - -@@ -254,7 +254,8 @@ static inline bool_t timer_lock(struct timer *timer) - rcu_read_unlock(&timer_cpu_read_lock); - return 0; - } -- spin_lock(&per_cpu(timers, cpu).lock); -+ /* Use the speculation unsafe variant, the wrapper has the barrier. 
*/ -+ _spin_lock(&per_cpu(timers, cpu).lock); - if ( likely(timer->cpu == cpu) ) - break; - spin_unlock(&per_cpu(timers, cpu).lock); -@@ -267,8 +268,9 @@ static inline bool_t timer_lock(struct timer *timer) - #define timer_lock_irqsave(t, flags) ({ \ - bool_t __x; \ - local_irq_save(flags); \ -- if ( !(__x = timer_lock(t)) ) \ -+ if ( !(__x = timer_lock_unsafe(t)) ) \ - local_irq_restore(flags); \ -+ block_lock_speculation(); \ - __x; \ - }) - -diff --git a/xen/drivers/passthrough/pci.c b/xen/drivers/passthrough/pci.c -index 8c62b14d19..1b3d285166 100644 ---- a/xen/drivers/passthrough/pci.c -+++ b/xen/drivers/passthrough/pci.c -@@ -52,9 +52,10 @@ struct pci_seg { - - static spinlock_t _pcidevs_lock = SPIN_LOCK_UNLOCKED; - --void pcidevs_lock(void) -+/* Do not use, as it has no speculation barrier, use pcidevs_lock() instead. */ -+void pcidevs_lock_unsafe(void) - { -- spin_lock_recursive(&_pcidevs_lock); -+ _spin_lock_recursive(&_pcidevs_lock); - } - - void pcidevs_unlock(void) -diff --git a/xen/include/xen/event.h b/xen/include/xen/event.h -index 8eae9984a9..dd96e84c69 100644 ---- a/xen/include/xen/event.h -+++ b/xen/include/xen/event.h -@@ -114,12 +114,12 @@ void notify_via_xen_event_channel(struct domain *ld, int lport); - #define bucket_from_port(d, p) \ - ((group_from_port(d, p))[((p) % EVTCHNS_PER_GROUP) / EVTCHNS_PER_BUCKET]) - --static inline void evtchn_read_lock(struct evtchn *evtchn) -+static always_inline void evtchn_read_lock(struct evtchn *evtchn) - { - read_lock(&evtchn->lock); - } - --static inline bool evtchn_read_trylock(struct evtchn *evtchn) -+static always_inline bool evtchn_read_trylock(struct evtchn *evtchn) - { - return read_trylock(&evtchn->lock); - } -diff --git a/xen/include/xen/pci.h b/xen/include/xen/pci.h -index 5975ca2f30..b373f139d1 100644 ---- a/xen/include/xen/pci.h -+++ b/xen/include/xen/pci.h -@@ -155,8 +155,12 @@ struct pci_dev { - * devices, it also sync the access to the msi capability that is not - * interrupt handling related 
(the mask bit register). - */ -- --void pcidevs_lock(void); -+void pcidevs_lock_unsafe(void); -+static always_inline void pcidevs_lock(void) -+{ -+ pcidevs_lock_unsafe(); -+ block_lock_speculation(); -+} - void pcidevs_unlock(void); - bool_t __must_check pcidevs_locked(void); - --- -2.44.0 - diff --git a/0051-x86-entry-don-t-clear-DF-when-raising-UD-for-lack-of.patch b/0051-x86-entry-don-t-clear-DF-when-raising-UD-for-lack-of.patch new file mode 100644 index 0000000..acefc8e --- /dev/null +++ b/0051-x86-entry-don-t-clear-DF-when-raising-UD-for-lack-of.patch @@ -0,0 +1,58 @@ +From 7e636b8a16412d4f0d94b2b24d7ebcd2c749afff Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Thu, 4 Jul 2024 14:14:49 +0200 +Subject: [PATCH 51/56] x86/entry: don't clear DF when raising #UD for lack of + syscall handler + +While doing so is intentional when invoking the actual callback, to +mimic a hard-coded SYCALL_MASK / FMASK MSR, the same should not be done +when no handler is available and hence #UD is raised. + +Fixes: ca6fcf4321b3 ("x86/pv: Inject #UD for missing SYSCALL callbacks") +Reported-by: Andrew Cooper <andrew.cooper3@citrix.com> +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> +master commit: d2fe9ab3048d503869ec81bc49db07e55a4a2386 +master date: 2024-07-02 12:01:21 +0200 +--- + xen/arch/x86/x86_64/entry.S | 12 +++++++++++- + 1 file changed, 11 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/x86_64/entry.S b/xen/arch/x86/x86_64/entry.S +index 054fcb225f..d3def49ea3 100644 +--- a/xen/arch/x86/x86_64/entry.S ++++ b/xen/arch/x86/x86_64/entry.S +@@ -38,6 +38,14 @@ switch_to_kernel: + setc %cl + leal (,%rcx,TBF_INTERRUPT),%ecx + ++ /* ++ * The PV ABI hardcodes the (guest-inaccessible and virtual) ++ * SYSCALL_MASK MSR such that DF (and nothing else) would be cleared. ++ * Note that the equivalent of IF (VGCF_syscall_disables_events) is ++ * dealt with separately above. 
++ */ ++ mov $~X86_EFLAGS_DF, %esi ++ + test %rax, %rax + UNLIKELY_START(z, syscall_no_callback) /* TB_eip == 0 => #UD */ + mov VCPU_trap_ctxt(%rbx), %rdi +@@ -47,12 +55,14 @@ UNLIKELY_START(z, syscall_no_callback) /* TB_eip == 0 => #UD */ + testb $4, X86_EXC_UD * TRAPINFO_sizeof + TRAPINFO_flags(%rdi) + setnz %cl + lea TBF_EXCEPTION(, %rcx, TBF_INTERRUPT), %ecx ++ or $~0, %esi /* Don't clear DF */ + UNLIKELY_END(syscall_no_callback) + + movq %rax,TRAPBOUNCE_eip(%rdx) + movb %cl,TRAPBOUNCE_flags(%rdx) + call create_bounce_frame +- andl $~X86_EFLAGS_DF,UREGS_eflags(%rsp) ++ /* Conditionally clear DF */ ++ and %esi, UREGS_eflags(%rsp) + /* %rbx: struct vcpu */ + test_all_events: + ASSERT_NOT_IN_ATOMIC +-- +2.45.2 + diff --git a/0052-evtchn-build-fix-for-Arm.patch b/0052-evtchn-build-fix-for-Arm.patch new file mode 100644 index 0000000..6cbeb10 --- /dev/null +++ b/0052-evtchn-build-fix-for-Arm.patch @@ -0,0 +1,43 @@ +From 45c5333935628e7c80de0bd5a9d9eff50b305b16 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Thu, 4 Jul 2024 16:57:29 +0200 +Subject: [PATCH 52/56] evtchn: build fix for Arm + +When backporting daa90dfea917 ("pirq_cleanup_check() leaks") I neglected +to pay attention to it depending on 13a7b0f9f747 ("restrict concept of +pIRQ to x86"). That one doesn't want backporting imo, so use / adjust +custom #ifdef-ary to address the immediate issue of pirq_cleanup_check() +not being available on Arm. 
+ +Signed-off-by: Jan Beulich <jbeulich@suse.com> +--- + xen/common/event_channel.c | 4 +++- + 1 file changed, 3 insertions(+), 1 deletion(-) + +diff --git a/xen/common/event_channel.c b/xen/common/event_channel.c +index b1a6215c37..e6ec556603 100644 +--- a/xen/common/event_channel.c ++++ b/xen/common/event_channel.c +@@ -643,7 +643,9 @@ static int evtchn_bind_pirq(evtchn_bind_pirq_t *bind) + if ( rc != 0 ) + { + info->evtchn = 0; ++#ifdef CONFIG_X86 + pirq_cleanup_check(info, d); ++#endif + goto out; + } + +@@ -713,8 +715,8 @@ int evtchn_close(struct domain *d1, int port1, bool guest) + * The successful path of unmap_domain_pirq_emuirq() will have + * called pirq_cleanup_check() already. + */ +-#endif + pirq_cleanup_check(pirq, d1); ++#endif + } + unlink_pirq_port(chn1, d1->vcpu[chn1->notify_vcpu_id]); + break; +-- +2.45.2 + diff --git a/0052-x86-mm-add-speculation-barriers-to-open-coded-locks.patch b/0052-x86-mm-add-speculation-barriers-to-open-coded-locks.patch deleted file mode 100644 index 9e20f78..0000000 --- a/0052-x86-mm-add-speculation-barriers-to-open-coded-locks.patch +++ /dev/null @@ -1,73 +0,0 @@ -From 074b4c8987db235a0b86798810c045f68e4775b6 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Mon, 4 Mar 2024 18:08:48 +0100 -Subject: [PATCH 52/67] x86/mm: add speculation barriers to open coded locks -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Add a speculation barrier to the clearly identified open-coded lock taking -functions. - -Note that the memory sharing page_lock() replacement (_page_lock()) is left -as-is, as the code is experimental and not security supported. 
- -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 42a572a38e22a97d86a4b648a22597628d5b42e4) ---- - xen/arch/x86/include/asm/mm.h | 4 +++- - xen/arch/x86/mm.c | 6 ++++-- - 2 files changed, 7 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/include/asm/mm.h b/xen/arch/x86/include/asm/mm.h -index a5d7fdd32e..5845b729c3 100644 ---- a/xen/arch/x86/include/asm/mm.h -+++ b/xen/arch/x86/include/asm/mm.h -@@ -393,7 +393,9 @@ const struct platform_bad_page *get_platform_badpages(unsigned int *array_size); - * The use of PGT_locked in mem_sharing does not collide, since mem_sharing is - * only supported for hvm guests, which do not have PV PTEs updated. - */ --int page_lock(struct page_info *page); -+int page_lock_unsafe(struct page_info *page); -+#define page_lock(pg) lock_evaluate_nospec(page_lock_unsafe(pg)) -+ - void page_unlock(struct page_info *page); - - void put_page_type(struct page_info *page); -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 330c4abcd1..8d19d719bd 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -2033,7 +2033,7 @@ static inline bool current_locked_page_ne_check(struct page_info *page) { - #define current_locked_page_ne_check(x) true - #endif - --int page_lock(struct page_info *page) -+int page_lock_unsafe(struct page_info *page) - { - unsigned long x, nx; - -@@ -2094,7 +2094,7 @@ void page_unlock(struct page_info *page) - * l3t_lock(), so to avoid deadlock we must avoid grabbing them in - * reverse order. 
- */ --static void l3t_lock(struct page_info *page) -+static always_inline void l3t_lock(struct page_info *page) - { - unsigned long x, nx; - -@@ -2103,6 +2103,8 @@ static void l3t_lock(struct page_info *page) - cpu_relax(); - nx = x | PGT_locked; - } while ( cmpxchg(&page->u.inuse.type_info, x, nx) != x ); -+ -+ block_lock_speculation(); - } - - static void l3t_unlock(struct page_info *page) --- -2.44.0 - diff --git a/0053-x86-IRQ-avoid-double-unlock-in-map_domain_pirq.patch b/0053-x86-IRQ-avoid-double-unlock-in-map_domain_pirq.patch new file mode 100644 index 0000000..686e142 --- /dev/null +++ b/0053-x86-IRQ-avoid-double-unlock-in-map_domain_pirq.patch @@ -0,0 +1,53 @@ +From d46a1ce3175dc45e97a8c9b89b0d0ff46145ae64 Mon Sep 17 00:00:00 2001 +From: Jan Beulich <jbeulich@suse.com> +Date: Tue, 16 Jul 2024 14:14:43 +0200 +Subject: [PATCH 53/56] x86/IRQ: avoid double unlock in map_domain_pirq() +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Forever since its introduction the main loop in the function dealing +with multi-vector MSI had error exit points ("break") with different +properties: In one case no IRQ descriptor lock is being held. +Nevertheless the subsequent error cleanup path assumed such a lock would +uniformly need releasing. Identify the case by setting "desc" to NULL, +thus allowing the unlock to be skipped as necessary. + +This is CVE-2024-31143 / XSA-458. 
+ +Coverity ID: 1605298 +Fixes: d1b6d0a02489 ("x86: enable multi-vector MSI") +Signed-off-by: Jan Beulich <jbeulich@suse.com> +Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> +master commit: 57338346f29cea7b183403561bdc5f407163b846 +master date: 2024-07-16 14:09:14 +0200 +--- + xen/arch/x86/irq.c | 5 ++++- + 1 file changed, 4 insertions(+), 1 deletion(-) + +diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c +index 00be3b88e8..5dae8bd1b9 100644 +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2287,6 +2287,7 @@ int map_domain_pirq( + + set_domain_irq_pirq(d, irq, info); + spin_unlock_irqrestore(&desc->lock, flags); ++ desc = NULL; + + info = NULL; + irq = create_irq(NUMA_NO_NODE, true); +@@ -2322,7 +2323,9 @@ int map_domain_pirq( + + if ( ret ) + { +- spin_unlock_irqrestore(&desc->lock, flags); ++ if ( desc ) ++ spin_unlock_irqrestore(&desc->lock, flags); ++ + pci_disable_msi(msi_desc); + if ( nr ) + { +-- +2.45.2 + diff --git a/0053-x86-protect-conditional-lock-taking-from-speculative.patch b/0053-x86-protect-conditional-lock-taking-from-speculative.patch deleted file mode 100644 index f0caa24..0000000 --- a/0053-x86-protect-conditional-lock-taking-from-speculative.patch +++ /dev/null @@ -1,216 +0,0 @@ -From 0ebd2e49bcd0f566ba6b9158555942aab8e41332 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> -Date: Mon, 4 Mar 2024 16:24:21 +0100 -Subject: [PATCH 53/67] x86: protect conditional lock taking from speculative - execution -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Conditionally taken locks that use the pattern: - -if ( lock ) - spin_lock(...); - -Need an else branch in order to issue an speculation barrier in the else case, -just like it's done in case the lock needs to be acquired. - -eval_nospec() could be used on the condition itself, but that would result in a -double barrier on the branch where the lock is taken. 
- -Introduce a new pair of helpers, {gfn,spin}_lock_if() that can be used to -conditionally take a lock in a speculation safe way. - -This is part of XSA-453 / CVE-2024-2193 - -Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -(cherry picked from commit 03cf7ca23e0e876075954c558485b267b7d02406) ---- - xen/arch/x86/mm.c | 35 +++++++++++++---------------------- - xen/arch/x86/mm/mm-locks.h | 9 +++++++++ - xen/arch/x86/mm/p2m.c | 5 ++--- - xen/include/xen/spinlock.h | 8 ++++++++ - 4 files changed, 32 insertions(+), 25 deletions(-) - -diff --git a/xen/arch/x86/mm.c b/xen/arch/x86/mm.c -index 8d19d719bd..d31b8d56ff 100644 ---- a/xen/arch/x86/mm.c -+++ b/xen/arch/x86/mm.c -@@ -5023,8 +5023,7 @@ static l3_pgentry_t *virt_to_xen_l3e(unsigned long v) - if ( !l3t ) - return NULL; - UNMAP_DOMAIN_PAGE(l3t); -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( !(l4e_get_flags(*pl4e) & _PAGE_PRESENT) ) - { - l4_pgentry_t l4e = l4e_from_mfn(l3mfn, __PAGE_HYPERVISOR); -@@ -5061,8 +5060,7 @@ static l2_pgentry_t *virt_to_xen_l2e(unsigned long v) - return NULL; - } - UNMAP_DOMAIN_PAGE(l2t); -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( !(l3e_get_flags(*pl3e) & _PAGE_PRESENT) ) - { - l3e_write(pl3e, l3e_from_mfn(l2mfn, __PAGE_HYPERVISOR)); -@@ -5100,8 +5098,7 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v) - return NULL; - } - UNMAP_DOMAIN_PAGE(l1t); -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( !(l2e_get_flags(*pl2e) & _PAGE_PRESENT) ) - { - l2e_write(pl2e, l2e_from_mfn(l1mfn, __PAGE_HYPERVISOR)); -@@ -5132,6 +5129,8 @@ l1_pgentry_t *virt_to_xen_l1e(unsigned long v) - do { \ - if ( locking ) \ - l3t_lock(page); \ -+ else \ -+ block_lock_speculation(); \ - } while ( false ) - - #define L3T_UNLOCK(page) \ -@@ -5347,8 +5346,7 @@ int map_pages_to_xen( - if ( l3e_get_flags(ol3e) & 
_PAGE_GLOBAL ) - flush_flags |= FLUSH_TLB_GLOBAL; - -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) && - (l3e_get_flags(*pl3e) & _PAGE_PSE) ) - { -@@ -5452,8 +5450,7 @@ int map_pages_to_xen( - if ( l2e_get_flags(*pl2e) & _PAGE_GLOBAL ) - flush_flags |= FLUSH_TLB_GLOBAL; - -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) && - (l2e_get_flags(*pl2e) & _PAGE_PSE) ) - { -@@ -5494,8 +5491,7 @@ int map_pages_to_xen( - unsigned long base_mfn; - const l1_pgentry_t *l1t; - -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - - ol2e = *pl2e; - /* -@@ -5549,8 +5545,7 @@ int map_pages_to_xen( - unsigned long base_mfn; - const l2_pgentry_t *l2t; - -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - - ol3e = *pl3e; - /* -@@ -5694,8 +5689,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - l3e_get_flags(*pl3e))); - UNMAP_DOMAIN_PAGE(l2t); - -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( (l3e_get_flags(*pl3e) & _PAGE_PRESENT) && - (l3e_get_flags(*pl3e) & _PAGE_PSE) ) - { -@@ -5754,8 +5748,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - l2e_get_flags(*pl2e) & ~_PAGE_PSE)); - UNMAP_DOMAIN_PAGE(l1t); - -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - if ( (l2e_get_flags(*pl2e) & _PAGE_PRESENT) && - (l2e_get_flags(*pl2e) & _PAGE_PSE) ) - { -@@ -5799,8 +5792,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - */ - if ( (nf & _PAGE_PRESENT) || ((v != e) && (l1_table_offset(v) != 0)) ) - continue; -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - - /* - * L2E may be already cleared, or set to a 
superpage, by -@@ -5847,8 +5839,7 @@ int modify_xen_mappings(unsigned long s, unsigned long e, unsigned int nf) - if ( (nf & _PAGE_PRESENT) || - ((v != e) && (l2_table_offset(v) + l1_table_offset(v) != 0)) ) - continue; -- if ( locking ) -- spin_lock(&map_pgdir_lock); -+ spin_lock_if(locking, &map_pgdir_lock); - - /* - * L3E may be already cleared, or set to a superpage, by -diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h -index 265239c49f..3ea2d8eb03 100644 ---- a/xen/arch/x86/mm/mm-locks.h -+++ b/xen/arch/x86/mm/mm-locks.h -@@ -347,6 +347,15 @@ static inline void p2m_unlock(struct p2m_domain *p) - #define p2m_locked_by_me(p) mm_write_locked_by_me(&(p)->lock) - #define gfn_locked_by_me(p,g) p2m_locked_by_me(p) - -+static always_inline void gfn_lock_if(bool condition, struct p2m_domain *p2m, -+ gfn_t gfn, unsigned int order) -+{ -+ if ( condition ) -+ gfn_lock(p2m, gfn, order); -+ else -+ block_lock_speculation(); -+} -+ - /* PoD lock (per-p2m-table) - * - * Protects private PoD data structs: entry and cache -diff --git a/xen/arch/x86/mm/p2m.c b/xen/arch/x86/mm/p2m.c -index b28c899b5e..1fa9e01012 100644 ---- a/xen/arch/x86/mm/p2m.c -+++ b/xen/arch/x86/mm/p2m.c -@@ -292,9 +292,8 @@ mfn_t p2m_get_gfn_type_access(struct p2m_domain *p2m, gfn_t gfn, - if ( q & P2M_UNSHARE ) - q |= P2M_ALLOC; - -- if ( locked ) -- /* Grab the lock here, don't release until put_gfn */ -- gfn_lock(p2m, gfn, 0); -+ /* Grab the lock here, don't release until put_gfn */ -+ gfn_lock_if(locked, p2m, gfn, 0); - - mfn = p2m->get_entry(p2m, gfn, t, a, q, page_order, NULL); - -diff --git a/xen/include/xen/spinlock.h b/xen/include/xen/spinlock.h -index daf48fdea7..7e75d0e2e7 100644 ---- a/xen/include/xen/spinlock.h -+++ b/xen/include/xen/spinlock.h -@@ -216,6 +216,14 @@ static always_inline void spin_lock_irq(spinlock_t *l) - block_lock_speculation(); \ - }) - -+/* Conditionally take a spinlock in a speculation safe way. 
*/ -+static always_inline void spin_lock_if(bool condition, spinlock_t *l) -+{ -+ if ( condition ) -+ _spin_lock(l); -+ block_lock_speculation(); -+} -+ - #define spin_unlock(l) _spin_unlock(l) - #define spin_unlock_irq(l) _spin_unlock_irq(l) - #define spin_unlock_irqrestore(l, f) _spin_unlock_irqrestore(l, f) --- -2.44.0 - diff --git a/0054-tools-ipxe-update-for-fixing-build-with-GCC12.patch b/0054-tools-ipxe-update-for-fixing-build-with-GCC12.patch deleted file mode 100644 index 90efaf8..0000000 --- a/0054-tools-ipxe-update-for-fixing-build-with-GCC12.patch +++ /dev/null @@ -1,33 +0,0 @@ -From a01c0b0f9691a8350e74938329892f949669119e Mon Sep 17 00:00:00 2001 -From: Olaf Hering <olaf@aepfle.de> -Date: Wed, 27 Mar 2024 12:27:03 +0100 -Subject: [PATCH 54/67] tools: ipxe: update for fixing build with GCC12 - -Use a snapshot which includes commit -b0ded89e917b48b73097d3b8b88dfa3afb264ed0 ("[build] Disable dangling -pointer checking for GCC"), which fixes build with gcc12. - -Signed-off-by: Olaf Hering <olaf@aepfle.de> -Acked-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 18a36b4a9b088875486cfe33a2d4a8ae7eb4ab47 -master date: 2023-04-25 23:47:45 +0100 ---- - tools/firmware/etherboot/Makefile | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/tools/firmware/etherboot/Makefile b/tools/firmware/etherboot/Makefile -index 4bc3633ba3..7a56fe8014 100644 ---- a/tools/firmware/etherboot/Makefile -+++ b/tools/firmware/etherboot/Makefile -@@ -11,7 +11,7 @@ IPXE_GIT_URL ?= git://git.ipxe.org/ipxe.git - endif - - # put an updated tar.gz on xenbits after changes to this variable --IPXE_GIT_TAG := 3c040ad387099483102708bb1839110bc788cefb -+IPXE_GIT_TAG := 1d1cf74a5e58811822bee4b3da3cff7282fcdfca - - IPXE_TARBALL_URL ?= $(XEN_EXTFILES_URL)/ipxe-git-$(IPXE_GIT_TAG).tar.gz - --- -2.44.0 - diff --git a/0054-x86-physdev-Return-pirq-that-irq-was-already-mapped-.patch b/0054-x86-physdev-Return-pirq-that-irq-was-already-mapped-.patch new file mode 100644 
index 0000000..5e245f9 --- /dev/null +++ b/0054-x86-physdev-Return-pirq-that-irq-was-already-mapped-.patch @@ -0,0 +1,38 @@ +From f9f3062f11e144438fac9e9da6aa4cb41a6009b1 Mon Sep 17 00:00:00 2001 +From: Jiqian Chen <Jiqian.Chen@amd.com> +Date: Thu, 25 Jul 2024 16:20:17 +0200 +Subject: [PATCH 54/56] x86/physdev: Return pirq that irq was already mapped to + +Fix bug introduced by 0762e2502f1f ("x86/physdev: factor out the code to allocate and +map a pirq"). After that re-factoring, when pirq<0 and current_pirq>0, it means +caller want to allocate a free pirq for irq but irq already has a mapped pirq, then +it returns the negative pirq, so it fails. However, the logic before that +re-factoring is different, it should return the current_pirq that irq was already +mapped to and make the call success. + +Fixes: 0762e2502f1f ("x86/physdev: factor out the code to allocate and map a pirq") +Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com> +Signed-off-by: Huang Rui <ray.huang@amd.com> +Signed-off-by: Jiqian Chen <Jiqian.Chen@amd.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: 0d2b87b5adfc19e87e9027d996db204c66a47f30 +master date: 2024-07-08 14:46:12 +0100 +--- + xen/arch/x86/irq.c | 1 + + 1 file changed, 1 insertion(+) + +diff --git a/xen/arch/x86/irq.c b/xen/arch/x86/irq.c +index 5dae8bd1b9..6b1f338eae 100644 +--- a/xen/arch/x86/irq.c ++++ b/xen/arch/x86/irq.c +@@ -2914,6 +2914,7 @@ static int allocate_pirq(struct domain *d, int index, int pirq, int irq, + d->domain_id, index, pirq, current_pirq); + if ( current_pirq < 0 ) + return -EBUSY; ++ pirq = current_pirq; + } + else if ( type == MAP_PIRQ_TYPE_MULTI_MSI ) + { +-- +2.45.2 + diff --git a/0055-tools-libxs-Fix-fcntl-invocation-in-set_cloexec.patch b/0055-tools-libxs-Fix-fcntl-invocation-in-set_cloexec.patch new file mode 100644 index 0000000..e4cc09e --- /dev/null +++ b/0055-tools-libxs-Fix-fcntl-invocation-in-set_cloexec.patch @@ -0,0 +1,57 @@ +From 81f1e807fadb8111d71b78191e01ca688d74eac7 Mon Sep 17 
00:00:00 2001 +From: Andrew Cooper <andrew.cooper3@citrix.com> +Date: Thu, 25 Jul 2024 16:20:53 +0200 +Subject: [PATCH 55/56] tools/libxs: Fix fcntl() invocation in set_cloexec() + +set_cloexec() had a bit too much copy&pate from setnonblock(), and +insufficient testing on ancient versions of Linux... + +As written (emulating ancient linux by undef'ing O_CLOEXEC), strace shows: + + open("/dev/xen/xenbus", O_RDWR) = 3 + fcntl(3, F_GETFL) = 0x8002 (flags O_RDWR|O_LARGEFILE) + fcntl(3, 0x8003 /* F_??? */, 0x7ffe4a771d90) = -1 EINVAL (Invalid argument) + close(3) = 0 + +which is obviously nonsense. + +Switch F_GETFL -> F_GETFD, and fix the second invocation to use F_SETFD. With +this, strace is rather happer: + + open("/dev/xen/xenbus", O_RDWR) = 3 + fcntl(3, F_GETFD) = 0 + fcntl(3, F_SETFD, FD_CLOEXEC) = 0 + +Fixes: bf7c1464706a ("tools/libxs: Fix CLOEXEC handling in get_dev()") +Reported-by: Ross Lagerwall <ross.lagerwall@citrix.com> +Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> +Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> +Reviewed-by: Juergen Gross <jgross@suse.com> +master commit: 37810b52d003f8a04af41d7b1f85eff24af9f804 +master date: 2024-07-09 15:32:18 +0100 +--- + tools/libs/store/xs.c | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/tools/libs/store/xs.c b/tools/libs/store/xs.c +index c8845b69e2..38a6ce3cf2 100644 +--- a/tools/libs/store/xs.c ++++ b/tools/libs/store/xs.c +@@ -182,12 +182,12 @@ static bool setnonblock(int fd, int nonblock) { + + static bool set_cloexec(int fd) + { +- int flags = fcntl(fd, F_GETFL); ++ int flags = fcntl(fd, F_GETFD); + + if (flags < 0) + return false; + +- return fcntl(fd, flags | FD_CLOEXEC) >= 0; ++ return fcntl(fd, F_SETFD, flags | FD_CLOEXEC) >= 0; + } + + static int pipe_cloexec(int fds[2]) +-- +2.45.2 + diff --git a/0055-x86-mm-use-block_lock_speculation-in-_mm_write_lock.patch b/0055-x86-mm-use-block_lock_speculation-in-_mm_write_lock.patch deleted file mode 100644 index 
719234c..0000000 --- a/0055-x86-mm-use-block_lock_speculation-in-_mm_write_lock.patch +++ /dev/null @@ -1,35 +0,0 @@ -From a153b8b42e9027ba3057bc7c8bf55e4d71e86ec3 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Wed, 27 Mar 2024 12:28:24 +0100 -Subject: [PATCH 55/67] x86/mm: use block_lock_speculation() in - _mm_write_lock() - -I can only guess that using block_speculation() there was a leftover -from, earlier on, SPECULATIVE_HARDEN_LOCK depending on -SPECULATIVE_HARDEN_BRANCH. - -Fixes: 197ecd838a2a ("locking: attempt to ensure lock wrappers are always inline") -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: Andrew Cooper <andrew.cooper3@citrix.com> -master commit: 62018f08708a5ff6ef8fc8ff2aaaac46e5a60430 -master date: 2024-03-18 13:53:37 +0100 ---- - xen/arch/x86/mm/mm-locks.h | 2 +- - 1 file changed, 1 insertion(+), 1 deletion(-) - -diff --git a/xen/arch/x86/mm/mm-locks.h b/xen/arch/x86/mm/mm-locks.h -index 3ea2d8eb03..7d6e4d2a7c 100644 ---- a/xen/arch/x86/mm/mm-locks.h -+++ b/xen/arch/x86/mm/mm-locks.h -@@ -150,7 +150,7 @@ static always_inline void _mm_write_lock(const struct domain *d, mm_rwlock_t *l, - _set_lock_level(_lock_level(d, level)); - } - else -- block_speculation(); -+ block_lock_speculation(); - l->recurse_count++; - } - --- -2.44.0 - diff --git a/0056-x86-altcall-fix-clang-code-gen-when-using-altcall-in.patch b/0056-x86-altcall-fix-clang-code-gen-when-using-altcall-in.patch new file mode 100644 index 0000000..c94c516 --- /dev/null +++ b/0056-x86-altcall-fix-clang-code-gen-when-using-altcall-in.patch @@ -0,0 +1,85 @@ +From d078d0aa86e9e3b937f673dc89306b3afd09d560 Mon Sep 17 00:00:00 2001 +From: =?UTF-8?q?Roger=20Pau=20Monn=C3=A9?= <roger.pau@citrix.com> +Date: Thu, 25 Jul 2024 16:21:17 +0200 +Subject: [PATCH 56/56] x86/altcall: fix clang code-gen when using altcall in + loop constructs +MIME-Version: 1.0 +Content-Type: text/plain; charset=UTF-8 +Content-Transfer-Encoding: 8bit + +Yet another clang code 
generation issue when using altcalls. + +The issue this time is with using loop constructs around alternative_{,v}call +instances using parameter types smaller than the register size. + +Given the following example code: + +static void bar(bool b) +{ + unsigned int i; + + for ( i = 0; i < 10; i++ ) + { + int ret_; + register union { + bool e; + unsigned long r; + } di asm("rdi") = { .e = b }; + register unsigned long si asm("rsi"); + register unsigned long dx asm("rdx"); + register unsigned long cx asm("rcx"); + register unsigned long r8 asm("r8"); + register unsigned long r9 asm("r9"); + register unsigned long r10 asm("r10"); + register unsigned long r11 asm("r11"); + + asm volatile ( "call %c[addr]" + : "+r" (di), "=r" (si), "=r" (dx), + "=r" (cx), "=r" (r8), "=r" (r9), + "=r" (r10), "=r" (r11), "=a" (ret_) + : [addr] "i" (&(func)), "g" (func) + : "memory" ); + } +} + +See: https://godbolt.org/z/qvxMGd84q + +Clang will generate machine code that only resets the low 8 bits of %rdi +between loop calls, leaving the rest of the register possibly containing +garbage from the use of %rdi inside the called function. Note also that clang +doesn't truncate the input parameters at the callee, thus breaking the psABI. + +Fix this by turning the `e` element in the anonymous union into an array that +consumes the same space as an unsigned long, as this forces clang to reset the +whole %rdi register instead of just the low 8 bits. 
+ +Fixes: 2ce562b2a413 ('x86/altcall: use a union as register type for function parameters on clang') +Suggested-by: Jan Beulich <jbeulich@suse.com> +Signed-off-by: Roger Pau Monné <roger.pau@citrix.com> +Reviewed-by: Jan Beulich <jbeulich@suse.com> +master commit: d51b2f5ea1915fe058f730b0ec542cf84254fca0 +master date: 2024-07-23 13:59:30 +0200 +--- + xen/arch/x86/include/asm/alternative.h | 4 ++-- + 1 file changed, 2 insertions(+), 2 deletions(-) + +diff --git a/xen/arch/x86/include/asm/alternative.h b/xen/arch/x86/include/asm/alternative.h +index 0d3697f1de..e63b459276 100644 +--- a/xen/arch/x86/include/asm/alternative.h ++++ b/xen/arch/x86/include/asm/alternative.h +@@ -185,10 +185,10 @@ extern void alternative_branches(void); + */ + #define ALT_CALL_ARG(arg, n) \ + register union { \ +- typeof(arg) e; \ ++ typeof(arg) e[sizeof(long) / sizeof(arg)]; \ + unsigned long r; \ + } a ## n ## _ asm ( ALT_CALL_arg ## n ) = { \ +- .e = ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); }) \ ++ .e[0] = ({ BUILD_BUG_ON(sizeof(arg) > sizeof(void *)); (arg); })\ + } + #else + #define ALT_CALL_ARG(arg, n) \ +-- +2.45.2 + diff --git a/0056-x86-boot-Fix-setup_apic_nmi_watchdog-to-fail-more-cl.patch b/0056-x86-boot-Fix-setup_apic_nmi_watchdog-to-fail-more-cl.patch deleted file mode 100644 index 5d549c1..0000000 --- a/0056-x86-boot-Fix-setup_apic_nmi_watchdog-to-fail-more-cl.patch +++ /dev/null @@ -1,120 +0,0 @@ -From 471b53c6a092940f3629990d9ca946aa22bd8535 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Wed, 27 Mar 2024 12:29:11 +0100 -Subject: [PATCH 56/67] x86/boot: Fix setup_apic_nmi_watchdog() to fail more - cleanly - -Right now, if the user requests the watchdog on the command line, -setup_apic_nmi_watchdog() will blindly assume that setting up the watchdog -worked. Reuse nmi_perfctr_msr to identify when the watchdog has been -configured. - -Rearrange setup_p6_watchdog() to not set nmi_perfctr_msr until the sanity -checks are complete. 
Turn setup_p4_watchdog() into a void function, matching -the others. - -If the watchdog isn't set up, inform the user and override to NMI_NONE, which -will prevent check_nmi_watchdog() from claiming that all CPUs are stuck. - -e.g.: - - (XEN) alt table ffff82d040697c38 -> ffff82d0406a97f0 - (XEN) Failed to configure NMI watchdog - (XEN) Brought up 512 CPUs - (XEN) Scheduling granularity: cpu, 1 CPU per sched-resource - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: f658321374687c7339235e1ac643e0427acff717 -master date: 2024-03-19 18:29:37 +0000 ---- - xen/arch/x86/nmi.c | 25 ++++++++++++------------- - 1 file changed, 12 insertions(+), 13 deletions(-) - -diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c -index 7656023748..7c9591b65e 100644 ---- a/xen/arch/x86/nmi.c -+++ b/xen/arch/x86/nmi.c -@@ -323,8 +323,6 @@ static void setup_p6_watchdog(unsigned counter) - { - unsigned int evntsel; - -- nmi_perfctr_msr = MSR_P6_PERFCTR(0); -- - if ( !nmi_p6_event_width && current_cpu_data.cpuid_level >= 0xa ) - nmi_p6_event_width = MASK_EXTR(cpuid_eax(0xa), P6_EVENT_WIDTH_MASK); - if ( !nmi_p6_event_width ) -@@ -334,6 +332,8 @@ static void setup_p6_watchdog(unsigned counter) - nmi_p6_event_width > BITS_PER_LONG ) - return; - -+ nmi_perfctr_msr = MSR_P6_PERFCTR(0); -+ - clear_msr_range(MSR_P6_EVNTSEL(0), 2); - clear_msr_range(MSR_P6_PERFCTR(0), 2); - -@@ -349,13 +349,13 @@ static void setup_p6_watchdog(unsigned counter) - wrmsr(MSR_P6_EVNTSEL(0), evntsel, 0); - } - --static int setup_p4_watchdog(void) -+static void setup_p4_watchdog(void) - { - uint64_t misc_enable; - - rdmsrl(MSR_IA32_MISC_ENABLE, misc_enable); - if (!(misc_enable & MSR_IA32_MISC_ENABLE_PERF_AVAIL)) -- return 0; -+ return; - - nmi_perfctr_msr = MSR_P4_IQ_PERFCTR0; - nmi_p4_cccr_val = P4_NMI_IQ_CCCR0; -@@ -378,13 +378,12 @@ static int setup_p4_watchdog(void) - clear_msr_range(0x3E0, 2); - clear_msr_range(MSR_P4_BPU_CCCR0, 18); - 
clear_msr_range(MSR_P4_BPU_PERFCTR0, 18); -- -+ - wrmsrl(MSR_P4_CRU_ESCR0, P4_NMI_CRU_ESCR0); - wrmsrl(MSR_P4_IQ_CCCR0, P4_NMI_IQ_CCCR0 & ~P4_CCCR_ENABLE); - write_watchdog_counter("P4_IQ_COUNTER0"); - apic_write(APIC_LVTPC, APIC_DM_NMI); - wrmsrl(MSR_P4_IQ_CCCR0, nmi_p4_cccr_val); -- return 1; - } - - void setup_apic_nmi_watchdog(void) -@@ -399,8 +398,6 @@ void setup_apic_nmi_watchdog(void) - case 0xf ... 0x19: - setup_k7_watchdog(); - break; -- default: -- return; - } - break; - case X86_VENDOR_INTEL: -@@ -411,14 +408,16 @@ void setup_apic_nmi_watchdog(void) - : CORE_EVENT_CPU_CLOCKS_NOT_HALTED); - break; - case 15: -- if (!setup_p4_watchdog()) -- return; -+ setup_p4_watchdog(); - break; -- default: -- return; - } - break; -- default: -+ } -+ -+ if ( nmi_perfctr_msr == 0 ) -+ { -+ printk(XENLOG_WARNING "Failed to configure NMI watchdog\n"); -+ nmi_watchdog = NMI_NONE; - return; - } - --- -2.44.0 - diff --git a/0057-x86-PoD-tie-together-P2M-update-and-increment-of-ent.patch b/0057-x86-PoD-tie-together-P2M-update-and-increment-of-ent.patch deleted file mode 100644 index dedc1c2..0000000 --- a/0057-x86-PoD-tie-together-P2M-update-and-increment-of-ent.patch +++ /dev/null @@ -1,61 +0,0 @@ -From bfb69205376d94ff91b09a337c47fb665ee12da3 Mon Sep 17 00:00:00 2001 -From: Jan Beulich <jbeulich@suse.com> -Date: Wed, 27 Mar 2024 12:29:33 +0100 -Subject: [PATCH 57/67] x86/PoD: tie together P2M update and increment of entry - count - -When not holding the PoD lock across the entire region covering P2M -update and stats update, the entry count - if to be incorrect at all - -should indicate too large a value in preference to a too small one, to -avoid functions bailing early when they find the count is zero. However, -instead of moving the increment ahead (and adjust back upon failure), -extend the PoD-locked region. 
- -Fixes: 99af3cd40b6e ("x86/mm: Rework locking in the PoD layer") -Signed-off-by: Jan Beulich <jbeulich@suse.com> -Reviewed-by: George Dunlap <george.dunlap@cloud.com> -master commit: cc950c49ae6a6690f7fc3041a1f43122c250d250 -master date: 2024-03-21 09:48:10 +0100 ---- - xen/arch/x86/mm/p2m-pod.c | 15 ++++++++++++--- - 1 file changed, 12 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/mm/p2m-pod.c b/xen/arch/x86/mm/p2m-pod.c -index 99dbcb3101..e903db9d93 100644 ---- a/xen/arch/x86/mm/p2m-pod.c -+++ b/xen/arch/x86/mm/p2m-pod.c -@@ -1370,19 +1370,28 @@ mark_populate_on_demand(struct domain *d, unsigned long gfn_l, - } - } - -+ /* -+ * P2M update and stats increment need to collectively be under PoD lock, -+ * to prevent code elsewhere observing PoD entry count being zero despite -+ * there actually still being PoD entries (created by the p2m_set_entry() -+ * invocation below). -+ */ -+ pod_lock(p2m); -+ - /* Now, actually do the two-way mapping */ - rc = p2m_set_entry(p2m, gfn, INVALID_MFN, order, - p2m_populate_on_demand, p2m->default_access); - if ( rc == 0 ) - { -- pod_lock(p2m); - p2m->pod.entry_count += 1UL << order; - p2m->pod.entry_count -= pod_count; - BUG_ON(p2m->pod.entry_count < 0); -- pod_unlock(p2m); -+ } -+ -+ pod_unlock(p2m); - -+ if ( rc == 0 ) - ioreq_request_mapcache_invalidate(d); -- } - else if ( order ) - { - /* --- -2.44.0 - diff --git a/0058-tools-oxenstored-Use-Map-instead-of-Hashtbl-for-quot.patch b/0058-tools-oxenstored-Use-Map-instead-of-Hashtbl-for-quot.patch deleted file mode 100644 index dfc7f5a..0000000 --- a/0058-tools-oxenstored-Use-Map-instead-of-Hashtbl-for-quot.patch +++ /dev/null @@ -1,143 +0,0 @@ -From 7abd305607938b846da1a37dd1bda7bf7d47dba5 Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com> -Date: Wed, 31 Jan 2024 10:52:55 +0000 -Subject: [PATCH 58/67] tools/oxenstored: Use Map instead of Hashtbl for quotas -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 
-Content-Transfer-Encoding: 8bit - -On a stress test running 1000 VMs flamegraphs have shown that -`oxenstored` spends a large amount of time in `Hashtbl.copy` and the GC. - -Hashtable complexity: - * read/write: O(1) average - * copy: O(domains) -- copying the entire table - -Map complexity: - * read/write: O(log n) worst case - * copy: O(1) -- a word copy - -We always perform at least one 'copy' when processing each xenstore -packet (regardless whether it is a readonly operation or inside a -transaction or not), so the actual complexity per packet is: - * Hashtbl: O(domains) - * Map: O(log domains) - -Maps are the clear winner, and a better fit for the immutable xenstore -tree. - -Signed-off-by: Edwin Török <edwin.torok@cloud.com> -Acked-by: Christian Lindig <christian.lindig@cloud.com> -(cherry picked from commit b6cf604207fd0a04451a48f2ce6d05fb66c612ab) ---- - tools/ocaml/xenstored/quota.ml | 65 ++++++++++++++++++---------------- - 1 file changed, 34 insertions(+), 31 deletions(-) - -diff --git a/tools/ocaml/xenstored/quota.ml b/tools/ocaml/xenstored/quota.ml -index 6e3d6401ae..ee8dd22581 100644 ---- a/tools/ocaml/xenstored/quota.ml -+++ b/tools/ocaml/xenstored/quota.ml -@@ -23,66 +23,69 @@ let activate = ref true - let maxent = ref (1000) - let maxsize = ref (2048) - -+module Domid = struct -+ type t = Xenctrl.domid -+ let compare (a:t) (b:t) = compare a b -+end -+ -+module DomidMap = Map.Make(Domid) -+ - type t = { - maxent: int; (* max entities per domU *) - maxsize: int; (* max size of data store in one node *) -- cur: (Xenctrl.domid, int) Hashtbl.t; (* current domains quota *) -+ mutable cur: int DomidMap.t; (* current domains quota *) - } - - let to_string quota domid = -- if Hashtbl.mem quota.cur domid -- then Printf.sprintf "dom%i quota: %i/%i" domid (Hashtbl.find quota.cur domid) quota.maxent -- else Printf.sprintf "dom%i quota: not set" domid -+ try -+ Printf.sprintf "dom%i quota: %i/%i" domid (DomidMap.find domid quota.cur) quota.maxent -+ with 
Not_found -> -+ Printf.sprintf "dom%i quota: not set" domid - - let create () = -- { maxent = !maxent; maxsize = !maxsize; cur = Hashtbl.create 100; } -+ { maxent = !maxent; maxsize = !maxsize; cur = DomidMap.empty; } - --let copy quota = { quota with cur = (Hashtbl.copy quota.cur) } -+let copy quota = { quota with cur = quota.cur } - --let del quota id = Hashtbl.remove quota.cur id -+let del quota id = { quota with cur = DomidMap.remove id quota.cur } - - let _check quota id size = - if size > quota.maxsize then ( - warn "domain %u err create entry: data too big %d" id size; - raise Data_too_big - ); -- if id > 0 && Hashtbl.mem quota.cur id then -- let entry = Hashtbl.find quota.cur id in -+ if id > 0 then -+ try -+ let entry = DomidMap.find id quota.cur in - if entry >= quota.maxent then ( - warn "domain %u cannot create entry: quota reached" id; - raise Limit_reached - ) -+ with Not_found -> () - - let check quota id size = - if !activate then - _check quota id size - --let get_entry quota id = Hashtbl.find quota.cur id -+let find_or_zero quota_cur id = -+ try DomidMap.find id quota_cur with Not_found -> 0 - --let set_entry quota id nb = -- if nb = 0 -- then Hashtbl.remove quota.cur id -- else begin -- if Hashtbl.mem quota.cur id then -- Hashtbl.replace quota.cur id nb -- else -- Hashtbl.add quota.cur id nb -- end -+let update_entry quota_cur id diff = -+ let nb = diff + find_or_zero quota_cur id in -+ if nb = 0 then DomidMap.remove id quota_cur -+ else DomidMap.add id nb quota_cur - - let del_entry quota id = -- try -- let nb = get_entry quota id in -- set_entry quota id (nb - 1) -- with Not_found -> () -+ quota.cur <- update_entry quota.cur id (-1) - - let add_entry quota id = -- let nb = try get_entry quota id with Not_found -> 0 in -- set_entry quota id (nb + 1) -- --let add quota diff = -- Hashtbl.iter (fun id nb -> set_entry quota id (get_entry quota id + nb)) diff.cur -+ quota.cur <- update_entry quota.cur id (+1) - - let merge orig_quota mod_quota 
dest_quota = -- Hashtbl.iter (fun id nb -> let diff = nb - (try get_entry orig_quota id with Not_found -> 0) in -- if diff <> 0 then -- set_entry dest_quota id ((try get_entry dest_quota id with Not_found -> 0) + diff)) mod_quota.cur -+ let fold_merge id nb dest = -+ match nb - find_or_zero orig_quota.cur id with -+ | 0 -> dest (* not modified *) -+ | diff -> update_entry dest id diff (* update with [x=x+diff] *) -+ in -+ dest_quota.cur <- DomidMap.fold fold_merge mod_quota.cur dest_quota.cur -+ (* dest_quota = dest_quota + (mod_quota - orig_quota) *) --- -2.44.0 - diff --git a/0059-tools-oxenstored-Make-Quota.t-pure.patch b/0059-tools-oxenstored-Make-Quota.t-pure.patch deleted file mode 100644 index 7616b90..0000000 --- a/0059-tools-oxenstored-Make-Quota.t-pure.patch +++ /dev/null @@ -1,121 +0,0 @@ -From f38a815a54000ca51ff5165b2863d60b6bbea49c Mon Sep 17 00:00:00 2001 -From: =?UTF-8?q?Edwin=20T=C3=B6r=C3=B6k?= <edwin.torok@cloud.com> -Date: Wed, 31 Jan 2024 10:52:56 +0000 -Subject: [PATCH 59/67] tools/oxenstored: Make Quota.t pure -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Now that we no longer have a hashtable inside we can make Quota.t pure, and -push the mutable update to its callers. Store.t already had a mutable Quota.t -field. - -No functional change. 
- -Signed-off-by: Edwin Török <edwin.torok@cloud.com> -Acked-by: Christian Lindig <christian.lindig@cloud.com> -(cherry picked from commit 098d868e52ac0165b7f36e22b767ea70cef70054) ---- - tools/ocaml/xenstored/quota.ml | 8 ++++---- - tools/ocaml/xenstored/store.ml | 17 ++++++++++------- - 2 files changed, 14 insertions(+), 11 deletions(-) - -diff --git a/tools/ocaml/xenstored/quota.ml b/tools/ocaml/xenstored/quota.ml -index ee8dd22581..b3ab678c72 100644 ---- a/tools/ocaml/xenstored/quota.ml -+++ b/tools/ocaml/xenstored/quota.ml -@@ -33,7 +33,7 @@ module DomidMap = Map.Make(Domid) - type t = { - maxent: int; (* max entities per domU *) - maxsize: int; (* max size of data store in one node *) -- mutable cur: int DomidMap.t; (* current domains quota *) -+ cur: int DomidMap.t; (* current domains quota *) - } - - let to_string quota domid = -@@ -76,10 +76,10 @@ let update_entry quota_cur id diff = - else DomidMap.add id nb quota_cur - - let del_entry quota id = -- quota.cur <- update_entry quota.cur id (-1) -+ {quota with cur = update_entry quota.cur id (-1)} - - let add_entry quota id = -- quota.cur <- update_entry quota.cur id (+1) -+ {quota with cur = update_entry quota.cur id (+1)} - - let merge orig_quota mod_quota dest_quota = - let fold_merge id nb dest = -@@ -87,5 +87,5 @@ let merge orig_quota mod_quota dest_quota = - | 0 -> dest (* not modified *) - | diff -> update_entry dest id diff (* update with [x=x+diff] *) - in -- dest_quota.cur <- DomidMap.fold fold_merge mod_quota.cur dest_quota.cur -+ {dest_quota with cur = DomidMap.fold fold_merge mod_quota.cur dest_quota.cur} - (* dest_quota = dest_quota + (mod_quota - orig_quota) *) -diff --git a/tools/ocaml/xenstored/store.ml b/tools/ocaml/xenstored/store.ml -index c94dbf3a62..5dd965db15 100644 ---- a/tools/ocaml/xenstored/store.ml -+++ b/tools/ocaml/xenstored/store.ml -@@ -85,7 +85,9 @@ let check_owner node connection = - raise Define.Permission_denied; - end - --let rec recurse fct node = fct node; 
SymbolMap.iter (fun _ -> recurse fct) node.children -+let rec recurse fct node acc = -+ let acc = fct node acc in -+ SymbolMap.fold (fun _ -> recurse fct) node.children acc - - (** [recurse_filter_map f tree] applies [f] on each node in the tree recursively, - possibly removing some nodes. -@@ -408,7 +410,7 @@ let dump_buffer store = dump_store_buf store.root - let set_node store path node orig_quota mod_quota = - let root = Path.set_node store.root path node in - store.root <- root; -- Quota.merge orig_quota mod_quota store.quota -+ store.quota <- Quota.merge orig_quota mod_quota store.quota - - let write store perm path value = - let node, existing = get_deepest_existing_node store path in -@@ -422,7 +424,7 @@ let write store perm path value = - let root, node_created = path_write store perm path value in - store.root <- root; - if node_created -- then Quota.add_entry store.quota owner -+ then store.quota <- Quota.add_entry store.quota owner - - let mkdir store perm path = - let node, existing = get_deepest_existing_node store path in -@@ -431,7 +433,7 @@ let mkdir store perm path = - if not (existing || (Perms.Connection.is_dom0 perm)) then Quota.check store.quota owner 0; - store.root <- path_mkdir store perm path; - if not existing then -- Quota.add_entry store.quota owner -+ store.quota <- Quota.add_entry store.quota owner - - let rm store perm path = - let rmed_node = Path.get_node store.root path in -@@ -439,7 +441,7 @@ let rm store perm path = - | None -> raise Define.Doesnt_exist - | Some rmed_node -> - store.root <- path_rm store perm path; -- Node.recurse (fun node -> Quota.del_entry store.quota (Node.get_owner node)) rmed_node -+ store.quota <- Node.recurse (fun node quota -> Quota.del_entry quota (Node.get_owner node)) rmed_node store.quota - - let setperms store perm path nperms = - match Path.get_node store.root path with -@@ -450,8 +452,9 @@ let setperms store perm path nperms = - if not ((old_owner = new_owner) || (Perms.Connection.is_dom0 perm)) 
then - raise Define.Permission_denied; - store.root <- path_setperms store perm path nperms; -- Quota.del_entry store.quota old_owner; -- Quota.add_entry store.quota new_owner -+ store.quota <- -+ let quota = Quota.del_entry store.quota old_owner in -+ Quota.add_entry quota new_owner - - let reset_permissions store domid = - Logging.info "store|node" "Cleaning up xenstore ACLs for domid %d" domid; --- -2.44.0 - diff --git a/0060-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch b/0060-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch deleted file mode 100644 index ce2b89d..0000000 --- a/0060-x86-cpu-policy-Hide-x2APIC-from-PV-guests.patch +++ /dev/null @@ -1,90 +0,0 @@ -From bb27e11c56963e170d1f6d2fbddbc956f7164121 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 2 Apr 2024 16:17:25 +0200 -Subject: [PATCH 60/67] x86/cpu-policy: Hide x2APIC from PV guests -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -PV guests can't write to MSR_APIC_BASE (in order to set EXTD), nor can they -access any of the x2APIC MSR range. Therefore they mustn't see the x2APIC -CPUID bit saying that they can. - -Right now, the host x2APIC flag filters into PV guests, meaning that PV guests -generally see x2APIC except on Zen1-and-older AMD systems. - -Linux works around this by explicitly hiding the bit itself, and filtering -EXTD out of MSR_APIC_BASE reads. NetBSD behaves more in the spirit of PV -guests, and entirely ignores the APIC when built as a PV guest. - -Change the annotation from !A to !S. This has a consequence of stripping it -out of both PV featuremasks. However, as existing guests may have seen the -bit, set it back into the PV Max policy; a VM which saw the bit and is alive -enough to migrate will have ignored it one way or another. - -Hiding x2APIC does change the contents of leaf 0xb, but as the information is -nonsense to begin with, this is likely an improvement on the status quo. 
- -Xen's blind assumption that APIC_ID = vCPU_ID * 2 isn't interlinked with the -host's topology structure, where a PV guest may see real host values, and the -APIC_IDs are useless without an MADT to start with. Dom0 is the only PV VM to -get an MADT but it's the host one, meaning the two sets of APIC_IDs are from -different address spaces. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 5420aa165dfa5fe95dd84bb71cb96c15459935b1 -master date: 2024-03-01 20:14:19 +0000 ---- - xen/arch/x86/cpu-policy.c | 11 +++++++++-- - xen/include/public/arch-x86/cpufeatureset.h | 2 +- - 2 files changed, 10 insertions(+), 3 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index 96c2cee1a8..ed64d56294 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -559,6 +559,14 @@ static void __init calculate_pv_max_policy(void) - for ( i = 0; i < ARRAY_SIZE(fs); ++i ) - fs[i] &= pv_max_featuremask[i]; - -+ /* -+ * Xen at the time of writing (Feb 2024, 4.19 dev cycle) used to leak the -+ * host x2APIC capability into PV guests, but never supported the guest -+ * trying to turn x2APIC mode on. Tolerate an incoming VM which saw the -+ * x2APIC CPUID bit and is alive enough to migrate. -+ */ -+ __set_bit(X86_FEATURE_X2APIC, fs); -+ - /* - * If Xen isn't virtualising MSR_SPEC_CTRL for PV guests (functional - * availability, or admin choice), hide the feature. -@@ -837,11 +845,10 @@ void recalculate_cpuid_policy(struct domain *d) - } - - /* -- * Allow the toolstack to set HTT, X2APIC and CMP_LEGACY. These bits -+ * Allow the toolstack to set HTT and CMP_LEGACY. These bits - * affect how to interpret topology information in other cpuid leaves. 
- */ - __set_bit(X86_FEATURE_HTT, max_fs); -- __set_bit(X86_FEATURE_X2APIC, max_fs); - __set_bit(X86_FEATURE_CMP_LEGACY, max_fs); - - /* -diff --git a/xen/include/public/arch-x86/cpufeatureset.h b/xen/include/public/arch-x86/cpufeatureset.h -index 113e6cadc1..bc971f3c6f 100644 ---- a/xen/include/public/arch-x86/cpufeatureset.h -+++ b/xen/include/public/arch-x86/cpufeatureset.h -@@ -123,7 +123,7 @@ XEN_CPUFEATURE(PCID, 1*32+17) /*H Process Context ID */ - XEN_CPUFEATURE(DCA, 1*32+18) /* Direct Cache Access */ - XEN_CPUFEATURE(SSE4_1, 1*32+19) /*A Streaming SIMD Extensions 4.1 */ - XEN_CPUFEATURE(SSE4_2, 1*32+20) /*A Streaming SIMD Extensions 4.2 */ --XEN_CPUFEATURE(X2APIC, 1*32+21) /*!A Extended xAPIC */ -+XEN_CPUFEATURE(X2APIC, 1*32+21) /*!S Extended xAPIC */ - XEN_CPUFEATURE(MOVBE, 1*32+22) /*A movbe instruction */ - XEN_CPUFEATURE(POPCNT, 1*32+23) /*A POPCNT instruction */ - XEN_CPUFEATURE(TSC_DEADLINE, 1*32+24) /*S TSC Deadline Timer */ --- -2.44.0 - diff --git a/0061-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch b/0061-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch deleted file mode 100644 index d1b8786..0000000 --- a/0061-x86-cpu-policy-Fix-visibility-of-HTT-CMP_LEGACY-in-m.patch +++ /dev/null @@ -1,85 +0,0 @@ -From 70ad9c5fdeac4814050080c87e06d44292ecf868 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 2 Apr 2024 16:18:05 +0200 -Subject: [PATCH 61/67] x86/cpu-policy: Fix visibility of HTT/CMP_LEGACY in max - policies -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -The block in recalculate_cpuid_policy() predates the proper split between -default and max policies, and was a "slightly max for a toolstack which knows -about it" capability. It didn't get transformed properly in Xen 4.14. - -Because Xen will accept a VM with HTT/CMP_LEGACY seen, they should be visible -in the max polices. Keep the default policy matching host settings. 
- -This manifested as an incorrectly-rejected migration across XenServer's Xen -4.13 -> 4.17 upgrade, as Xapi is slowly growing the logic to check a VM -against the target max policy. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: e2d8a652251660c3252d92b442e1a9c5d6e6a1e9 -master date: 2024-03-01 20:14:19 +0000 ---- - xen/arch/x86/cpu-policy.c | 29 ++++++++++++++++++++++------- - 1 file changed, 22 insertions(+), 7 deletions(-) - -diff --git a/xen/arch/x86/cpu-policy.c b/xen/arch/x86/cpu-policy.c -index ed64d56294..24acd12ce2 100644 ---- a/xen/arch/x86/cpu-policy.c -+++ b/xen/arch/x86/cpu-policy.c -@@ -458,6 +458,16 @@ static void __init guest_common_max_feature_adjustments(uint32_t *fs) - raw_cpu_policy.feat.clwb ) - __set_bit(X86_FEATURE_CLWB, fs); - } -+ -+ /* -+ * Topology information inside the guest is entirely at the toolstack's -+ * discretion, and bears no relationship to the host we're running on. -+ * -+ * HTT identifies p->basic.lppp as valid -+ * CMP_LEGACY identifies p->extd.nc as valid -+ */ -+ __set_bit(X86_FEATURE_HTT, fs); -+ __set_bit(X86_FEATURE_CMP_LEGACY, fs); - } - - static void __init guest_common_default_feature_adjustments(uint32_t *fs) -@@ -512,6 +522,18 @@ static void __init guest_common_default_feature_adjustments(uint32_t *fs) - __clear_bit(X86_FEATURE_CLWB, fs); - } - -+ /* -+ * Topology information is at the toolstack's discretion so these are -+ * unconditionally set in max, but pick a default which matches the host. 
-+ */ -+ __clear_bit(X86_FEATURE_HTT, fs); -+ if ( cpu_has_htt ) -+ __set_bit(X86_FEATURE_HTT, fs); -+ -+ __clear_bit(X86_FEATURE_CMP_LEGACY, fs); -+ if ( cpu_has_cmp_legacy ) -+ __set_bit(X86_FEATURE_CMP_LEGACY, fs); -+ - /* - * On certain hardware, speculative or errata workarounds can result in - * TSX being placed in "force-abort" mode, where it doesn't actually -@@ -844,13 +866,6 @@ void recalculate_cpuid_policy(struct domain *d) - } - } - -- /* -- * Allow the toolstack to set HTT and CMP_LEGACY. These bits -- * affect how to interpret topology information in other cpuid leaves. -- */ -- __set_bit(X86_FEATURE_HTT, max_fs); -- __set_bit(X86_FEATURE_CMP_LEGACY, max_fs); -- - /* - * 32bit PV domains can't use any Long Mode features, and cannot use - * SYSCALL on non-AMD hardware. --- -2.44.0 - diff --git a/0062-xen-virtual-region-Rename-the-start-end-fields.patch b/0062-xen-virtual-region-Rename-the-start-end-fields.patch deleted file mode 100644 index 9dbd5c9..0000000 --- a/0062-xen-virtual-region-Rename-the-start-end-fields.patch +++ /dev/null @@ -1,140 +0,0 @@ -From 2392e958ec6fd2e48e011781344cf94dee6d6142 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 2 Apr 2024 16:18:51 +0200 -Subject: [PATCH 62/67] xen/virtual-region: Rename the start/end fields -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -... to text_{start,end}. We're about to introduce another start/end pair. - -Despite it's name, struct virtual_region has always been a module-ish -description. Call this out specifically. - -As minor cleanup, replace ROUNDUP(x, PAGE_SIZE) with the more concise -PAGE_ALIGN() ahead of duplicating the example. - -No functional change. 
- -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: 989556c6f8ca080f5f202417af97d1188b9ba52a -master date: 2024-03-07 14:24:42 +0000 ---- - xen/common/livepatch.c | 9 +++++---- - xen/common/virtual_region.c | 19 ++++++++++--------- - xen/include/xen/virtual_region.h | 11 +++++++++-- - 3 files changed, 24 insertions(+), 15 deletions(-) - -diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c -index a5068a2217..29395f286f 100644 ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -785,8 +785,8 @@ static int prepare_payload(struct payload *payload, - region = &payload->region; - - region->symbols_lookup = livepatch_symbols_lookup; -- region->start = payload->text_addr; -- region->end = payload->text_addr + payload->text_size; -+ region->text_start = payload->text_addr; -+ region->text_end = payload->text_addr + payload->text_size; - - /* Optional sections. 
*/ - for ( i = 0; i < BUGFRAME_NR; i++ ) -@@ -823,8 +823,9 @@ static int prepare_payload(struct payload *payload, - const void *instr = ALT_ORIG_PTR(a); - const void *replacement = ALT_REPL_PTR(a); - -- if ( (instr < region->start && instr >= region->end) || -- (replacement < region->start && replacement >= region->end) ) -+ if ( (instr < region->text_start && instr >= region->text_end) || -+ (replacement < region->text_start && -+ replacement >= region->text_end) ) - { - printk(XENLOG_ERR LIVEPATCH "%s Alt patching outside payload: %p\n", - elf->name, instr); -diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c -index 9f12c30efe..b22ffb75c4 100644 ---- a/xen/common/virtual_region.c -+++ b/xen/common/virtual_region.c -@@ -11,15 +11,15 @@ - - static struct virtual_region core = { - .list = LIST_HEAD_INIT(core.list), -- .start = _stext, -- .end = _etext, -+ .text_start = _stext, -+ .text_end = _etext, - }; - - /* Becomes irrelevant when __init sections are cleared. */ - static struct virtual_region core_init __initdata = { - .list = LIST_HEAD_INIT(core_init.list), -- .start = _sinittext, -- .end = _einittext, -+ .text_start = _sinittext, -+ .text_end = _einittext, - }; - - /* -@@ -39,7 +39,8 @@ const struct virtual_region *find_text_region(unsigned long addr) - rcu_read_lock(&rcu_virtual_region_lock); - list_for_each_entry_rcu( region, &virtual_region_list, list ) - { -- if ( (void *)addr >= region->start && (void *)addr < region->end ) -+ if ( (void *)addr >= region->text_start && -+ (void *)addr < region->text_end ) - { - rcu_read_unlock(&rcu_virtual_region_lock); - return region; -@@ -88,8 +89,8 @@ void relax_virtual_region_perms(void) - - rcu_read_lock(&rcu_virtual_region_lock); - list_for_each_entry_rcu( region, &virtual_region_list, list ) -- modify_xen_mappings_lite((unsigned long)region->start, -- ROUNDUP((unsigned long)region->end, PAGE_SIZE), -+ modify_xen_mappings_lite((unsigned long)region->text_start, -+ PAGE_ALIGN((unsigned 
long)region->text_end), - PAGE_HYPERVISOR_RWX); - rcu_read_unlock(&rcu_virtual_region_lock); - } -@@ -100,8 +101,8 @@ void tighten_virtual_region_perms(void) - - rcu_read_lock(&rcu_virtual_region_lock); - list_for_each_entry_rcu( region, &virtual_region_list, list ) -- modify_xen_mappings_lite((unsigned long)region->start, -- ROUNDUP((unsigned long)region->end, PAGE_SIZE), -+ modify_xen_mappings_lite((unsigned long)region->text_start, -+ PAGE_ALIGN((unsigned long)region->text_end), - PAGE_HYPERVISOR_RX); - rcu_read_unlock(&rcu_virtual_region_lock); - } -diff --git a/xen/include/xen/virtual_region.h b/xen/include/xen/virtual_region.h -index d053620711..442a45bf1f 100644 ---- a/xen/include/xen/virtual_region.h -+++ b/xen/include/xen/virtual_region.h -@@ -9,11 +9,18 @@ - #include <xen/list.h> - #include <xen/symbols.h> - -+/* -+ * Despite it's name, this is a module(ish) description. -+ * -+ * There's one region for the runtime .text/etc, one region for .init during -+ * boot only, and one region per livepatch. -+ */ - struct virtual_region - { - struct list_head list; -- const void *start; /* Virtual address start. */ -- const void *end; /* Virtual address end. */ -+ -+ const void *text_start; /* .text virtual address start. */ -+ const void *text_end; /* .text virtual address end. */ - - /* If this is NULL the default lookup mechanism is used. 
*/ - symbols_lookup_t *symbols_lookup; --- -2.44.0 - diff --git a/0063-xen-virtual-region-Include-rodata-pointers.patch b/0063-xen-virtual-region-Include-rodata-pointers.patch deleted file mode 100644 index 9f51d4d..0000000 --- a/0063-xen-virtual-region-Include-rodata-pointers.patch +++ /dev/null @@ -1,71 +0,0 @@ -From 335cbb55567b20df8e8bd2d1b340609e272ddab6 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 2 Apr 2024 16:19:11 +0200 -Subject: [PATCH 63/67] xen/virtual-region: Include rodata pointers -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -These are optional. .init doesn't distinguish types of data like this, and -livepatches don't necesserily have any .rodata either. - -No functional change. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: ef969144a425e39f5b214a875b5713d0ea8575fb -master date: 2024-03-07 14:24:42 +0000 ---- - xen/common/livepatch.c | 6 ++++++ - xen/common/virtual_region.c | 2 ++ - xen/include/xen/virtual_region.h | 3 +++ - 3 files changed, 11 insertions(+) - -diff --git a/xen/common/livepatch.c b/xen/common/livepatch.c -index 29395f286f..28c09ddf58 100644 ---- a/xen/common/livepatch.c -+++ b/xen/common/livepatch.c -@@ -788,6 +788,12 @@ static int prepare_payload(struct payload *payload, - region->text_start = payload->text_addr; - region->text_end = payload->text_addr + payload->text_size; - -+ if ( payload->ro_size ) -+ { -+ region->rodata_start = payload->ro_addr; -+ region->rodata_end = payload->ro_addr + payload->ro_size; -+ } -+ - /* Optional sections. 
*/ - for ( i = 0; i < BUGFRAME_NR; i++ ) - { -diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c -index b22ffb75c4..9c566f8ec9 100644 ---- a/xen/common/virtual_region.c -+++ b/xen/common/virtual_region.c -@@ -13,6 +13,8 @@ static struct virtual_region core = { - .list = LIST_HEAD_INIT(core.list), - .text_start = _stext, - .text_end = _etext, -+ .rodata_start = _srodata, -+ .rodata_end = _erodata, - }; - - /* Becomes irrelevant when __init sections are cleared. */ -diff --git a/xen/include/xen/virtual_region.h b/xen/include/xen/virtual_region.h -index 442a45bf1f..dcdc95ba49 100644 ---- a/xen/include/xen/virtual_region.h -+++ b/xen/include/xen/virtual_region.h -@@ -22,6 +22,9 @@ struct virtual_region - const void *text_start; /* .text virtual address start. */ - const void *text_end; /* .text virtual address end. */ - -+ const void *rodata_start; /* .rodata virtual address start (optional). */ -+ const void *rodata_end; /* .rodata virtual address end. */ -+ - /* If this is NULL the default lookup mechanism is used. */ - symbols_lookup_t *symbols_lookup; - --- -2.44.0 - diff --git a/0064-x86-livepatch-Relax-permissions-on-rodata-too.patch b/0064-x86-livepatch-Relax-permissions-on-rodata-too.patch deleted file mode 100644 index bc80769..0000000 --- a/0064-x86-livepatch-Relax-permissions-on-rodata-too.patch +++ /dev/null @@ -1,85 +0,0 @@ -From c3ff11b11c21777a9b1c616607705f3a7340b391 Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 2 Apr 2024 16:19:36 +0200 -Subject: [PATCH 64/67] x86/livepatch: Relax permissions on rodata too -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -This reinstates the capability to patch .rodata in load/unload hooks, which -was lost when we stopped using CR0.WP=0 to patch. - -This turns out to be rather less of a large TODO than I thought at the time. 
- -Fixes: 8676092a0f16 ("x86/livepatch: Fix livepatch application when CET is active") -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -Reviewed-by: Ross Lagerwall <ross.lagerwall@citrix.com> -master commit: b083b1c393dc8961acf0959b1d2e0ad459985ae3 -master date: 2024-03-07 14:24:42 +0000 ---- - xen/arch/x86/livepatch.c | 4 ++-- - xen/common/virtual_region.c | 12 ++++++++++++ - 2 files changed, 14 insertions(+), 2 deletions(-) - -diff --git a/xen/arch/x86/livepatch.c b/xen/arch/x86/livepatch.c -index ee539f001b..4f76127e1f 100644 ---- a/xen/arch/x86/livepatch.c -+++ b/xen/arch/x86/livepatch.c -@@ -62,7 +62,7 @@ int arch_livepatch_safety_check(void) - int noinline arch_livepatch_quiesce(void) - { - /* -- * Relax perms on .text to be RWX, so we can modify them. -+ * Relax perms on .text/.rodata, so we can modify them. - * - * This relaxes perms globally, but all other CPUs are waiting on us. - */ -@@ -75,7 +75,7 @@ int noinline arch_livepatch_quiesce(void) - void noinline arch_livepatch_revive(void) - { - /* -- * Reinstate perms on .text to be RX. This also cleans out the dirty -+ * Reinstate perms on .text/.rodata. This also cleans out the dirty - * bits, which matters when CET Shstk is active. 
- * - * The other CPUs waiting for us could in principle have re-walked while -diff --git a/xen/common/virtual_region.c b/xen/common/virtual_region.c -index 9c566f8ec9..aefc08e75f 100644 ---- a/xen/common/virtual_region.c -+++ b/xen/common/virtual_region.c -@@ -91,9 +91,15 @@ void relax_virtual_region_perms(void) - - rcu_read_lock(&rcu_virtual_region_lock); - list_for_each_entry_rcu( region, &virtual_region_list, list ) -+ { - modify_xen_mappings_lite((unsigned long)region->text_start, - PAGE_ALIGN((unsigned long)region->text_end), - PAGE_HYPERVISOR_RWX); -+ if ( region->rodata_start ) -+ modify_xen_mappings_lite((unsigned long)region->rodata_start, -+ PAGE_ALIGN((unsigned long)region->rodata_end), -+ PAGE_HYPERVISOR_RW); -+ } - rcu_read_unlock(&rcu_virtual_region_lock); - } - -@@ -103,9 +109,15 @@ void tighten_virtual_region_perms(void) - - rcu_read_lock(&rcu_virtual_region_lock); - list_for_each_entry_rcu( region, &virtual_region_list, list ) -+ { - modify_xen_mappings_lite((unsigned long)region->text_start, - PAGE_ALIGN((unsigned long)region->text_end), - PAGE_HYPERVISOR_RX); -+ if ( region->rodata_start ) -+ modify_xen_mappings_lite((unsigned long)region->rodata_start, -+ PAGE_ALIGN((unsigned long)region->rodata_end), -+ PAGE_HYPERVISOR_RO); -+ } - rcu_read_unlock(&rcu_virtual_region_lock); - } - #endif /* CONFIG_X86 */ --- -2.44.0 - diff --git a/0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch b/0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch deleted file mode 100644 index 4a46326..0000000 --- a/0065-x86-boot-Improve-the-boot-watchdog-determination-of-.patch +++ /dev/null @@ -1,106 +0,0 @@ -From 846fb984b506135917c2862d2e4607005d6afdeb Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 2 Apr 2024 16:20:09 +0200 -Subject: [PATCH 65/67] x86/boot: Improve the boot watchdog determination of - stuck cpus -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - 
-Right now, check_nmi_watchdog() has two processing loops over all online CPUs -using prev_nmi_count as storage. - -Use a cpumask_t instead (1/32th as much initdata) and have wait_for_nmis() -make the determination of whether it is stuck, rather than having both -functions needing to agree on how many ticks mean stuck. - -More importantly though, it means we can use the standard cpumask -infrastructure, including turning this: - - (XEN) Brought up 512 CPUs - (XEN) Testing NMI watchdog on all CPUs: {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99,100,101,102,103,104,105,106,107,108,109,110,111,112,113,114,115,116,117,118,119,120,121,122,123,124,125,126,127,128,129,130,131,132,133,134,135,136,137,138,139,140,141,142,143,144,145,146,147,148,149,150,151,152,153,154,155,156,157,158,159,160,161,162,163,164,165,166,167,168,169,170,171,172,173,174,175,176,177,178,179,180,181,182,183,184,185,186,187,188,189,190,191,192,193,194,195,196,197,198,199,200,201,202,203,204,205,206,207,208,209,210,211,212,213,214,215,216,217,218,219,220,221,222,223,224,225,226,227,228,229,230,231,232,233,234,235,236,237,238,239,240,241,242,243,244,245,246,247,248,249,250,251,252,253,254,255,256,257,258,259,260,261,262,263,264,265,266,267,268,269,270,271,272,273,274,275,276,277,278,279,280,281,282,283,284,285,286,287,288,289,290,291,292,293,294,295,296,297,298,299,300,301,302,303,304,305,306,307,308,309,310,311,312,313,314,315,316,317,318,319,320,321,322,323,324,325,326,327,328,329,330,331,332,333,334,335,336,337,338,339,340,341,342,343,344,345,346,347,348,349,350,351,352,353,354,355,356,357,358,359,360,361,362,363,364,365,366,367,368,369,370,371,372,373,374,375,376,377,378,379,380,381,382,383,384,385,386,387,388,389,390,391,392,393,394,395,396,397,398,399,400,40
1,402,403,404,405,406,407,408,409,410,411,412,413,414,415,416,417,418,419,420,421,422,423,424,425,426,427,428,429,430,431,432,433,434,435,436,437,438,439,440,441,442,443,444,445,446,447,448,449,450,451,452,453,454,455,456,457,458,459,460,461,462,463,464,465,466,467,468,469,470,471,472,473,474,475,476,477,478,479,480,481,482,483,484,485,486,487,488,489,490,491,492,493,494,495,496,497,498,499,500,501,502,503,504,505,506,507,508,509,510,511} stuck - -into the rather more manageable: - - (XEN) Brought up 512 CPUs - (XEN) Testing NMI watchdog on all CPUs: {0-511} stuck - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 9e18f339830c828798aef465556d4029d83476a0 -master date: 2024-03-19 18:29:37 +0000 ---- - xen/arch/x86/nmi.c | 33 ++++++++++++++------------------- - 1 file changed, 14 insertions(+), 19 deletions(-) - -diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c -index 7c9591b65e..dd31034ac8 100644 ---- a/xen/arch/x86/nmi.c -+++ b/xen/arch/x86/nmi.c -@@ -150,6 +150,8 @@ int nmi_active; - - static void __init cf_check wait_for_nmis(void *p) - { -+ cpumask_t *stuck_cpus = p; -+ unsigned int cpu = smp_processor_id(); - unsigned int start_count = this_cpu(nmi_count); - unsigned long ticks = 10 * 1000 * cpu_khz / nmi_hz; - unsigned long s, e; -@@ -158,42 +160,35 @@ static void __init cf_check wait_for_nmis(void *p) - do { - cpu_relax(); - if ( this_cpu(nmi_count) >= start_count + 2 ) -- break; -+ return; -+ - e = rdtsc(); -- } while( e - s < ticks ); -+ } while ( e - s < ticks ); -+ -+ /* Timeout. Mark ourselves as stuck. 
*/ -+ cpumask_set_cpu(cpu, stuck_cpus); - } - - void __init check_nmi_watchdog(void) - { -- static unsigned int __initdata prev_nmi_count[NR_CPUS]; -- int cpu; -- bool ok = true; -+ static cpumask_t __initdata stuck_cpus; - - if ( nmi_watchdog == NMI_NONE ) - return; - - printk("Testing NMI watchdog on all CPUs:"); - -- for_each_online_cpu ( cpu ) -- prev_nmi_count[cpu] = per_cpu(nmi_count, cpu); -- - /* - * Wait at most 10 ticks for 2 watchdog NMIs on each CPU. - * Busy-wait on all CPUs: the LAPIC counter that the NMI watchdog - * uses only runs while the core's not halted - */ -- on_selected_cpus(&cpu_online_map, wait_for_nmis, NULL, 1); -- -- for_each_online_cpu ( cpu ) -- { -- if ( per_cpu(nmi_count, cpu) - prev_nmi_count[cpu] < 2 ) -- { -- printk(" %d", cpu); -- ok = false; -- } -- } -+ on_selected_cpus(&cpu_online_map, wait_for_nmis, &stuck_cpus, 1); - -- printk(" %s\n", ok ? "ok" : "stuck"); -+ if ( cpumask_empty(&stuck_cpus) ) -+ printk("ok\n"); -+ else -+ printk("{%*pbl} stuck\n", CPUMASK_PR(&stuck_cpus)); - - /* - * Now that we know it works we can reduce NMI frequency to --- -2.44.0 - diff --git a/0066-x86-boot-Support-the-watchdog-on-newer-AMD-systems.patch b/0066-x86-boot-Support-the-watchdog-on-newer-AMD-systems.patch deleted file mode 100644 index e501861..0000000 --- a/0066-x86-boot-Support-the-watchdog-on-newer-AMD-systems.patch +++ /dev/null @@ -1,48 +0,0 @@ -From 2777b499f1f6d5cea68f9479f82d055542b822ad Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 2 Apr 2024 16:20:30 +0200 -Subject: [PATCH 66/67] x86/boot: Support the watchdog on newer AMD systems - -The MSRs used by setup_k7_watchdog() are architectural in 64bit. The Unit -Select (0x76, cycles not in halt state) isn't, but it hasn't changed in 25 -years, making this a trend likely to continue. - -Drop the family check. 
If the Unit Select does happen to change meaning in -the future, check_nmi_watchdog() will still notice the watchdog not operating -as expected. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Reviewed-by: Jan Beulich <jbeulich@suse.com> -master commit: 131892e0dcc1265b621c2b7d844cb9e7c3a4404f -master date: 2024-03-19 18:29:37 +0000 ---- - xen/arch/x86/nmi.c | 11 ++++------- - 1 file changed, 4 insertions(+), 7 deletions(-) - -diff --git a/xen/arch/x86/nmi.c b/xen/arch/x86/nmi.c -index dd31034ac8..c7c51614a6 100644 ---- a/xen/arch/x86/nmi.c -+++ b/xen/arch/x86/nmi.c -@@ -386,15 +386,12 @@ void setup_apic_nmi_watchdog(void) - if ( nmi_watchdog == NMI_NONE ) - return; - -- switch (boot_cpu_data.x86_vendor) { -+ switch ( boot_cpu_data.x86_vendor ) -+ { - case X86_VENDOR_AMD: -- switch (boot_cpu_data.x86) { -- case 6: -- case 0xf ... 0x19: -- setup_k7_watchdog(); -- break; -- } -+ setup_k7_watchdog(); - break; -+ - case X86_VENDOR_INTEL: - switch (boot_cpu_data.x86) { - case 6: --- -2.44.0 - diff --git a/0067-tests-resource-Fix-HVM-guest-in-SHADOW-builds.patch b/0067-tests-resource-Fix-HVM-guest-in-SHADOW-builds.patch deleted file mode 100644 index 5ce4e17..0000000 --- a/0067-tests-resource-Fix-HVM-guest-in-SHADOW-builds.patch +++ /dev/null @@ -1,110 +0,0 @@ -From 9bc40dbcf9eafccc1923b2555286bf6a2af03b7a Mon Sep 17 00:00:00 2001 -From: Andrew Cooper <andrew.cooper3@citrix.com> -Date: Tue, 2 Apr 2024 16:24:07 +0200 -Subject: [PATCH 67/67] tests/resource: Fix HVM guest in !SHADOW builds -MIME-Version: 1.0 -Content-Type: text/plain; charset=UTF-8 -Content-Transfer-Encoding: 8bit - -Right now, test-resource always creates HVM Shadow guests. But if Xen has -SHADOW compiled out, running the test yields: - - $./test-resource - XENMEM_acquire_resource tests - Test x86 PV - Created d1 - Test grant table - Test x86 PVH - Skip: 95 - Operation not supported - -and doesn't really test HVM guests, but doesn't fail either. 
- -There's nothing paging-mode-specific about this test, so default to HAP if -possible and provide a more specific message if neither HAP or Shadow are -available. - -As we've got physinfo to hand, also provide more specific message about the -absence of PV or HVM support. - -Signed-off-by: Andrew Cooper <andrew.cooper3@citrix.com> -Acked-by: Roger Pau Monné <roger.pau@citrix.com> -master commit: 0263dc9069ddb66335c72a159e09050b1600e56a -master date: 2024-03-01 20:14:19 +0000 ---- - tools/tests/resource/test-resource.c | 39 ++++++++++++++++++++++++++++ - 1 file changed, 39 insertions(+) - -diff --git a/tools/tests/resource/test-resource.c b/tools/tests/resource/test-resource.c -index 0a950072f9..e2c4ba3478 100644 ---- a/tools/tests/resource/test-resource.c -+++ b/tools/tests/resource/test-resource.c -@@ -20,6 +20,8 @@ static xc_interface *xch; - static xenforeignmemory_handle *fh; - static xengnttab_handle *gh; - -+static xc_physinfo_t physinfo; -+ - static void test_gnttab(uint32_t domid, unsigned int nr_frames, - unsigned long gfn) - { -@@ -172,6 +174,37 @@ static void test_domain_configurations(void) - - printf("Test %s\n", t->name); - -+#if defined(__x86_64__) || defined(__i386__) -+ if ( t->create.flags & XEN_DOMCTL_CDF_hvm ) -+ { -+ if ( !(physinfo.capabilities & XEN_SYSCTL_PHYSCAP_hvm) ) -+ { -+ printf(" Skip: HVM not available\n"); -+ continue; -+ } -+ -+ /* -+ * On x86, use HAP guests if possible, but skip if neither HAP nor -+ * SHADOW is available. 
-+ */ -+ if ( physinfo.capabilities & XEN_SYSCTL_PHYSCAP_hap ) -+ t->create.flags |= XEN_DOMCTL_CDF_hap; -+ else if ( !(physinfo.capabilities & XEN_SYSCTL_PHYSCAP_shadow) ) -+ { -+ printf(" Skip: Neither HAP or SHADOW available\n"); -+ continue; -+ } -+ } -+ else -+ { -+ if ( !(physinfo.capabilities & XEN_SYSCTL_PHYSCAP_pv) ) -+ { -+ printf(" Skip: PV not available\n"); -+ continue; -+ } -+ } -+#endif -+ - rc = xc_domain_create(xch, &domid, &t->create); - if ( rc ) - { -@@ -214,6 +247,8 @@ static void test_domain_configurations(void) - - int main(int argc, char **argv) - { -+ int rc; -+ - printf("XENMEM_acquire_resource tests\n"); - - xch = xc_interface_open(NULL, NULL, 0); -@@ -227,6 +262,10 @@ int main(int argc, char **argv) - if ( !gh ) - err(1, "xengnttab_open"); - -+ rc = xc_physinfo(xch, &physinfo); -+ if ( rc ) -+ err(1, "Failed to obtain physinfo"); -+ - test_domain_configurations(); - - return !!nr_failures; --- -2.44.0 - @@ -1,6 +1,6 @@ -Xen upstream patchset #1 for 4.17.4-pre +Xen upstream patchset #0 for 4.18.3-pre Containing patches from -RELEASE-4.17.3 (07f413d7ffb06eab36045bd19f53555de1cacf62) +RELEASE-4.18.2 (844f9931c6c207588a70f897262c628cd542f75a) to -staging-4.17 (9bc40dbcf9eafccc1923b2555286bf6a2af03b7a) +staging-4.18 (d078d0aa86e9e3b937f673dc89306b3afd09d560) |